diff --git a/libs/ktem/ktem/index/file/knet/pipelines.py b/libs/ktem/ktem/index/file/knet/pipelines.py index 9741e5a9..9286481c 100644 --- a/libs/ktem/ktem/index/file/knet/pipelines.py +++ b/libs/ktem/ktem/index/file/knet/pipelines.py @@ -91,6 +91,9 @@ class KnetRetrievalPipeline(BaseFileIndexRetriever): chunks = yaml.safe_load(response.content) for chunk in chunks: metadata = chunk["node"]["metadata"] + metadata["page_label"] = metadata.get( + "pageIdx", metadata.get("parentPageIdx", "") + ) metadata["type"] = metadata_translation.get( metadata.pop("content_type", ""), "" )