From 485c6a55100accb6441eb2dc560d5d6fb2a12ad7 Mon Sep 17 00:00:00 2001 From: trducng Date: Wed, 2 Oct 2024 06:16:20 +0000 Subject: [PATCH] fix: page number in knet --- libs/ktem/ktem/index/file/knet/pipelines.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libs/ktem/ktem/index/file/knet/pipelines.py b/libs/ktem/ktem/index/file/knet/pipelines.py index 9741e5a9..9286481c 100644 --- a/libs/ktem/ktem/index/file/knet/pipelines.py +++ b/libs/ktem/ktem/index/file/knet/pipelines.py @@ -91,6 +91,9 @@ class KnetRetrievalPipeline(BaseFileIndexRetriever): chunks = yaml.safe_load(response.content) for chunk in chunks: metadata = chunk["node"]["metadata"] + metadata["page_label"] = metadata.get( + "pageIdx", metadata.get("parentPageIdx", "") + ) metadata["type"] = metadata_translation.get( metadata.pop("content_type", ""), "" )