Update utils.py (#21105)

This commit is contained in:
Classic298
2026-02-13 20:37:12 +01:00
committed by GitHub
parent d01b1d4880
commit 97a3b1528d

View File

@@ -4,6 +4,7 @@ KEYS_TO_EXCLUDE = ["content", "pages", "tables", "paragraphs", "sections", "figu
def filter_metadata(metadata: dict[str, any]) -> dict[str, any]:
# Removes large/redundant fields from metadata dict.
metadata = {
key: value for key, value in metadata.items() if key not in KEYS_TO_EXCLUDE
}
@@ -13,16 +14,15 @@ def filter_metadata(metadata: dict[str, any]) -> dict[str, any]:
def process_metadata(
    metadata: dict[str, any],
) -> dict[str, any]:
    # Builds and returns a NEW dict from `metadata`:
    #   * keys listed in KEYS_TO_EXCLUDE (large fields) are left out;
    #   * datetime, list and dict values are replaced by their str() form;
    #   * every other value is copied over unchanged.
    #
    # The input dict is never modified. Deleting entries from `metadata`
    # while looping over `metadata.items()` raises
    # "RuntimeError: dictionary changed size during iteration", so the
    # filtered entries are collected in a separate dict instead.
    result = {}
    for key, value in metadata.items():
        # Skip large fields
        if key in KEYS_TO_EXCLUDE:
            continue
        # Convert non-serializable fields to strings
        if isinstance(value, (datetime, list, dict)):
            result[key] = str(value)
        else:
            result[key] = value
    return result