diff --git a/backend/open_webui/retrieval/vector/utils.py b/backend/open_webui/retrieval/vector/utils.py
index a597390b92..a39d364419 100644
--- a/backend/open_webui/retrieval/vector/utils.py
+++ b/backend/open_webui/retrieval/vector/utils.py
@@ -4,6 +4,7 @@ KEYS_TO_EXCLUDE = ["content", "pages", "tables", "paragraphs", "sections", "figu
 
 
 def filter_metadata(metadata: dict[str, any]) -> dict[str, any]:
+    # Removes large/redundant fields from metadata dict.
     metadata = {
         key: value for key, value in metadata.items() if key not in KEYS_TO_EXCLUDE
     }
@@ -13,16 +14,15 @@ def filter_metadata(metadata: dict[str, any]) -> dict[str, any]:
 def process_metadata(
     metadata: dict[str, any],
 ) -> dict[str, any]:
+    # Removes large fields and converts non-serializable types (datetime, list, dict) to strings.
+    result = {}
     for key, value in metadata.items():
-        # Remove large fields
+        # Skip large fields
         if key in KEYS_TO_EXCLUDE:
-            del metadata[key]
-
+            continue
         # Convert non-serializable fields to strings
-        if (
-            isinstance(value, datetime)
-            or isinstance(value, list)
-            or isinstance(value, dict)
-        ):
-            metadata[key] = str(value)
-    return metadata
+        if isinstance(value, (datetime, list, dict)):
+            result[key] = str(value)
+        else:
+            result[key] = value
+    return result