from datetime import datetime
from typing import Any

# Metadata keys that are dropped by the helpers below.
KEYS_TO_EXCLUDE = ["content", "pages", "tables", "paragraphs", "sections", "figures"]


def filter_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of *metadata* without the keys in ``KEYS_TO_EXCLUDE``.

    Args:
        metadata: Mapping of metadata field names to values.

    Returns:
        A new dict holding every item whose key is not excluded, in the
        original insertion order. The input dict is not modified.
    """
    return {
        key: value
        for key, value in metadata.items()
        if key not in KEYS_TO_EXCLUDE
    }


def process_metadata(
    metadata: dict[str, Any],
) -> dict[str, Any]:
    """Drop excluded keys and stringify datetime, list and dict values.

    Args:
        metadata: Mapping of metadata field names to values.

    Returns:
        A new dict without the keys in ``KEYS_TO_EXCLUDE``. Values that are
        ``datetime``, ``list`` or ``dict`` instances are replaced by
        ``str(value)``; every other value is passed through unchanged. The
        input dict is not modified.
    """
    # NOTE(review): the original comment calls these types "non-serializable";
    # presumably the consumer only accepts scalar values -- confirm.
    return {
        key: str(value) if isinstance(value, (datetime, list, dict)) else value
        for key, value in filter_metadata(metadata).items()
    }