From 00b3583dc2fe77f7f2223ea2fa2fe4302d4cfdcd Mon Sep 17 00:00:00 2001 From: Classic298 <27028174+Classic298@users.noreply.github.com> Date: Thu, 22 Jan 2026 00:36:08 +0100 Subject: [PATCH] fix: fix reindex not working due to unnecessary dupe check (#20857) * Update retrieval.py * Update knowledge.py * Update retrieval.py * Update knowledge.py --- backend/open_webui/routers/retrieval.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 318e7bf8ce..5a3a346ab7 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1425,8 +1425,16 @@ def save_docs_to_vector_db( if result is not None and result.ids and len(result.ids) > 0: existing_doc_ids = result.ids[0] if existing_doc_ids: - log.info(f"Document with hash {metadata['hash']} already exists") - raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT) + # Check if the existing document belongs to the same file + # If same file_id, this is a re-add/reindex - allow it + # If different file_id, this is a duplicate - block it + existing_file_id = None + if result.metadatas and result.metadatas[0]: + existing_file_id = result.metadatas[0][0].get("file_id") + + if existing_file_id != metadata.get("file_id"): + log.info(f"Document with hash {metadata['hash']} already exists") + raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT) if split: if request.app.state.config.ENABLE_MARKDOWN_HEADER_TEXT_SPLITTER: