fix: fix reindex not working due to unnecessary dupe check (#20857)

* Update retrieval.py

* Update knowledge.py

* Update retrieval.py

* Update knowledge.py
This commit is contained in:
Classic298
2026-01-22 00:36:08 +01:00
committed by GitHub
parent 4d9a7cc6c0
commit 00b3583dc2

View File

@@ -1425,8 +1425,16 @@ def save_docs_to_vector_db(
if result is not None and result.ids and len(result.ids) > 0:
existing_doc_ids = result.ids[0]
if existing_doc_ids:
log.info(f"Document with hash {metadata['hash']} already exists")
raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
# Check if the existing document belongs to the same file
# If same file_id, this is a re-add/reindex - allow it
# If different file_id, this is a duplicate - block it
existing_file_id = None
if result.metadatas and result.metadatas[0]:
existing_file_id = result.metadatas[0][0].get("file_id")
if existing_file_id != metadata.get("file_id"):
log.info(f"Document with hash {metadata['hash']} already exists")
raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
if split:
if request.app.state.config.ENABLE_MARKDOWN_HEADER_TEXT_SPLITTER: