mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-16 11:57:51 +01:00
enh: vector db hash collision check
This commit is contained in:
@@ -641,6 +641,16 @@ def save_docs_to_vector_db(
|
||||
) -> bool:
|
||||
log.info(f"save_docs_to_vector_db {docs} {collection_name}")
|
||||
|
||||
# Check if entries with the same hash (metadata.hash) already exist
|
||||
if metadata and "hash" in metadata:
|
||||
existing_docs = VECTOR_DB_CLIENT.query(
|
||||
collection_name=collection_name,
|
||||
filter={"hash": metadata["hash"]},
|
||||
)
|
||||
if existing_docs:
|
||||
log.info(f"Document with hash {metadata['hash']} already exists")
|
||||
return True
|
||||
|
||||
if split:
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=app.state.config.CHUNK_SIZE,
|
||||
|
||||
Reference in New Issue
Block a user