mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-16 20:07:49 +01:00
refac: documents file handling
This commit is contained in:
@@ -930,7 +930,9 @@ def store_web_search(form_data: SearchForm, user=Depends(get_verified_user)):
|
||||
)
|
||||
|
||||
|
||||
def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> bool:
|
||||
def store_data_in_vector_db(
|
||||
data, collection_name, metadata: Optional[dict] = None, overwrite: bool = False
|
||||
) -> bool:
|
||||
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=app.state.config.CHUNK_SIZE,
|
||||
@@ -942,7 +944,7 @@ def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> b
|
||||
|
||||
if len(docs) > 0:
|
||||
log.info(f"store_data_in_vector_db {docs}")
|
||||
return store_docs_in_vector_db(docs, collection_name, overwrite), None
|
||||
return store_docs_in_vector_db(docs, collection_name, metadata, overwrite), None
|
||||
else:
|
||||
raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT)
|
||||
|
||||
@@ -956,14 +958,16 @@ def store_text_in_vector_db(
|
||||
add_start_index=True,
|
||||
)
|
||||
docs = text_splitter.create_documents([text], metadatas=[metadata])
|
||||
return store_docs_in_vector_db(docs, collection_name, overwrite)
|
||||
return store_docs_in_vector_db(docs, collection_name, overwrite=overwrite)
|
||||
|
||||
|
||||
def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> bool:
|
||||
def store_docs_in_vector_db(
|
||||
docs, collection_name, metadata: Optional[dict] = None, overwrite: bool = False
|
||||
) -> bool:
|
||||
log.info(f"store_docs_in_vector_db {docs} {collection_name}")
|
||||
|
||||
texts = [doc.page_content for doc in docs]
|
||||
metadatas = [doc.metadata for doc in docs]
|
||||
metadatas = [{**doc.metadata, **(metadata if metadata else {})} for doc in docs]
|
||||
|
||||
# ChromaDB does not like datetime formats
|
||||
# for meta-data so convert them to string.
|
||||
@@ -1237,13 +1241,21 @@ def process_doc(
|
||||
data = loader.load()
|
||||
|
||||
try:
|
||||
result = store_data_in_vector_db(data, collection_name)
|
||||
result = store_data_in_vector_db(
|
||||
data,
|
||||
collection_name,
|
||||
{
|
||||
"file_id": form_data.file_id,
|
||||
"name": file.meta.get("name", file.filename),
|
||||
},
|
||||
)
|
||||
|
||||
if result:
|
||||
return {
|
||||
"status": True,
|
||||
"collection_name": collection_name,
|
||||
"known_type": known_type,
|
||||
"filename": file.meta.get("name", file.filename),
|
||||
}
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
|
||||
Reference in New Issue
Block a user