This commit is contained in:
Timothy J. Baek
2024-10-03 22:22:22 -07:00
parent 9dd76b72b4
commit b291271df3
12 changed files with 152 additions and 79 deletions

View File

@@ -731,7 +731,7 @@ def process_file(
collection_name = form_data.collection_name
if collection_name is None:
collection_name = file.id
collection_name = f"file-{file.id}"
loader = Loader(
engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
@@ -758,12 +758,11 @@ def process_file(
log.debug(f"text_content: {text_content}")
hash = calculate_sha256_string(text_content)
res = Files.update_file_data_by_id(
Files.update_file_data_by_id(
file.id,
{"content": text_content},
)
print(res)
Files.update_file_hash_by_id(form_data.file_id, hash)
Files.update_file_hash_by_id(file.id, hash)
try:
result = save_docs_to_vector_db(
@@ -778,6 +777,13 @@ def process_file(
)
if result:
Files.update_file_metadata_by_id(
file.id,
{
"collection_name": collection_name,
},
)
return {
"status": True,
"collection_name": collection_name,

View File

@@ -319,7 +319,7 @@ def get_rag_context(
for file in files:
if file.get("context") == "full":
context = {
"documents": [[file.get("file").get("content")]],
"documents": [[file.get("file").get("data", {}).get("content")]],
"metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]],
}
else:

View File

@@ -6,7 +6,8 @@ from pathlib import Path
from typing import Optional
from open_webui.apps.webui.models.files import FileForm, FileModel, Files
from open_webui.apps.webui.models.knowledge import Knowledges
from open_webui.apps.retrieval.main import process_file, ProcessFileForm
from open_webui.config import UPLOAD_DIR
from open_webui.constants import ERROR_MESSAGES
from open_webui.env import SRC_LOG_LEVELS
@@ -61,6 +62,13 @@ def upload_file(file: UploadFile = File(...), user=Depends(get_verified_user)):
),
)
try:
process_file(ProcessFileForm(file_id=id))
file = Files.get_file_by_id(id=id)
except Exception as e:
log.exception(e)
log.error(f"Error processing file: {file.id}")
if file:
return file
else:

View File

@@ -17,7 +17,6 @@ from open_webui.utils.utils import get_admin_user, get_verified_user
from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT
router = APIRouter()
############################
@@ -132,7 +131,7 @@ class KnowledgeFileIdForm(BaseModel):
@router.post("/{id}/file/add", response_model=Optional[KnowledgeFilesResponse])
async def add_file_to_knowledge_by_id(
def add_file_to_knowledge_by_id(
id: str,
form_data: KnowledgeFileIdForm,
user=Depends(get_admin_user),
@@ -144,6 +143,11 @@ async def add_file_to_knowledge_by_id(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.NOT_FOUND,
)
if not file.data:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.FILE_NOT_PROCESSED,
)
if knowledge:
data = knowledge.data or {}
@@ -191,7 +195,7 @@ class KnowledgeFileIdForm(BaseModel):
@router.post("/{id}/file/remove", response_model=Optional[KnowledgeFilesResponse])
async def remove_file_from_knowledge_by_id(
def remove_file_from_knowledge_by_id(
id: str,
form_data: KnowledgeFileIdForm,
user=Depends(get_admin_user),

View File

@@ -95,6 +95,7 @@ class ERROR_MESSAGES(str, Enum):
)
DUPLICATE_CONTENT = "The content provided is a duplicate. Please ensure that the content is unique before proceeding."
FILE_NOT_PROCESSED = "Extracted content is not available for this file. Please ensure that the file is processed before proceeding."
class TASKS(str, Enum):