refac: file upload

This commit is contained in:
Timothy J. Baek
2024-06-18 13:50:18 -07:00
parent 1000bcaeb7
commit 9e7b7a895e
5 changed files with 285 additions and 61 deletions

View File

@@ -55,6 +55,9 @@ from apps.webui.models.documents import (
DocumentForm,
DocumentResponse,
)
from apps.webui.models.files import (
Files,
)
from apps.rag.utils import (
get_model_path,
@@ -1131,6 +1134,57 @@ def store_doc(
)
# Request body accepted by the POST "/process/doc" endpoint.
class ProcessDocForm(BaseModel):
# Identifier of a stored file record; process_doc passes it to
# Files.get_file_by_id to locate the file to process.
file_id: str
@app.post("/process/doc")
def process_doc(
    form_data: ProcessDocForm,
    user=Depends(get_current_user),
):
    """Load a stored file by id and index its contents in the vector DB.

    The file record is fetched with ``Files.get_file_by_id``; its on-disk
    location is ``file.meta["path"]`` when present, otherwise
    ``{UPLOAD_DIR}/{file.filename}``. The collection name defaults to the
    first 63 characters of ``calculate_sha256`` over the file.

    Args:
        form_data: Request body carrying the ``file_id`` to process.
        user: Resolved through ``Depends(get_current_user)``; not read in
            the body (presumably it only gates access -- confirm).

    Returns:
        ``{"status": True, "collection_name": ..., "known_type": ...}``
        when ``store_data_in_vector_db`` returns a truthy result.
        NOTE(review): a falsy result falls through and returns ``None``,
        exactly as before; confirm whether that should be an error.

    Raises:
        HTTPException: 500 when storing in the vector DB raises; 400 for
            any other failure (with ``PANDOC_NOT_INSTALLED`` when the
            error text contains "No pandoc was found").
    """
    try:
        file = Files.get_file_by_id(form_data.file_id)
        # NOTE(review): if the lookup can return None for an unknown id, the
        # attribute access below raises and is reported as a generic 400.
        file_path = file.meta.get("path", f"{UPLOAD_DIR}/{file.filename}")

        # The previous code read `collection_name` before binding it, which
        # raised UnboundLocalError on every request. ProcessDocForm currently
        # declares only `file_id`, so this resolves to None today; getattr
        # keeps the optional-override intent working if the form gains a
        # `collection_name` field later.
        collection_name = getattr(form_data, "collection_name", None)
        if collection_name is None:
            # Context manager guarantees the handle is closed even when
            # hashing raises.
            with open(file_path, "rb") as f:
                collection_name = calculate_sha256(f)[:63]

        loader, known_type = get_loader(
            file.filename, file.meta.get("content_type"), file_path
        )
        data = loader.load()

        try:
            result = store_data_in_vector_db(data, collection_name)

            if result:
                return {
                    "status": True,
                    "collection_name": collection_name,
                    "known_type": known_type,
                }
        except Exception as e:
            # `detail` must be JSON-serialisable, so send the message text
            # rather than the exception object itself.
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=str(e),
            )
    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 500 above) through untouched;
        # previously the generic handler below rewrote them into a 400.
        raise
    except Exception as e:
        log.exception(e)
        if "No pandoc was found" in str(e):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED,
            )
        else:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=ERROR_MESSAGES.DEFAULT(e),
            )
class TextRAGForm(BaseModel):
name: str
content: str