Merge pull request #12571 from hurxxxx/feat/reindex-knowledge-files

feat: reindex knowledge files
This commit is contained in:
Tim Jaeryang Baek
2025-04-08 22:10:35 -07:00
committed by GitHub
55 changed files with 236 additions and 5 deletions

View File

@@ -159,6 +159,72 @@ async def create_new_knowledge(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.FILE_EXISTS,
)
############################
# ReindexKnowledgeFiles
############################
@router.post("/reindex", response_model=bool)
async def reindex_knowledge_files(
request: Request,
user=Depends(get_verified_user)
):
if user.role != "admin":
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail=ERROR_MESSAGES.UNAUTHORIZED,
)
knowledge_bases = Knowledges.get_knowledge_bases()
log.info(f"Starting reindexing for {len(knowledge_bases)} knowledge bases")
for knowledge_base in knowledge_bases:
try:
files = Files.get_files_by_ids(knowledge_base.data.get("file_ids", []))
try:
if VECTOR_DB_CLIENT.has_collection(collection_name=knowledge_base.id):
VECTOR_DB_CLIENT.delete_collection(
collection_name=knowledge_base.id
)
except Exception as e:
log.error(f"Error deleting collection {knowledge_base.id}: {str(e)}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error deleting vector DB collection"
)
failed_files = []
for file in files:
try:
process_file(
request,
ProcessFileForm(file_id=file.id, collection_name=knowledge_base.id),
user=user,
)
except Exception as e:
log.error(f"Error processing file {file.filename} (ID: {file.id}): {str(e)}")
failed_files.append({"file_id": file.id, "error": str(e)})
continue
except Exception as e:
log.error(f"Error processing knowledge base {knowledge_base.id}: {str(e)}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error processing knowledge base"
)
if failed_files:
log.warning(f"Failed to process {len(failed_files)} files in knowledge base {knowledge_base.id}")
for failed in failed_files:
log.warning(f"File ID: {failed['file_id']}, Error: {failed['error']}")
log.info("Reindexing completed successfully")
return True
############################
@@ -676,3 +742,6 @@ def add_files_to_knowledge_batch(
return KnowledgeFilesResponse(
**knowledge.model_dump(), files=Files.get_files_by_ids(existing_file_ids)
)