This commit is contained in:
Timothy J. Baek
2024-09-28 02:56:56 +02:00
parent b8b994a820
commit 9d2ed3d2be
4 changed files with 34 additions and 4 deletions

View File

@@ -725,8 +725,16 @@ def process_file(
PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
)
docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
raw_content = " ".join([doc.page_content for doc in docs])
print(raw_content)
raw_text_content = " ".join([doc.page_content for doc in docs])
Files.update_files_metadata_by_id(
form_data.file_id,
{
"content": {
"text": raw_text_content,
}
},
)
try:
result = save_docs_to_vector_db(