Added support for using Apache Tika as a document loader.

Added persistent configuration options that control whether the Tika service is used and where it is located.

Updated backend.apps.rag.main:get_loader() to make use of Tika document loader.
This commit is contained in:
Nicko van Someren
2024-06-30 15:49:15 -06:00
parent 7bc88eb00d
commit 9cf622d981
3 changed files with 104 additions and 41 deletions

View File

@@ -878,6 +878,22 @@ WEBUI_SESSION_COOKIE_SECURE = os.environ.get(
# Fail at import time when WEBUI_AUTH is enabled but WEBUI_SECRET_KEY is an
# empty string, rather than continuing with an empty key.
if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
    raise ValueError(ERROR_MESSAGES.ENV_VAR_NOT_FOUND)
####################################
# RAG document text extraction
####################################
# Whether the Tika document loader should be used for text extraction.
# The initial value comes from the DOCUMENT_USE_TIKA environment variable:
# only the string "true" (compared case-insensitively) enables it, and an
# unset variable is treated as "false".
DOCUMENT_USE_TIKA = PersistentConfig(
    "DOCUMENT_USE_TIKA",
    "rag.document_use_tika",
    os.environ.get("DOCUMENT_USE_TIKA", "false").lower() == "true",
)
# Base URL of the Tika server. The initial value is read from the
# TIKA_SERVER_URL environment variable; when it is unset, the default below
# (intended for a sidecar deployment) is used.
TIKA_SERVER_URL = PersistentConfig(
    "TIKA_SERVER_URL",
    "rag.tika_server_url",
    os.environ.get("TIKA_SERVER_URL", "http://tika:9998"),
)
####################################
# RAG
####################################