From a7e614ca4c915db6a2bbfe4bb7998dfb0a41d841 Mon Sep 17 00:00:00 2001 From: Henne <65833107+HennieLP@users.noreply.github.com> Date: Tue, 2 Dec 2025 20:41:09 +0100 Subject: [PATCH] feat: Adds document intelligence model configuration (#19692) * Adds document intelligence model configuration Enables the configuration of the Document Intelligence model to be used by the RAG pipeline. This allows users to specify the model they want to use for document processing, providing flexibility and control over the extraction process. * Added Titel to Document Intelligence Model Config Added Titel to Document Intelligence Model Config --- backend/open_webui/config.py | 6 ++++++ backend/open_webui/main.py | 2 ++ backend/open_webui/retrieval/loaders/main.py | 2 ++ backend/open_webui/routers/retrieval.py | 9 +++++++++ src/lib/apis/retrieval/index.ts | 1 + src/lib/components/admin/Settings/Documents.svelte | 12 ++++++++++++ src/lib/i18n/locales/en-GB/translation.json | 2 ++ src/lib/i18n/locales/en-US/translation.json | 2 ++ 8 files changed, 36 insertions(+) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index a3a9050f78..41e88df5d2 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2590,6 +2590,12 @@ DOCUMENT_INTELLIGENCE_KEY = PersistentConfig( os.getenv("DOCUMENT_INTELLIGENCE_KEY", ""), ) +DOCUMENT_INTELLIGENCE_MODEL = PersistentConfig( + "DOCUMENT_INTELLIGENCE_MODEL", + "rag.document_intelligence_model", + os.getenv("DOCUMENT_INTELLIGENCE_MODEL", "prebuilt-layout"), +) + MISTRAL_OCR_API_BASE_URL = PersistentConfig( "MISTRAL_OCR_API_BASE_URL", "rag.MISTRAL_OCR_API_BASE_URL", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 087dc5fb03..e1f3b39a3e 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -273,6 +273,7 @@ from open_webui.config import ( DOCLING_PARAMS, DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_KEY, + DOCUMENT_INTELLIGENCE_MODEL, MISTRAL_OCR_API_BASE_URL, MISTRAL_OCR_API_KEY, RAG_TEXT_SPLITTER, @@ -871,6 +872,7 @@ app.state.config.DOCLING_API_KEY = DOCLING_API_KEY app.state.config.DOCLING_PARAMS = DOCLING_PARAMS app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY +app.state.config.DOCUMENT_INTELLIGENCE_MODEL = DOCUMENT_INTELLIGENCE_MODEL app.state.config.MISTRAL_OCR_API_BASE_URL = MISTRAL_OCR_API_BASE_URL app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY app.state.config.MINERU_API_MODE = MINERU_API_MODE diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py index fcc507e088..1346cd065c 100644 --- a/backend/open_webui/retrieval/loaders/main.py +++ b/backend/open_webui/retrieval/loaders/main.py @@ -322,12 +322,14 @@ class Loader: file_path=file_path, api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"), api_key=self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY"), + api_model=self.kwargs.get("DOCUMENT_INTELLIGENCE_MODEL"), ) else: loader = AzureAIDocumentIntelligenceLoader( file_path=file_path, api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"), azure_credential=DefaultAzureCredential(), + api_model=self.kwargs.get("DOCUMENT_INTELLIGENCE_MODEL"), ) elif self.engine == "mineru" and file_ext in [ "pdf" diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 190f001edd..b7ed993895 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -468,6 +468,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS, "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + "DOCUMENT_INTELLIGENCE_MODEL": request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL, "MISTRAL_OCR_API_BASE_URL": request.app.state.config.MISTRAL_OCR_API_BASE_URL, "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, # MinerU settings @@ -647,6 +648,7 @@ class ConfigForm(BaseModel): DOCLING_PARAMS: Optional[dict] = None DOCUMENT_INTELLIGENCE_ENDPOINT: Optional[str] = None DOCUMENT_INTELLIGENCE_KEY: Optional[str] = None + DOCUMENT_INTELLIGENCE_MODEL: Optional[str] = None MISTRAL_OCR_API_BASE_URL: Optional[str] = None MISTRAL_OCR_API_KEY: Optional[str] = None @@ -842,6 +844,11 @@ async def update_rag_config( if form_data.DOCUMENT_INTELLIGENCE_KEY is not None else request.app.state.config.DOCUMENT_INTELLIGENCE_KEY ) + request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL = ( + form_data.DOCUMENT_INTELLIGENCE_MODEL + if form_data.DOCUMENT_INTELLIGENCE_MODEL is not None + else request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL + ) request.app.state.config.MISTRAL_OCR_API_BASE_URL = ( form_data.MISTRAL_OCR_API_BASE_URL @@ -1131,6 +1138,7 @@ async def update_rag_config( "DOCLING_PARAMS": request.app.state.config.DOCLING_PARAMS, "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + "DOCUMENT_INTELLIGENCE_MODEL": request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL, "MISTRAL_OCR_API_BASE_URL": request.app.state.config.MISTRAL_OCR_API_BASE_URL, "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, # MinerU settings @@ -1543,6 +1551,7 @@ def process_file( PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES, DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_KEY=request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + DOCUMENT_INTELLIGENCE_MODEL=request.app.state.config.DOCUMENT_INTELLIGENCE_MODEL, MISTRAL_OCR_API_BASE_URL=request.app.state.config.MISTRAL_OCR_API_BASE_URL, MISTRAL_OCR_API_KEY=request.app.state.config.MISTRAL_OCR_API_KEY, MINERU_API_MODE=request.app.state.config.MINERU_API_MODE, diff --git a/src/lib/apis/retrieval/index.ts b/src/lib/apis/retrieval/index.ts index 5cb0f60a72..75065910d6 100644 --- a/src/lib/apis/retrieval/index.ts +++ b/src/lib/apis/retrieval/index.ts @@ -35,6 +35,7 @@ type ChunkConfigForm = { type DocumentIntelligenceConfigForm = { key: string; endpoint: string; + model: string; }; type ContentExtractConfigForm = { diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 26c23028ed..0b9accd4bf 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -597,6 +597,18 @@ required={false} /> +