mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-16 11:57:51 +01:00
feat: docling support for document preprocessing
This commit is contained in:
@@ -351,6 +351,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
         "content_extraction": {
             "engine": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
             "tika_server_url": request.app.state.config.TIKA_SERVER_URL,
+            "docling_server_url": request.app.state.config.DOCLING_SERVER_URL,
         },
         "chunk": {
             "text_splitter": request.app.state.config.TEXT_SPLITTER,
@@ -403,6 +404,7 @@ class FileConfig(BaseModel):
 class ContentExtractionConfig(BaseModel):
     engine: str = ""
     tika_server_url: Optional[str] = None
+    docling_server_url: Optional[str] = None
 
 
 class ChunkParamUpdateForm(BaseModel):
@@ -483,6 +485,9 @@ async def update_rag_config(
         request.app.state.config.TIKA_SERVER_URL = (
             form_data.content_extraction.tika_server_url
         )
+        request.app.state.config.DOCLING_SERVER_URL = (
+            form_data.content_extraction.docling_server_url
+        )
 
     if form_data.chunk is not None:
         request.app.state.config.TEXT_SPLITTER = form_data.chunk.text_splitter
@@ -559,6 +564,7 @@ async def update_rag_config(
         "content_extraction": {
             "engine": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
             "tika_server_url": request.app.state.config.TIKA_SERVER_URL,
+            "docling_server_url": request.app.state.config.DOCLING_SERVER_URL,
         },
         "chunk": {
             "text_splitter": request.app.state.config.TEXT_SPLITTER,
@@ -879,6 +885,7 @@ def process_file(
                     loader = Loader(
                         engine=request.app.state.config.CONTENT_EXTRACTION_ENGINE,
                         TIKA_SERVER_URL=request.app.state.config.TIKA_SERVER_URL,
+                        DOCLING_SERVER_URL=request.app.state.config.DOCLING_SERVER_URL,
                         PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES,
                     )
                     docs = loader.load(
Reference in New Issue
Block a user