Merge pull request #14311 from Hisma/marker-api-content-extraction

feat: Marker api content extraction support
This commit is contained in:
Tim Jaeryang Baek
2025-05-29 02:21:13 +04:00
committed by GitHub
6 changed files with 517 additions and 1 deletions

View File

@@ -212,6 +212,15 @@ from open_webui.config import (
CHUNK_OVERLAP,
CHUNK_SIZE,
CONTENT_EXTRACTION_ENGINE,
DATALAB_MARKER_API_KEY,
DATALAB_MARKER_LANGS,
DATALAB_MARKER_SKIP_CACHE,
DATALAB_MARKER_FORCE_OCR,
DATALAB_MARKER_PAGINATE,
DATALAB_MARKER_STRIP_EXISTING_OCR,
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
DATALAB_MARKER_OUTPUT_FORMAT,
DATALAB_MARKER_USE_LLM,
EXTERNAL_DOCUMENT_LOADER_URL,
EXTERNAL_DOCUMENT_LOADER_API_KEY,
TIKA_SERVER_URL,
@@ -666,6 +675,15 @@ app.state.config.ENABLE_RAG_HYBRID_SEARCH = ENABLE_RAG_HYBRID_SEARCH
app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION = ENABLE_WEB_LOADER_SSL_VERIFICATION
app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE
app.state.config.DATALAB_MARKER_API_KEY = DATALAB_MARKER_API_KEY
app.state.config.DATALAB_MARKER_LANGS = DATALAB_MARKER_LANGS
app.state.config.DATALAB_MARKER_SKIP_CACHE = DATALAB_MARKER_SKIP_CACHE
app.state.config.DATALAB_MARKER_FORCE_OCR = DATALAB_MARKER_FORCE_OCR
app.state.config.DATALAB_MARKER_PAGINATE = DATALAB_MARKER_PAGINATE
app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR = DATALAB_MARKER_STRIP_EXISTING_OCR
app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION
app.state.config.DATALAB_MARKER_USE_LLM = DATALAB_MARKER_USE_LLM
app.state.config.DATALAB_MARKER_OUTPUT_FORMAT = DATALAB_MARKER_OUTPUT_FORMAT
app.state.config.EXTERNAL_DOCUMENT_LOADER_URL = EXTERNAL_DOCUMENT_LOADER_URL
app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY = EXTERNAL_DOCUMENT_LOADER_API_KEY
app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL