feat: Marker API content extraction support

This commit is contained in:
Hisma
2025-05-27 00:44:07 -04:00
parent b8e16211b9
commit a9405cc101
6 changed files with 516 additions and 1 deletions

View File

@@ -1848,6 +1848,60 @@ CONTENT_EXTRACTION_ENGINE = PersistentConfig(
os.environ.get("CONTENT_EXTRACTION_ENGINE", "").lower(),
)
# Persistent setting stored under "rag.datalab_marker_api_key".
# Seeded from the DATALAB_MARKER_API_KEY environment variable; an unset
# variable yields the empty string.
DATALAB_MARKER_API_KEY = PersistentConfig(
    "DATALAB_MARKER_API_KEY",
    "rag.datalab_marker_api_key",
    os.getenv("DATALAB_MARKER_API_KEY", ""),
)
# Persistent setting stored under "rag.datalab_marker_langs".
# Seeded from the DATALAB_MARKER_LANGS environment variable and kept as the
# raw string (empty when unset); no parsing happens here.
DATALAB_MARKER_LANGS = PersistentConfig(
    "DATALAB_MARKER_LANGS",
    "rag.datalab_marker_langs",
    os.getenv("DATALAB_MARKER_LANGS", ""),
)
# Persistent boolean flag seeded from the DATALAB_MARKER_USE_LLM environment
# variable. Defaults to False when the variable is unset.
# The config path is lower snake_case so it matches the sibling
# "rag.datalab_marker_*" keys.
DATALAB_MARKER_USE_LLM = PersistentConfig(
    "DATALAB_MARKER_USE_LLM",
    "rag.datalab_marker_use_llm",
    # Compare case-insensitively so "True"/"TRUE" are not silently read as False.
    os.environ.get("DATALAB_MARKER_USE_LLM", "false").lower() == "true",
)
# Persistent boolean flag stored under "rag.datalab_marker_skip_cache".
# Seeded from the DATALAB_MARKER_SKIP_CACHE environment variable; defaults to
# False when the variable is unset.
DATALAB_MARKER_SKIP_CACHE = PersistentConfig(
    "DATALAB_MARKER_SKIP_CACHE",
    "rag.datalab_marker_skip_cache",
    # Compare case-insensitively so "True"/"TRUE" are not silently read as False.
    os.environ.get("DATALAB_MARKER_SKIP_CACHE", "false").lower() == "true",
)
# Persistent boolean flag stored under "rag.datalab_marker_force_ocr".
# Seeded from the DATALAB_MARKER_FORCE_OCR environment variable; defaults to
# False when the variable is unset.
DATALAB_MARKER_FORCE_OCR = PersistentConfig(
    "DATALAB_MARKER_FORCE_OCR",
    "rag.datalab_marker_force_ocr",
    # Compare case-insensitively so "True"/"TRUE" are not silently read as False.
    os.environ.get("DATALAB_MARKER_FORCE_OCR", "false").lower() == "true",
)
# Persistent boolean flag stored under "rag.datalab_marker_paginate".
# Seeded from the DATALAB_MARKER_PAGINATE environment variable; defaults to
# False when the variable is unset.
DATALAB_MARKER_PAGINATE = PersistentConfig(
    "DATALAB_MARKER_PAGINATE",
    "rag.datalab_marker_paginate",
    # Compare case-insensitively so "True"/"TRUE" are not silently read as False.
    os.environ.get("DATALAB_MARKER_PAGINATE", "false").lower() == "true",
)
# Persistent boolean flag stored under "rag.datalab_marker_strip_existing_ocr".
# Seeded from the DATALAB_MARKER_STRIP_EXISTING_OCR environment variable;
# defaults to False when the variable is unset.
DATALAB_MARKER_STRIP_EXISTING_OCR = PersistentConfig(
    "DATALAB_MARKER_STRIP_EXISTING_OCR",
    "rag.datalab_marker_strip_existing_ocr",
    # Compare case-insensitively so "True"/"TRUE" are not silently read as False.
    os.environ.get("DATALAB_MARKER_STRIP_EXISTING_OCR", "false").lower() == "true",
)
# Persistent boolean flag stored under
# "rag.datalab_marker_disable_image_extraction".
# Seeded from the DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION environment variable;
# defaults to False when the variable is unset.
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = PersistentConfig(
    "DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION",
    "rag.datalab_marker_disable_image_extraction",
    # Compare case-insensitively so "True"/"TRUE" are not silently read as False.
    os.environ.get("DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", "false").lower()
    == "true",
)
# Persistent setting stored under "rag.datalab_marker_output_format".
# Seeded from the DATALAB_MARKER_OUTPUT_FORMAT environment variable; an unset
# variable yields the empty string, and the value is not validated here.
DATALAB_MARKER_OUTPUT_FORMAT = PersistentConfig(
    "DATALAB_MARKER_OUTPUT_FORMAT",
    "rag.datalab_marker_output_format",
    os.getenv("DATALAB_MARKER_OUTPUT_FORMAT", ""),
)
EXTERNAL_DOCUMENT_LOADER_URL = PersistentConfig(
"EXTERNAL_DOCUMENT_LOADER_URL",
"rag.external_document_loader_url",