mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-16 11:57:51 +01:00
Merge pull request #15903 from Hisma/marker-api-update
feat: Add configurable API URL (for self-hosting) and additional_config parameter for Datalab Marker API
This commit is contained in:
@@ -2032,10 +2032,16 @@ DATALAB_MARKER_API_KEY = PersistentConfig(
|
||||
os.environ.get("DATALAB_MARKER_API_KEY", ""),
|
||||
)
|
||||
|
||||
DATALAB_MARKER_LANGS = PersistentConfig(
|
||||
"DATALAB_MARKER_LANGS",
|
||||
"rag.datalab_marker_langs",
|
||||
os.environ.get("DATALAB_MARKER_LANGS", ""),
|
||||
DATALAB_MARKER_API_BASE_URL = PersistentConfig(
|
||||
"DATALAB_MARKER_API_BASE_URL",
|
||||
"rag.datalab_marker_api_base_url",
|
||||
os.environ.get("DATALAB_MARKER_API_BASE_URL", ""),
|
||||
)
|
||||
|
||||
DATALAB_MARKER_ADDITIONAL_CONFIG = PersistentConfig(
|
||||
"DATALAB_MARKER_ADDITIONAL_CONFIG",
|
||||
"rag.datalab_marker_additional_config",
|
||||
os.environ.get("DATALAB_MARKER_ADDITIONAL_CONFIG", ""),
|
||||
)
|
||||
|
||||
DATALAB_MARKER_USE_LLM = PersistentConfig(
|
||||
@@ -2075,6 +2081,12 @@ DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = PersistentConfig(
|
||||
== "true",
|
||||
)
|
||||
|
||||
DATALAB_MARKER_FORMAT_LINES = PersistentConfig(
|
||||
"DATALAB_MARKER_FORMAT_LINES",
|
||||
"rag.datalab_marker_format_lines",
|
||||
os.environ.get("DATALAB_MARKER_FORMAT_LINES", "false").lower() == "true",
|
||||
)
|
||||
|
||||
DATALAB_MARKER_OUTPUT_FORMAT = PersistentConfig(
|
||||
"DATALAB_MARKER_OUTPUT_FORMAT",
|
||||
"rag.datalab_marker_output_format",
|
||||
|
||||
@@ -226,12 +226,14 @@ from open_webui.config import (
|
||||
CHUNK_SIZE,
|
||||
CONTENT_EXTRACTION_ENGINE,
|
||||
DATALAB_MARKER_API_KEY,
|
||||
DATALAB_MARKER_LANGS,
|
||||
DATALAB_MARKER_API_BASE_URL,
|
||||
DATALAB_MARKER_ADDITIONAL_CONFIG,
|
||||
DATALAB_MARKER_SKIP_CACHE,
|
||||
DATALAB_MARKER_FORCE_OCR,
|
||||
DATALAB_MARKER_PAGINATE,
|
||||
DATALAB_MARKER_STRIP_EXISTING_OCR,
|
||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
|
||||
DATALAB_MARKER_FORMAT_LINES,
|
||||
DATALAB_MARKER_OUTPUT_FORMAT,
|
||||
DATALAB_MARKER_USE_LLM,
|
||||
EXTERNAL_DOCUMENT_LOADER_URL,
|
||||
@@ -771,7 +773,8 @@ app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION = ENABLE_WEB_LOADER_SSL_VERI
|
||||
|
||||
app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE
|
||||
app.state.config.DATALAB_MARKER_API_KEY = DATALAB_MARKER_API_KEY
|
||||
app.state.config.DATALAB_MARKER_LANGS = DATALAB_MARKER_LANGS
|
||||
app.state.config.DATALAB_MARKER_API_BASE_URL = DATALAB_MARKER_API_BASE_URL
|
||||
app.state.config.DATALAB_MARKER_ADDITIONAL_CONFIG = DATALAB_MARKER_ADDITIONAL_CONFIG
|
||||
app.state.config.DATALAB_MARKER_SKIP_CACHE = DATALAB_MARKER_SKIP_CACHE
|
||||
app.state.config.DATALAB_MARKER_FORCE_OCR = DATALAB_MARKER_FORCE_OCR
|
||||
app.state.config.DATALAB_MARKER_PAGINATE = DATALAB_MARKER_PAGINATE
|
||||
@@ -779,6 +782,7 @@ app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR = DATALAB_MARKER_STRIP_EXISTI
|
||||
app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION = (
|
||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION
|
||||
)
|
||||
app.state.config.DATALAB_MARKER_FORMAT_LINES = DATALAB_MARKER_FORMAT_LINES
|
||||
app.state.config.DATALAB_MARKER_USE_LLM = DATALAB_MARKER_USE_LLM
|
||||
app.state.config.DATALAB_MARKER_OUTPUT_FORMAT = DATALAB_MARKER_OUTPUT_FORMAT
|
||||
app.state.config.EXTERNAL_DOCUMENT_LOADER_URL = EXTERNAL_DOCUMENT_LOADER_URL
|
||||
|
||||
@@ -15,24 +15,28 @@ class DatalabMarkerLoader:
|
||||
self,
|
||||
file_path: str,
|
||||
api_key: str,
|
||||
langs: Optional[str] = None,
|
||||
api_base_url: str,
|
||||
additional_config: Optional[str] = None,
|
||||
use_llm: bool = False,
|
||||
skip_cache: bool = False,
|
||||
force_ocr: bool = False,
|
||||
paginate: bool = False,
|
||||
strip_existing_ocr: bool = False,
|
||||
disable_image_extraction: bool = False,
|
||||
format_lines: bool = False,
|
||||
output_format: str = None,
|
||||
):
|
||||
self.file_path = file_path
|
||||
self.api_key = api_key
|
||||
self.langs = langs
|
||||
self.api_base_url = api_base_url
|
||||
self.additional_config = additional_config
|
||||
self.use_llm = use_llm
|
||||
self.skip_cache = skip_cache
|
||||
self.force_ocr = force_ocr
|
||||
self.paginate = paginate
|
||||
self.strip_existing_ocr = strip_existing_ocr
|
||||
self.disable_image_extraction = disable_image_extraction
|
||||
self.format_lines = format_lines
|
||||
self.output_format = output_format
|
||||
|
||||
def _get_mime_type(self, filename: str) -> str:
|
||||
@@ -60,7 +64,7 @@ class DatalabMarkerLoader:
|
||||
return mime_map.get(ext, "application/octet-stream")
|
||||
|
||||
def check_marker_request_status(self, request_id: str) -> dict:
|
||||
url = f"https://www.datalab.to/api/v1/marker/{request_id}"
|
||||
url = f"{self.api_base_url}/{request_id}"
|
||||
headers = {"X-Api-Key": self.api_key}
|
||||
try:
|
||||
response = requests.get(url, headers=headers)
|
||||
@@ -81,22 +85,25 @@ class DatalabMarkerLoader:
|
||||
)
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
url = "https://www.datalab.to/api/v1/marker"
|
||||
url = self.api_base_url
|
||||
filename = os.path.basename(self.file_path)
|
||||
mime_type = self._get_mime_type(filename)
|
||||
headers = {"X-Api-Key": self.api_key}
|
||||
|
||||
form_data = {
|
||||
"langs": self.langs,
|
||||
"use_llm": str(self.use_llm).lower(),
|
||||
"skip_cache": str(self.skip_cache).lower(),
|
||||
"force_ocr": str(self.force_ocr).lower(),
|
||||
"paginate": str(self.paginate).lower(),
|
||||
"strip_existing_ocr": str(self.strip_existing_ocr).lower(),
|
||||
"disable_image_extraction": str(self.disable_image_extraction).lower(),
|
||||
"format_lines": str(self.format_lines).lower(),
|
||||
"output_format": self.output_format,
|
||||
}
|
||||
|
||||
if self.additional_config and self.additional_config.strip():
|
||||
form_data["additional_config"] = self.additional_config
|
||||
|
||||
log.info(
|
||||
f"Datalab Marker POST request parameters: {{'filename': '{filename}', 'mime_type': '{mime_type}', **{form_data}}}"
|
||||
)
|
||||
@@ -133,74 +140,92 @@ class DatalabMarkerLoader:
|
||||
|
||||
check_url = result.get("request_check_url")
|
||||
request_id = result.get("request_id")
|
||||
if not check_url:
|
||||
raise HTTPException(
|
||||
status.HTTP_502_BAD_GATEWAY, detail="No request_check_url returned."
|
||||
)
|
||||
|
||||
for _ in range(300): # Up to 10 minutes
|
||||
time.sleep(2)
|
||||
try:
|
||||
poll_response = requests.get(check_url, headers=headers)
|
||||
poll_response.raise_for_status()
|
||||
poll_result = poll_response.json()
|
||||
except (requests.HTTPError, ValueError) as e:
|
||||
raw_body = poll_response.text
|
||||
log.error(f"Polling error: {e}, response body: {raw_body}")
|
||||
raise HTTPException(
|
||||
status.HTTP_502_BAD_GATEWAY, detail=f"Polling failed: {e}"
|
||||
)
|
||||
|
||||
status_val = poll_result.get("status")
|
||||
success_val = poll_result.get("success")
|
||||
|
||||
if status_val == "complete":
|
||||
summary = {
|
||||
k: poll_result.get(k)
|
||||
for k in (
|
||||
"status",
|
||||
"output_format",
|
||||
"success",
|
||||
"error",
|
||||
"page_count",
|
||||
"total_cost",
|
||||
# Check if this is a direct response (self-hosted) or polling response (DataLab)
|
||||
if check_url:
|
||||
# DataLab polling pattern
|
||||
for _ in range(300): # Up to 10 minutes
|
||||
time.sleep(2)
|
||||
try:
|
||||
poll_response = requests.get(check_url, headers=headers)
|
||||
poll_response.raise_for_status()
|
||||
poll_result = poll_response.json()
|
||||
except (requests.HTTPError, ValueError) as e:
|
||||
raw_body = poll_response.text
|
||||
log.error(f"Polling error: {e}, response body: {raw_body}")
|
||||
raise HTTPException(
|
||||
status.HTTP_502_BAD_GATEWAY, detail=f"Polling failed: {e}"
|
||||
)
|
||||
}
|
||||
log.info(
|
||||
f"Marker processing completed successfully: {json.dumps(summary, indent=2)}"
|
||||
)
|
||||
break
|
||||
|
||||
if status_val == "failed" or success_val is False:
|
||||
log.error(
|
||||
f"Marker poll failed full response: {json.dumps(poll_result, indent=2)}"
|
||||
)
|
||||
error_msg = (
|
||||
poll_result.get("error")
|
||||
or "Marker returned failure without error message"
|
||||
status_val = poll_result.get("status")
|
||||
success_val = poll_result.get("success")
|
||||
|
||||
if status_val == "complete":
|
||||
summary = {
|
||||
k: poll_result.get(k)
|
||||
for k in (
|
||||
"status",
|
||||
"output_format",
|
||||
"success",
|
||||
"error",
|
||||
"page_count",
|
||||
"total_cost",
|
||||
)
|
||||
}
|
||||
log.info(
|
||||
f"Marker processing completed successfully: {json.dumps(summary, indent=2)}"
|
||||
)
|
||||
break
|
||||
|
||||
if status_val == "failed" or success_val is False:
|
||||
log.error(
|
||||
f"Marker poll failed full response: {json.dumps(poll_result, indent=2)}"
|
||||
)
|
||||
error_msg = (
|
||||
poll_result.get("error")
|
||||
or "Marker returned failure without error message"
|
||||
)
|
||||
raise HTTPException(
|
||||
status.HTTP_400_BAD_REQUEST,
|
||||
detail=f"Marker processing failed: {error_msg}",
|
||||
)
|
||||
else:
|
||||
raise HTTPException(
|
||||
status.HTTP_504_GATEWAY_TIMEOUT,
|
||||
detail="Marker processing timed out",
|
||||
)
|
||||
|
||||
if not poll_result.get("success", False):
|
||||
error_msg = poll_result.get("error") or "Unknown processing error"
|
||||
raise HTTPException(
|
||||
status.HTTP_400_BAD_REQUEST,
|
||||
detail=f"Marker processing failed: {error_msg}",
|
||||
detail=f"Final processing failed: {error_msg}",
|
||||
)
|
||||
|
||||
# DataLab format - content in format-specific fields
|
||||
content_key = self.output_format.lower()
|
||||
raw_content = poll_result.get(content_key)
|
||||
final_result = poll_result
|
||||
else:
|
||||
raise HTTPException(
|
||||
status.HTTP_504_GATEWAY_TIMEOUT, detail="Marker processing timed out"
|
||||
)
|
||||
# Self-hosted direct response - content in "output" field
|
||||
if "output" in result:
|
||||
log.info("Self-hosted Marker returned direct response without polling")
|
||||
raw_content = result.get("output")
|
||||
final_result = result
|
||||
else:
|
||||
available_fields = (
|
||||
list(result.keys())
|
||||
if isinstance(result, dict)
|
||||
else "non-dict response"
|
||||
)
|
||||
raise HTTPException(
|
||||
status.HTTP_502_BAD_GATEWAY,
|
||||
detail=f"Custom Marker endpoint returned success but no 'output' field found. Available fields: {available_fields}. Expected either 'request_check_url' for polling or 'output' field for direct response.",
|
||||
)
|
||||
|
||||
if not poll_result.get("success", False):
|
||||
error_msg = poll_result.get("error") or "Unknown processing error"
|
||||
raise HTTPException(
|
||||
status.HTTP_400_BAD_REQUEST,
|
||||
detail=f"Final processing failed: {error_msg}",
|
||||
)
|
||||
|
||||
content_key = self.output_format.lower()
|
||||
raw_content = poll_result.get(content_key)
|
||||
|
||||
if content_key == "json":
|
||||
if self.output_format.lower() == "json":
|
||||
full_text = json.dumps(raw_content, indent=2)
|
||||
elif content_key in {"markdown", "html"}:
|
||||
elif self.output_format.lower() in {"markdown", "html"}:
|
||||
full_text = str(raw_content).strip()
|
||||
else:
|
||||
raise HTTPException(
|
||||
@@ -211,14 +236,14 @@ class DatalabMarkerLoader:
|
||||
if not full_text:
|
||||
raise HTTPException(
|
||||
status.HTTP_400_BAD_REQUEST,
|
||||
detail="Datalab Marker returned empty content",
|
||||
detail="Marker returned empty content",
|
||||
)
|
||||
|
||||
marker_output_dir = os.path.join("/app/backend/data/uploads", "marker_output")
|
||||
os.makedirs(marker_output_dir, exist_ok=True)
|
||||
|
||||
file_ext_map = {"markdown": "md", "json": "json", "html": "html"}
|
||||
file_ext = file_ext_map.get(content_key, "txt")
|
||||
file_ext = file_ext_map.get(self.output_format.lower(), "txt")
|
||||
output_filename = f"{os.path.splitext(filename)[0]}.{file_ext}"
|
||||
output_path = os.path.join(marker_output_dir, output_filename)
|
||||
|
||||
@@ -231,13 +256,13 @@ class DatalabMarkerLoader:
|
||||
|
||||
metadata = {
|
||||
"source": filename,
|
||||
"output_format": poll_result.get("output_format", self.output_format),
|
||||
"page_count": poll_result.get("page_count", 0),
|
||||
"output_format": final_result.get("output_format", self.output_format),
|
||||
"page_count": final_result.get("page_count", 0),
|
||||
"processed_with_llm": self.use_llm,
|
||||
"request_id": request_id or "",
|
||||
}
|
||||
|
||||
images = poll_result.get("images", {})
|
||||
images = final_result.get("images", {})
|
||||
if images:
|
||||
metadata["image_count"] = len(images)
|
||||
metadata["images"] = json.dumps(list(images.keys()))
|
||||
|
||||
@@ -281,10 +281,15 @@ class Loader:
|
||||
"tiff",
|
||||
]
|
||||
):
|
||||
api_base_url = self.kwargs.get("DATALAB_MARKER_API_BASE_URL", "")
|
||||
if not api_base_url or api_base_url.strip() == "":
|
||||
api_base_url = "https://www.datalab.to/api/v1/marker"
|
||||
|
||||
loader = DatalabMarkerLoader(
|
||||
file_path=file_path,
|
||||
api_key=self.kwargs["DATALAB_MARKER_API_KEY"],
|
||||
langs=self.kwargs.get("DATALAB_MARKER_LANGS"),
|
||||
api_base_url=api_base_url,
|
||||
additional_config=self.kwargs.get("DATALAB_MARKER_ADDITIONAL_CONFIG"),
|
||||
use_llm=self.kwargs.get("DATALAB_MARKER_USE_LLM", False),
|
||||
skip_cache=self.kwargs.get("DATALAB_MARKER_SKIP_CACHE", False),
|
||||
force_ocr=self.kwargs.get("DATALAB_MARKER_FORCE_OCR", False),
|
||||
@@ -295,6 +300,7 @@ class Loader:
|
||||
disable_image_extraction=self.kwargs.get(
|
||||
"DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", False
|
||||
),
|
||||
format_lines=self.kwargs.get("DATALAB_MARKER_FORMAT_LINES", False),
|
||||
output_format=self.kwargs.get(
|
||||
"DATALAB_MARKER_OUTPUT_FORMAT", "markdown"
|
||||
),
|
||||
|
||||
@@ -401,12 +401,14 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
||||
"CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
|
||||
"PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES,
|
||||
"DATALAB_MARKER_API_KEY": request.app.state.config.DATALAB_MARKER_API_KEY,
|
||||
"DATALAB_MARKER_LANGS": request.app.state.config.DATALAB_MARKER_LANGS,
|
||||
"DATALAB_MARKER_API_BASE_URL": request.app.state.config.DATALAB_MARKER_API_BASE_URL,
|
||||
"DATALAB_MARKER_ADDITIONAL_CONFIG": request.app.state.config.DATALAB_MARKER_ADDITIONAL_CONFIG,
|
||||
"DATALAB_MARKER_SKIP_CACHE": request.app.state.config.DATALAB_MARKER_SKIP_CACHE,
|
||||
"DATALAB_MARKER_FORCE_OCR": request.app.state.config.DATALAB_MARKER_FORCE_OCR,
|
||||
"DATALAB_MARKER_PAGINATE": request.app.state.config.DATALAB_MARKER_PAGINATE,
|
||||
"DATALAB_MARKER_STRIP_EXISTING_OCR": request.app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR,
|
||||
"DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION": request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
|
||||
"DATALAB_MARKER_FORMAT_LINES": request.app.state.config.DATALAB_MARKER_FORMAT_LINES,
|
||||
"DATALAB_MARKER_USE_LLM": request.app.state.config.DATALAB_MARKER_USE_LLM,
|
||||
"DATALAB_MARKER_OUTPUT_FORMAT": request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT,
|
||||
"EXTERNAL_DOCUMENT_LOADER_URL": request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
|
||||
@@ -566,12 +568,14 @@ class ConfigForm(BaseModel):
|
||||
CONTENT_EXTRACTION_ENGINE: Optional[str] = None
|
||||
PDF_EXTRACT_IMAGES: Optional[bool] = None
|
||||
DATALAB_MARKER_API_KEY: Optional[str] = None
|
||||
DATALAB_MARKER_LANGS: Optional[str] = None
|
||||
DATALAB_MARKER_API_BASE_URL: Optional[str] = None
|
||||
DATALAB_MARKER_ADDITIONAL_CONFIG: Optional[str] = None
|
||||
DATALAB_MARKER_SKIP_CACHE: Optional[bool] = None
|
||||
DATALAB_MARKER_FORCE_OCR: Optional[bool] = None
|
||||
DATALAB_MARKER_PAGINATE: Optional[bool] = None
|
||||
DATALAB_MARKER_STRIP_EXISTING_OCR: Optional[bool] = None
|
||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION: Optional[bool] = None
|
||||
DATALAB_MARKER_FORMAT_LINES: Optional[bool] = None
|
||||
DATALAB_MARKER_USE_LLM: Optional[bool] = None
|
||||
DATALAB_MARKER_OUTPUT_FORMAT: Optional[str] = None
|
||||
EXTERNAL_DOCUMENT_LOADER_URL: Optional[str] = None
|
||||
@@ -683,10 +687,15 @@ async def update_rag_config(
|
||||
if form_data.DATALAB_MARKER_API_KEY is not None
|
||||
else request.app.state.config.DATALAB_MARKER_API_KEY
|
||||
)
|
||||
request.app.state.config.DATALAB_MARKER_LANGS = (
|
||||
form_data.DATALAB_MARKER_LANGS
|
||||
if form_data.DATALAB_MARKER_LANGS is not None
|
||||
else request.app.state.config.DATALAB_MARKER_LANGS
|
||||
request.app.state.config.DATALAB_MARKER_API_BASE_URL = (
|
||||
form_data.DATALAB_MARKER_API_BASE_URL
|
||||
if form_data.DATALAB_MARKER_API_BASE_URL is not None
|
||||
else request.app.state.config.DATALAB_MARKER_API_BASE_URL
|
||||
)
|
||||
request.app.state.config.DATALAB_MARKER_ADDITIONAL_CONFIG = (
|
||||
form_data.DATALAB_MARKER_ADDITIONAL_CONFIG
|
||||
if form_data.DATALAB_MARKER_ADDITIONAL_CONFIG is not None
|
||||
else request.app.state.config.DATALAB_MARKER_ADDITIONAL_CONFIG
|
||||
)
|
||||
request.app.state.config.DATALAB_MARKER_SKIP_CACHE = (
|
||||
form_data.DATALAB_MARKER_SKIP_CACHE
|
||||
@@ -713,6 +722,11 @@ async def update_rag_config(
|
||||
if form_data.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION is not None
|
||||
else request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION
|
||||
)
|
||||
request.app.state.config.DATALAB_MARKER_FORMAT_LINES = (
|
||||
form_data.DATALAB_MARKER_FORMAT_LINES
|
||||
if form_data.DATALAB_MARKER_FORMAT_LINES is not None
|
||||
else request.app.state.config.DATALAB_MARKER_FORMAT_LINES
|
||||
)
|
||||
request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT = (
|
||||
form_data.DATALAB_MARKER_OUTPUT_FORMAT
|
||||
if form_data.DATALAB_MARKER_OUTPUT_FORMAT is not None
|
||||
@@ -1006,7 +1020,8 @@ async def update_rag_config(
|
||||
"CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE,
|
||||
"PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES,
|
||||
"DATALAB_MARKER_API_KEY": request.app.state.config.DATALAB_MARKER_API_KEY,
|
||||
"DATALAB_MARKER_LANGS": request.app.state.config.DATALAB_MARKER_LANGS,
|
||||
"DATALAB_MARKER_API_BASE_URL": request.app.state.config.DATALAB_MARKER_API_BASE_URL,
|
||||
"DATALAB_MARKER_ADDITIONAL_CONFIG": request.app.state.config.DATALAB_MARKER_ADDITIONAL_CONFIG,
|
||||
"DATALAB_MARKER_SKIP_CACHE": request.app.state.config.DATALAB_MARKER_SKIP_CACHE,
|
||||
"DATALAB_MARKER_FORCE_OCR": request.app.state.config.DATALAB_MARKER_FORCE_OCR,
|
||||
"DATALAB_MARKER_PAGINATE": request.app.state.config.DATALAB_MARKER_PAGINATE,
|
||||
@@ -1393,12 +1408,14 @@ def process_file(
|
||||
loader = Loader(
|
||||
engine=request.app.state.config.CONTENT_EXTRACTION_ENGINE,
|
||||
DATALAB_MARKER_API_KEY=request.app.state.config.DATALAB_MARKER_API_KEY,
|
||||
DATALAB_MARKER_LANGS=request.app.state.config.DATALAB_MARKER_LANGS,
|
||||
DATALAB_MARKER_API_BASE_URL=request.app.state.config.DATALAB_MARKER_API_BASE_URL,
|
||||
DATALAB_MARKER_ADDITIONAL_CONFIG=request.app.state.config.DATALAB_MARKER_ADDITIONAL_CONFIG,
|
||||
DATALAB_MARKER_SKIP_CACHE=request.app.state.config.DATALAB_MARKER_SKIP_CACHE,
|
||||
DATALAB_MARKER_FORCE_OCR=request.app.state.config.DATALAB_MARKER_FORCE_OCR,
|
||||
DATALAB_MARKER_PAGINATE=request.app.state.config.DATALAB_MARKER_PAGINATE,
|
||||
DATALAB_MARKER_STRIP_EXISTING_OCR=request.app.state.config.DATALAB_MARKER_STRIP_EXISTING_OCR,
|
||||
DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION=request.app.state.config.DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION,
|
||||
DATALAB_MARKER_FORMAT_LINES=request.app.state.config.DATALAB_MARKER_FORMAT_LINES,
|
||||
DATALAB_MARKER_USE_LLM=request.app.state.config.DATALAB_MARKER_USE_LLM,
|
||||
DATALAB_MARKER_OUTPUT_FORMAT=request.app.state.config.DATALAB_MARKER_OUTPUT_FORMAT,
|
||||
EXTERNAL_DOCUMENT_LOADER_URL=request.app.state.config.EXTERNAL_DOCUMENT_LOADER_URL,
|
||||
|
||||
Reference in New Issue
Block a user