mirror of
https://github.com/open-webui/open-webui.git
synced 2026-02-24 04:00:31 +01:00
refac
This commit is contained in:
@@ -121,6 +121,27 @@ def has_access_to_file(
|
||||
############################
|
||||
|
||||
|
||||
def _is_text_file(file_path: str, chunk_size: int = 8192) -> bool:
|
||||
"""Check if a file is likely a text file by reading a chunk and validating UTF-8.
|
||||
|
||||
This catches files whose extensions are mis-mapped by mimetypes/browsers
|
||||
(e.g. TypeScript .ts → video/mp2t) without maintaining an extension whitelist.
|
||||
"""
|
||||
try:
|
||||
resolved = Storage.get_file(file_path)
|
||||
with open(resolved, "rb") as f:
|
||||
chunk = f.read(chunk_size)
|
||||
if not chunk:
|
||||
return False
|
||||
# Null bytes are a strong indicator of binary content
|
||||
if b"\x00" in chunk:
|
||||
return False
|
||||
chunk.decode("utf-8")
|
||||
return True
|
||||
except (UnicodeDecodeError, Exception):
|
||||
return False
|
||||
|
||||
|
||||
def process_uploaded_file(
|
||||
request,
|
||||
file,
|
||||
@@ -132,13 +153,20 @@ def process_uploaded_file(
|
||||
):
|
||||
def _process_handler(db_session):
|
||||
try:
|
||||
if file.content_type:
|
||||
content_type = file.content_type
|
||||
|
||||
# Detect mis-labeled text files (e.g. .ts → video/mp2t)
|
||||
if content_type and content_type.startswith(("image/", "video/")):
|
||||
if _is_text_file(file_path):
|
||||
content_type = "text/plain"
|
||||
|
||||
if content_type:
|
||||
stt_supported_content_types = getattr(
|
||||
request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", []
|
||||
)
|
||||
|
||||
if strict_match_mime_type(
|
||||
stt_supported_content_types, file.content_type
|
||||
stt_supported_content_types, content_type
|
||||
):
|
||||
file_path_processed = Storage.get_file(file_path)
|
||||
result = transcribe(
|
||||
@@ -153,7 +181,7 @@ def process_uploaded_file(
|
||||
user=user,
|
||||
db=db_session,
|
||||
)
|
||||
elif (not file.content_type.startswith(("image/", "video/"))) or (
|
||||
elif (not content_type.startswith(("image/", "video/"))) or (
|
||||
request.app.state.config.CONTENT_EXTRACTION_ENGINE == "external"
|
||||
):
|
||||
process_file(
|
||||
@@ -164,7 +192,7 @@ def process_uploaded_file(
|
||||
)
|
||||
else:
|
||||
raise Exception(
|
||||
f"File type {file.content_type} is not supported for processing"
|
||||
f"File type {content_type} is not supported for processing"
|
||||
)
|
||||
else:
|
||||
log.info(
|
||||
|
||||
Reference in New Issue
Block a user