enh/refac: url input handling

This commit is contained in:
Timothy Jaeryang Baek
2025-10-04 02:02:26 -05:00
parent ce83276fa4
commit a2a2bafdf6
5 changed files with 82 additions and 41 deletions

View File

@@ -40,7 +40,10 @@ from open_webui.routers.tasks import (
generate_image_prompt,
generate_chat_tags,
)
from open_webui.routers.retrieval import process_web_search, SearchForm
from open_webui.routers.retrieval import (
process_web_search,
SearchForm,
)
from open_webui.routers.images import (
load_b64_image_data,
image_generations,
@@ -76,6 +79,7 @@ from open_webui.utils.task import (
)
from open_webui.utils.misc import (
deep_update,
extract_urls,
get_message_list,
add_or_update_system_message,
add_or_update_user_message,
@@ -823,7 +827,11 @@ async def chat_completion_files_handler(
if files := body.get("metadata", {}).get("files", None):
# Check if all files are in full context mode
all_full_context = all(item.get("context") == "full" for item in files)
all_full_context = all(
item.get("context") == "full"
for item in files
if item.get("type") == "file"
)
queries = []
if not all_full_context:
@@ -855,10 +863,6 @@ async def chat_completion_files_handler(
except:
pass
if len(queries) == 0:
queries = [get_last_user_message(body["messages"])]
if not all_full_context:
await __event_emitter__(
{
"type": "status",
@@ -870,6 +874,9 @@ async def chat_completion_files_handler(
}
)
if len(queries) == 0:
queries = [get_last_user_message(body["messages"])]
try:
# Offload get_sources_from_items to a separate thread
loop = asyncio.get_running_loop()
@@ -908,7 +915,6 @@ async def chat_completion_files_handler(
log.debug(f"rag_contexts:sources: {sources}")
unique_ids = set()
for source in sources or []:
if not source or len(source.keys()) == 0:
continue
@@ -927,7 +933,6 @@ async def chat_completion_files_handler(
unique_ids.add(_id)
sources_count = len(unique_ids)
await __event_emitter__(
{
"type": "status",
@@ -1170,8 +1175,15 @@ async def process_chat_payload(request, form_data, user, metadata, model):
tool_ids = form_data.pop("tool_ids", None)
files = form_data.pop("files", None)
# Remove files duplicates
if files:
prompt = get_last_user_message(form_data["messages"])
urls = extract_urls(prompt)
if files or urls:
if not files:
files = []
files = [*files, *[{"type": "url", "url": url, "name": url} for url in urls]]
# Remove duplicate files based on their content
files = list({json.dumps(f, sort_keys=True): f for f in files}.values())
metadata = {
@@ -1372,8 +1384,6 @@ async def process_chat_payload(request, form_data, user, metadata, model):
)
context_string = context_string.strip()
prompt = get_last_user_message(form_data["messages"])
if prompt is None:
raise Exception("No user message found")