diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index 48ebddb7ef..50232a7f23 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -92,6 +92,7 @@ from open_webui.utils.misc import (
     prepend_to_first_user_message_content,
     convert_logit_bias_input_to_json,
     get_content_from_message,
+    convert_output_to_messages,
 )
 from open_webui.utils.tools import (
     get_tools,
@@ -1399,6 +1400,30 @@ async def convert_url_images_to_base64(form_data):
     return form_data
 
 
+def process_messages_with_output(messages: list[dict]) -> list[dict]:
+    """
+    Process messages with OR-aligned output items for LLM consumption.
+
+    For assistant messages with 'output' field, produces properly formatted
+    OpenAI-style messages (tool_calls + tool results). Strips 'output' before LLM.
+    """
+    processed = []
+
+    for message in messages:
+        if message.get("role") == "assistant" and message.get("output"):
+            # Use output items for clean OpenAI-format messages
+            output_messages = convert_output_to_messages(message["output"])
+            if output_messages:
+                processed.extend(output_messages)
+                continue
+
+        # Strip 'output' field before adding (LLM shouldn't see it)
+        clean_message = {k: v for k, v in message.items() if k != "output"}
+        processed.append(clean_message)
+
+    return processed
+
+
 async def process_chat_payload(request, form_data, user, metadata, model):
     # Pipeline Inlet -> Filter Inlet -> Chat Memory -> Chat Web Search -> Chat Image Generation
     # -> Chat Code Interpreter (Form Data Update) -> (Default) Chat Tools Function Calling
@@ -1407,6 +1432,9 @@ async def process_chat_payload(request, form_data, user, metadata, model):
     form_data = apply_params_to_form_data(form_data, model)
     log.debug(f"form_data: {form_data}")
 
+    # Process messages with OR-aligned output items for clean LLM messages
+    form_data["messages"] = process_messages_with_output(form_data.get("messages", []))
+
     system_message = get_system_message(form_data.get("messages", []))
     if system_message:  # Chat Controls/User Settings
         try:
@@ -2517,6 +2545,86 @@ async def process_chat_response(
 
         return messages
 
+    def convert_content_blocks_to_output(content_blocks):
+        """
+        Convert content_blocks to Open Responses-aligned output items.
+        See: https://openresponses.org/specification
+        """
+        output_items = []
+        item_counter = 0
+
+        def next_id(prefix):
+            nonlocal item_counter
+            item_counter += 1
+            return f"{prefix}_{item_counter}"
+
+        for block in content_blocks:
+            block_type = block.get("type", "")
+
+            if block_type == "text":
+                text_content = block.get("content", "").strip()
+                if text_content:
+                    output_items.append({
+                        "type": "message",
+                        "id": next_id("msg"),
+                        "status": "completed",
+                        "role": "assistant",
+                        "content": [{"type": "output_text", "text": text_content}],
+                    })
+
+            elif block_type == "tool_calls":
+                tool_calls = block.get("content", [])
+                results = block.get("results", [])
+
+                # Emit function_call items
+                for tool_call in tool_calls:
+                    call_id = tool_call.get("id", "")
+                    func = tool_call.get("function", {})
+                    output_items.append({
+                        "type": "function_call",
+                        "id": next_id("fc"),
+                        "call_id": call_id,
+                        "name": func.get("name", ""),
+                        "arguments": func.get("arguments", "{}"),
+                        "status": "completed" if results else "in_progress",
+                    })
+
+                # Emit function_call_output items
+                for result in results:
+                    output_items.append({
+                        "type": "function_call_output",
+                        "id": next_id("fco"),
+                        "call_id": result.get("tool_call_id", ""),
+                        "output": [{"type": "input_text", "text": result.get("content", "")}],
+                        "status": "completed",
+                    })
+
+            elif block_type == "reasoning":
+                reasoning_content = block.get("content", "").strip()
+                duration = block.get("duration")
+                output_items.append({
+                    "type": "reasoning",
+                    "id": next_id("r"),
+                    "status": "completed" if duration is not None else "in_progress",
+                    "content": [{"type": "output_text", "text": reasoning_content}] if reasoning_content else None,
+                    "summary": None,
+                })
+
+            elif block_type == "code_interpreter":
+                code = block.get("content", "")
+                output_val = block.get("output")
+                attrs = block.get("attributes", {})
+                output_items.append({
+                    "type": "open_webui:code_interpreter",
+                    "id": next_id("ci"),
+                    "status": "completed" if output_val is not None else "in_progress",
+                    "lang": attrs.get("lang", ""),
+                    "code": code,
+                    "output": output_val,
+                })
+
+        return output_items
+
     def tag_content_handler(content_type, tags, content, content_blocks):
         end_flag = False
 
@@ -3132,6 +3240,9 @@ async def process_chat_response(
                             "content": serialize_content_blocks(
                                 content_blocks
                             ),
+                            "output": convert_content_blocks_to_output(
+                                content_blocks
+                            ),
                         },
                     )
                 else:
@@ -3221,6 +3332,7 @@ async def process_chat_response(
                     "type": "chat:completion",
                     "data": {
                         "content": serialize_content_blocks(content_blocks),
+                        "output": convert_content_blocks_to_output(content_blocks),
                     },
                 }
             )
@@ -3400,6 +3512,7 @@ async def process_chat_response(
                     "type": "chat:completion",
                     "data": {
                         "content": serialize_content_blocks(content_blocks),
+                        "output": convert_content_blocks_to_output(content_blocks),
                     },
                 }
             )
@@ -3446,6 +3559,7 @@ async def process_chat_response(
                     "type": "chat:completion",
                     "data": {
                         "content": serialize_content_blocks(content_blocks),
+                        "output": convert_content_blocks_to_output(content_blocks),
                     },
                 }
             )
@@ -3577,6 +3691,7 @@ async def process_chat_response(
                     "type": "chat:completion",
                     "data": {
                         "content": serialize_content_blocks(content_blocks),
+                        "output": convert_content_blocks_to_output(content_blocks),
                     },
                 }
             )
@@ -3616,6 +3731,7 @@ async def process_chat_response(
         data = {
             "done": True,
             "content": serialize_content_blocks(content_blocks),
+            "output": convert_content_blocks_to_output(content_blocks),
             "title": title,
         }
 
@@ -3626,6 +3742,7 @@ async def process_chat_response(
                 metadata["message_id"],
                 {
                     "content": serialize_content_blocks(content_blocks),
+                    "output": convert_content_blocks_to_output(content_blocks),
                 },
             )
 
@@ -3664,6 +3781,7 @@ async def process_chat_response(
             metadata["message_id"],
             {
                 "content": serialize_content_blocks(content_blocks),
+                "output": convert_content_blocks_to_output(content_blocks),
             },
         )
 
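Annotation (not part of the patch): a minimal sketch of what the new convert_content_blocks_to_output() helper above is expected to emit for a typical tool-calling turn. The sample blocks, ids, and values are hypothetical; the item shapes follow the hunk above and https://openresponses.org/specification.

# Hypothetical content_blocks for one assistant turn:
content_blocks = [
    {"type": "reasoning", "content": "User wants current weather.", "duration": 2},
    {
        "type": "tool_calls",
        "content": [
            {
                "id": "call_abc",
                "function": {"name": "get_weather", "arguments": '{"city": "Berlin"}'},
            }
        ],
        "results": [{"tool_call_id": "call_abc", "content": "18°C, cloudy"}],
    },
    {"type": "text", "content": "It's 18°C and cloudy in Berlin."},
]

# Expected OR-aligned output items (ids come from the shared next_id() counter):
# [
#     {"type": "reasoning", "id": "r_1", "status": "completed",
#      "content": [{"type": "output_text", "text": "User wants current weather."}],
#      "summary": None},
#     {"type": "function_call", "id": "fc_2", "call_id": "call_abc",
#      "name": "get_weather", "arguments": '{"city": "Berlin"}', "status": "completed"},
#     {"type": "function_call_output", "id": "fco_3", "call_id": "call_abc",
#      "output": [{"type": "input_text", "text": "18°C, cloudy"}], "status": "completed"},
#     {"type": "message", "id": "msg_4", "status": "completed", "role": "assistant",
#      "content": [{"type": "output_text", "text": "It's 18°C and cloudy in Berlin."}]},
# ]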
diff --git a/backend/open_webui/utils/misc.py b/backend/open_webui/utils/misc.py
index e293f3d257..b931476ca9 100644
--- a/backend/open_webui/utils/misc.py
+++ b/backend/open_webui/utils/misc.py
@@ -128,6 +128,86 @@ def get_content_from_message(message: dict) -> Optional[str]:
     return None
 
 
+def convert_output_to_messages(output: list) -> list[dict]:
+    """
+    Convert OR-aligned output items to OpenAI-format messages for LLM consumption.
+
+    This is the inverse of convert_content_blocks_to_output() in middleware.py.
+    """
+    if not output or not isinstance(output, list):
+        return []
+
+    messages = []
+    pending_tool_calls = []
+    pending_content = []
+
+    for item in output:
+        item_type = item.get("type", "")
+
+        if item_type == "message":
+            # Extract text from output_text content parts
+            content_parts = item.get("content", [])
+            text = ""
+            for part in content_parts:
+                if part.get("type") == "output_text":
+                    text += part.get("text", "")
+            if text:
+                pending_content.append(text)
+
+        elif item_type == "function_call":
+            # Collect tool calls to batch into assistant message
+            pending_tool_calls.append({
+                "id": item.get("call_id", ""),
+                "type": "function",
+                "function": {
+                    "name": item.get("name", ""),
+                    "arguments": item.get("arguments", "{}"),
+                }
+            })
+
+        elif item_type == "function_call_output":
+            # Flush any pending content/tool_calls before adding tool result
+            if pending_content or pending_tool_calls:
+                messages.append({
+                    "role": "assistant",
+                    "content": "\n".join(pending_content) if pending_content else "",
+                    **({"tool_calls": pending_tool_calls} if pending_tool_calls else {}),
+                })
+                pending_content = []
+                pending_tool_calls = []
+
+            # Extract text from output content parts
+            output_parts = item.get("output", [])
+            content = ""
+            for part in output_parts:
+                if part.get("type") == "input_text":
+                    content += part.get("text", "")
+
+            messages.append({
+                "role": "tool",
+                "tool_call_id": item.get("call_id", ""),
+                "content": content,
+            })
+
+        elif item_type == "reasoning":
+            # Skip reasoning blocks for LLM messages
+            pass
+
+        elif item_type.startswith("open_webui:"):
+            # Skip extension types
+            pass
+
+    # Flush remaining content/tool_calls
+    if pending_content or pending_tool_calls:
+        messages.append({
+            "role": "assistant",
+            "content": "\n".join(pending_content) if pending_content else "",
+            **({"tool_calls": pending_tool_calls} if pending_tool_calls else {}),
+        })
+
+    return messages
+
+
 def get_last_user_message(messages: list[dict]) -> Optional[str]:
     message = get_last_user_message_item(messages)
     if message is None:
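Annotation (not part of the patch): a sketch of the inverse direction, assuming the patch is applied so the new helper is importable. Feeding OR-aligned items through convert_output_to_messages() should yield plain OpenAI chat messages; the items below are the hypothetical ones from the previous sketch (reasoning items omitted, since they are skipped).

from open_webui.utils.misc import convert_output_to_messages

output_items = [
    {
        "type": "function_call",
        "id": "fc_1",
        "call_id": "call_abc",
        "name": "get_weather",
        "arguments": '{"city": "Berlin"}',
        "status": "completed",
    },
    {
        "type": "function_call_output",
        "id": "fco_2",
        "call_id": "call_abc",
        "output": [{"type": "input_text", "text": "18°C, cloudy"}],
        "status": "completed",
    },
    {
        "type": "message",
        "id": "msg_3",
        "status": "completed",
        "role": "assistant",
        "content": [{"type": "output_text", "text": "It's 18°C and cloudy in Berlin."}],
    },
]

messages = convert_output_to_messages(output_items)
# Expected OpenAI-format messages:
# [
#     {"role": "assistant", "content": "", "tool_calls": [
#         {"id": "call_abc", "type": "function",
#          "function": {"name": "get_weather", "arguments": '{"city": "Berlin"}'}}]},
#     {"role": "tool", "tool_call_id": "call_abc", "content": "18°C, cloudy"},
#     {"role": "assistant", "content": "It's 18°C and cloudy in Berlin."},
# ]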
"content": serialize_content_blocks(content_blocks), + "output": convert_content_blocks_to_output(content_blocks), }, ) @@ -3664,6 +3781,7 @@ async def process_chat_response( metadata["message_id"], { "content": serialize_content_blocks(content_blocks), + "output": convert_content_blocks_to_output(content_blocks), }, ) diff --git a/backend/open_webui/utils/misc.py b/backend/open_webui/utils/misc.py index e293f3d257..b931476ca9 100644 --- a/backend/open_webui/utils/misc.py +++ b/backend/open_webui/utils/misc.py @@ -128,6 +128,86 @@ def get_content_from_message(message: dict) -> Optional[str]: return None +def convert_output_to_messages(output: list) -> list[dict]: + """ + Convert OR-aligned output items to OpenAI-format messages for LLM consumption. + + This is the inverse of convert_content_blocks_to_output() in middleware.py. + """ + if not output or not isinstance(output, list): + return [] + + messages = [] + pending_tool_calls = [] + pending_content = [] + + for item in output: + item_type = item.get("type", "") + + if item_type == "message": + # Extract text from output_text content parts + content_parts = item.get("content", []) + text = "" + for part in content_parts: + if part.get("type") == "output_text": + text += part.get("text", "") + if text: + pending_content.append(text) + + elif item_type == "function_call": + # Collect tool calls to batch into assistant message + pending_tool_calls.append({ + "id": item.get("call_id", ""), + "type": "function", + "function": { + "name": item.get("name", ""), + "arguments": item.get("arguments", "{}"), + } + }) + + elif item_type == "function_call_output": + # Flush any pending content/tool_calls before adding tool result + if pending_content or pending_tool_calls: + messages.append({ + "role": "assistant", + "content": "\n".join(pending_content) if pending_content else "", + **({"tool_calls": pending_tool_calls} if pending_tool_calls else {}), + }) + pending_content = [] + pending_tool_calls = [] + + # Extract text from output content parts + output_parts = item.get("output", []) + content = "" + for part in output_parts: + if part.get("type") == "input_text": + content += part.get("text", "") + + messages.append({ + "role": "tool", + "tool_call_id": item.get("call_id", ""), + "content": content, + }) + + elif item_type == "reasoning": + # Skip reasoning blocks for LLM messages + pass + + elif item_type.startswith("open_webui:"): + # Skip extension types + pass + + # Flush remaining content/tool_calls + if pending_content or pending_tool_calls: + messages.append({ + "role": "assistant", + "content": "\n".join(pending_content) if pending_content else "", + **({"tool_calls": pending_tool_calls} if pending_tool_calls else {}), + }) + + return messages + + def get_last_user_message(messages: list[dict]) -> Optional[str]: message = get_last_user_message_item(messages) if message is None: diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte index d7341257f2..2e9ca148b4 100644 --- a/src/lib/components/chat/Chat.svelte +++ b/src/lib/components/chat/Chat.svelte @@ -1403,7 +1403,12 @@ }; const chatCompletionEventHandler = async (data, message, chatId) => { - const { id, done, choices, content, sources, selected_model_id, error, usage } = data; + const { id, done, choices, content, output, sources, selected_model_id, error, usage } = data; + + // Store raw OR-aligned output items from backend + if (output) { + message.output = output; + } if (error) { await handleOpenAIError(error, message); @@ -1899,7 +1904,9 @@ : 
@@ -1899,7 +1904,9 @@
 					: undefined,
 				..._messages.map((message) => ({
 					...message,
-					content: processDetails(message.content)
+					content: processDetails(message.content),
+					// Include output for temp chats (backend will use it and strip before LLM)
+					...(message.output ? { output: message.output } : {})
 				}))
 			].filter((message) => message);
 
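Annotation (not part of the patch): a sketch of the end-to-end effect on the LLM payload, under the same hypothetical values. A stored assistant turn carries both the serialized UI 'content' and the raw 'output' items; process_messages_with_output() in middleware.py swaps that turn for clean OpenAI-format messages and strips 'output' everywhere else, so the serialized <details> markup never reaches the model.

# Hypothetical chat history as sent by the frontend on the next turn:
history = [
    {"role": "user", "content": "What's the weather in Berlin?"},
    {
        "role": "assistant",
        # Serialized representation kept for UI rendering only:
        "content": "<details>...</details>\nIt's 18°C and cloudy in Berlin.",
        "output": [
            {"type": "function_call", "id": "fc_1", "call_id": "call_abc",
             "name": "get_weather", "arguments": '{"city": "Berlin"}',
             "status": "completed"},
            {"type": "function_call_output", "id": "fco_2", "call_id": "call_abc",
             "output": [{"type": "input_text", "text": "18°C, cloudy"}],
             "status": "completed"},
            {"type": "message", "id": "msg_3", "status": "completed",
             "role": "assistant",
             "content": [{"type": "output_text", "text": "It's 18°C and cloudy in Berlin."}]},
        ],
    },
    {"role": "user", "content": "And tomorrow?"},
]

# process_messages_with_output(history) is then expected to yield:
# [
#     {"role": "user", "content": "What's the weather in Berlin?"},
#     {"role": "assistant", "content": "", "tool_calls": [...]},  # from 'output'
#     {"role": "tool", "tool_call_id": "call_abc", "content": "18°C, cloudy"},
#     {"role": "assistant", "content": "It's 18°C and cloudy in Berlin."},
#     {"role": "user", "content": "And tomorrow?"},
# ]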