This commit is contained in:
Timothy Jaeryang Baek
2026-05-09 15:46:33 +09:00
parent 75e72ea2f9
commit 3ccf263b10
2 changed files with 78 additions and 37 deletions

View File

@@ -2165,7 +2165,27 @@ async def load_messages_from_db(chat_id: str, message_id: str) -> Optional[list[
return [{k: v for k, v in msg.items() if k in ('role', 'content', 'output', 'files')} for msg in db_messages]
def process_messages_with_output(messages: list[dict]) -> list[dict]:
def get_reasoning_format(model: dict) -> str | None:
"""
Determine how reasoning should be included in reconstructed messages.
Returns:
'think_tags': Ollama expects <think> tags in content.
'reasoning_content': llama.cpp supports reasoning_content as a top-level field.
None: skip reasoning (safe default for strict providers).
"""
provider = model.get('provider', '')
if provider == 'ollama':
return 'think_tags'
if provider == 'llama.cpp':
return 'reasoning_content'
return None
def process_messages_with_output(
messages: list[dict],
reasoning_format: str | None = None,
) -> list[dict]:
"""
Process messages with OR-aligned output items for LLM consumption.
@@ -2177,7 +2197,9 @@ def process_messages_with_output(messages: list[dict]) -> list[dict]:
for message in messages:
if message.get('role') == 'assistant' and message.get('output'):
# Use output items for clean OpenAI-format messages
output_messages = convert_output_to_messages(message['output'], raw=True)
output_messages = convert_output_to_messages(
message['output'], raw=True, reasoning_format=reasoning_format,
)
if output_messages:
processed.extend(output_messages)
continue
@@ -2315,7 +2337,10 @@ async def process_chat_payload(request, form_data, user, metadata, model):
form_data['messages'].append({'role': 'user', 'content': regeneration_prompt})
# Process messages with OR-aligned output items for clean LLM messages
form_data['messages'] = process_messages_with_output(form_data.get('messages', []))
form_data['messages'] = process_messages_with_output(
form_data.get('messages', []),
reasoning_format=get_reasoning_format(model),
)
system_message = get_system_message(form_data.get('messages', []))
if system_message: # Chat Controls/User Settings
@@ -4741,10 +4766,10 @@ async def streaming_chat_response_handler(response, ctx):
system_message = get_system_message(form_data['messages'])
new_form_data['messages'] = (
[system_message] if system_message else []
) + convert_output_to_messages(output, raw=True)
) + convert_output_to_messages(output, raw=True, reasoning_format=get_reasoning_format(model))
new_form_data['previous_response_id'] = last_response_id
else:
tool_messages = convert_output_to_messages(output, raw=True)
tool_messages = convert_output_to_messages(output, raw=True, reasoning_format=get_reasoning_format(model))
# Chat Completions providers don't support multimodal
# tool messages. Extract images into a user message.
@@ -4964,7 +4989,7 @@ async def streaming_chat_response_handler(response, ctx):
'metadata': metadata,
'messages': [
*form_data['messages'],
*convert_output_to_messages(output, raw=True),
*convert_output_to_messages(output, raw=True, reasoning_format=get_reasoning_format(model)),
],
}

View File

@@ -129,7 +129,11 @@ def get_content_from_message(message: dict) -> Optional[str]:
return None
def convert_output_to_messages(output: list, raw: bool = False) -> list[dict]:
def convert_output_to_messages(
output: list,
raw: bool = False,
reasoning_format: str | None = None,
) -> list[dict]:
"""
Convert OR-aligned output items to OpenAI Chat Completion-format messages.
@@ -139,8 +143,14 @@ def convert_output_to_messages(output: list, raw: bool = False) -> list[dict]:
Args:
output: List of OR-aligned output items (Responses API format).
raw: If True, include reasoning blocks (with original tags) and code
interpreter blocks for LLM re-processing follow-ups.
raw: If True, include code interpreter blocks for LLM re-processing
follow-ups.
reasoning_format: How to include reasoning blocks in the output:
- None: skip reasoning (default, safe for strict providers).
- ``'think_tags'``: wrap in ``<think>`` tags inside content
(for Ollama, which expects reasoning as tagged content).
- ``'reasoning_content'``: set as ``reasoning_content`` top-level field
(for llama.cpp, which routes it via the chat template).
"""
if not output or not isinstance(output, list):
return []
@@ -148,19 +158,26 @@ def convert_output_to_messages(output: list, raw: bool = False) -> list[dict]:
messages = []
pending_tool_calls = []
pending_content = []
pending_reasoning = [] # Only populated when reasoning_format == 'reasoning_content'
def flush_pending():
nonlocal pending_content, pending_tool_calls
if pending_content or pending_tool_calls:
messages.append(
{
'role': 'assistant',
'content': '\n'.join(pending_content) if pending_content else '',
**({'tool_calls': pending_tool_calls} if pending_tool_calls else {}),
}
)
pending_content = []
pending_tool_calls = []
nonlocal pending_content, pending_tool_calls, pending_reasoning
if not pending_content and not pending_tool_calls and not pending_reasoning:
return
message = {
'role': 'assistant',
'content': '\n'.join(pending_content) if pending_content else '',
**({'tool_calls': pending_tool_calls} if pending_tool_calls else {}),
}
if pending_reasoning:
message['reasoning_content'] = '\n'.join(pending_reasoning)
messages.append(message)
pending_content = []
pending_tool_calls = []
pending_reasoning = []
for item in output:
item_type = item.get('type', '')
@@ -231,27 +248,26 @@ def convert_output_to_messages(output: list, raw: bool = False) -> list[dict]:
)
elif item_type == 'reasoning':
if raw:
# Include reasoning with original tags for LLM re-processing
reasoning_text = ''
source_list = item.get('summary', []) or item.get('content', [])
for part in source_list:
if part.get('type') == 'output_text':
reasoning_text += part.get('text', '')
elif 'text' in part:
reasoning_text += part.get('text', '')
if not reasoning_format:
continue
if reasoning_text:
reasoning_text = ''
source_list = item.get('summary', []) or item.get('content', [])
for part in source_list:
if part.get('type') == 'output_text':
reasoning_text += part.get('text', '')
elif 'text' in part:
reasoning_text += part.get('text', '')
if reasoning_text:
if reasoning_format == 'think_tags':
# Ollama: embed in content with the item's original tags
start_tag = item.get('start_tag', '<think>')
end_tag = item.get('end_tag', '</think>')
pending_content.append(f'{start_tag}{reasoning_text}{end_tag}')
# NOTE: Some providers (e.g. Moonshot/Kimi K2.5) require
# reasoning_content as a top-level field on assistant
# messages. This should be handled externally via a
# pipeline filter or connection-level middleware, not
# here — adding it universally breaks strict providers
# (OpenAI, Vertex AI, Azure) that reject unknown fields.
# else: skip reasoning blocks for normal LLM messages
elif reasoning_format == 'reasoning_content':
# llama.cpp: collect for reasoning_content field
pending_reasoning.append(reasoning_text)
elif item_type == 'open_webui:code_interpreter':
# Always include code interpreter content so the LLM knows