Merge remote-tracking branch 'origin' into logit_bias
@@ -4,6 +4,7 @@ from open_webui.utils.misc import (
 )
 
 from typing import Callable, Optional
+import json
 
 
 # inplace function: form_data is modified
@@ -67,38 +68,49 @@ def apply_model_params_to_body_openai(params: dict, form_data: dict) -> dict:
 
 
 def apply_model_params_to_body_ollama(params: dict, form_data: dict) -> dict:
-    opts = [
-        "temperature",
-        "top_p",
-        "seed",
-        "mirostat",
-        "mirostat_eta",
-        "mirostat_tau",
-        "num_ctx",
-        "num_batch",
-        "num_keep",
-        "repeat_last_n",
-        "tfs_z",
-        "top_k",
-        "min_p",
-        "use_mmap",
-        "use_mlock",
-        "num_thread",
-        "num_gpu",
-    ]
-    mappings = {i: lambda x: x for i in opts}
-    form_data = apply_model_params_to_body(params, form_data, mappings)
-
-    # Convert OpenAI parameter names to Ollama parameter names if needed.
-    name_differences = {
-        "max_tokens": "num_predict",
-        "frequency_penalty": "repeat_penalty",
-    }
-
-    for key, value in name_differences.items():
-        if (param := params.get(key, None)) is not None:
-            form_data[value] = param
-            # Copy the parameter to the new name, then delete it, to prevent an Ollama warning about an invalid option
-            params[value] = params[key]
-            del params[key]
-
-    return form_data
+    # See https://github.com/ollama/ollama/blob/main/docs/api.md#request-8
+    mappings = {
+        "temperature": float,
+        "top_p": float,
+        "seed": lambda x: x,
+        "mirostat": int,
+        "mirostat_eta": float,
+        "mirostat_tau": float,
+        "num_ctx": int,
+        "num_batch": int,
+        "num_keep": int,
+        "num_predict": int,
+        "repeat_last_n": int,
+        "top_k": int,
+        "min_p": float,
+        "typical_p": float,
+        "repeat_penalty": float,
+        "presence_penalty": float,
+        "frequency_penalty": float,
+        "penalize_newline": bool,
+        "stop": lambda x: [bytes(s, "utf-8").decode("unicode_escape") for s in x],
+        "numa": bool,
+        "num_gpu": int,
+        "main_gpu": int,
+        "low_vram": bool,
+        "vocab_only": bool,
+        "use_mmap": bool,
+        "use_mlock": bool,
+        "num_thread": int,
+    }
+
+    return apply_model_params_to_body(params, form_data, mappings)
 
 
 def convert_messages_openai_to_ollama(messages: list[dict]) -> list[dict]:
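For readers skimming the diff: the rewrite replaces the identity-mapping `opts` list with a table of casts, so every option reaching Ollama is coerced to its documented type. A minimal sketch of how such a table is consumed — consistent with the call sites here, though the actual `apply_model_params_to_body` defined earlier in this file may differ:

from typing import Callable


def apply_model_params_to_body(
    params: dict, form_data: dict, mappings: dict[str, Callable]
) -> dict:
    # For each known option present in params, cast the value and copy it in.
    if not params:
        return form_data
    for key, cast_func in mappings.items():
        if (value := params.get(key)) is not None:
            form_data[key] = cast_func(value)
    return form_data


# Example: string values coming from a settings UI are coerced to real types.
body = apply_model_params_to_body(
    {"temperature": "0.7", "num_ctx": "4096"},
    {},
    {"temperature": float, "num_ctx": int},
)
assert body == {"temperature": 0.7, "num_ctx": 4096}

The `stop` entry goes one step further: `bytes(s, "utf-8").decode("unicode_escape")` turns a literal backslash-n typed in a UI into a real newline before it is sent to Ollama.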
@@ -109,11 +121,38 @@ def convert_messages_openai_to_ollama(messages: list[dict]) -> list[dict]:
         new_message = {"role": message["role"]}
 
         content = message.get("content", [])
+        tool_calls = message.get("tool_calls", None)
+        tool_call_id = message.get("tool_call_id", None)
 
         # Check if the content is a string (just a simple message)
-        if isinstance(content, str):
+        if isinstance(content, str) and not tool_calls:
             # If the content is a string, it's pure text
             new_message["content"] = content
 
+            # If the message responds to a tool call, add the tool call id to the message
+            if tool_call_id:
+                new_message["tool_call_id"] = tool_call_id
+
+        elif tool_calls:
+            # If tool calls are present, add them to the message
+            ollama_tool_calls = []
+            for tool_call in tool_calls:
+                ollama_tool_call = {
+                    "index": tool_call.get("index", 0),
+                    "id": tool_call.get("id", None),
+                    "function": {
+                        "name": tool_call.get("function", {}).get("name", ""),
+                        "arguments": json.loads(
+                            tool_call.get("function", {}).get("arguments", {})
+                        ),
+                    },
+                }
+                ollama_tool_calls.append(ollama_tool_call)
+            new_message["tool_calls"] = ollama_tool_calls
+
+            # Set the content to an empty string (Ollama requires an empty string for tool calls)
+            new_message["content"] = ""
+
         else:
             # Otherwise, assume the content is a list of dicts, e.g., text followed by an image URL
             content_text = ""
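To see what the new tool-call branch produces, consider a hypothetical OpenAI-style assistant message (the id and function name below are made up). OpenAI delivers `arguments` as a JSON string; the conversion parses it into a dict and sets `content` to the empty string Ollama expects:

openai_message = {
    "role": "assistant",
    "content": None,
    "tool_calls": [
        {
            "index": 0,
            "id": "call_abc123",  # hypothetical id
            "function": {
                "name": "get_weather",  # hypothetical function
                "arguments": '{"city": "Berlin"}',  # JSON string, OpenAI-style
            },
        }
    ],
}

# Shape after convert_messages_openai_to_ollama([openai_message])[0]:
# {
#     "role": "assistant",
#     "tool_calls": [
#         {
#             "index": 0,
#             "id": "call_abc123",
#             "function": {"name": "get_weather", "arguments": {"city": "Berlin"}},
#         }
#     ],
#     "content": "",
# }

Note that `json.loads` is only safe here when `arguments` is a string; the `{}` fallback would raise a `TypeError`, so a tool call with a missing `function.arguments` field is not handled by this branch.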
@@ -174,33 +213,28 @@ def convert_payload_openai_to_ollama(openai_payload: dict) -> dict:
         ollama_payload["format"] = openai_payload["format"]
 
     # If there are advanced parameters in the payload, format them in Ollama's options field
-    ollama_options = {}
-
+    if openai_payload.get("options"):
+        ollama_payload["options"] = openai_payload["options"]
+        ollama_options = openai_payload["options"]
 
-    # Handle parameters which map directly
-    for param in ["temperature", "top_p", "seed"]:
-        if param in openai_payload:
-            ollama_options[param] = openai_payload[param]
+        # Remap OpenAI's `max_tokens` -> Ollama's `num_predict`
+        if "max_tokens" in ollama_options:
+            ollama_options["num_predict"] = ollama_options["max_tokens"]
+            del ollama_options[
+                "max_tokens"
+            ]  # To prevent an Ollama warning about an invalid option
 
-    # Map OpenAI's `max_tokens` -> Ollama's `num_predict`
-    if "max_completion_tokens" in openai_payload:
-        ollama_options["num_predict"] = openai_payload["max_completion_tokens"]
-    elif "max_tokens" in openai_payload:
-        ollama_options["num_predict"] = openai_payload["max_tokens"]
+        # Ollama lacks a "system" prompt option. It has to be provided as a direct parameter, so we copy it down.
+        if "system" in ollama_options:
+            ollama_payload["system"] = ollama_options["system"]
+            del ollama_options[
+                "system"
+            ]  # To prevent an Ollama warning about an invalid option
 
-    # Handle frequency / presence_penalty, which need renaming and checking
-    if "frequency_penalty" in openai_payload:
-        ollama_options["repeat_penalty"] = openai_payload["frequency_penalty"]
-
-    if "presence_penalty" in openai_payload and "penalty" not in ollama_options:
-        # We are assuming presence penalty uses a similar concept in Ollama, which needs custom handling if it exists.
-        ollama_options["new_topic_penalty"] = openai_payload["presence_penalty"]
-
-    # Add options to the payload if any have been set
-    if ollama_options:
-        ollama_payload["options"] = ollama_options
-
     # If there is a "stop" parameter in the openai_payload, remap it to ollama_payload.options
     if "stop" in openai_payload:
         ollama_options = ollama_payload.get("options", {})
         ollama_options["stop"] = openai_payload.get("stop")
         ollama_payload["options"] = ollama_options
 
     if "metadata" in openai_payload:
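As a worked example of the reworked options handling (model name and values are illustrative): `max_tokens` inside `options` is renamed to `num_predict`, `system` is lifted out of `options` to a top-level parameter, and a top-level `stop` is folded into `options`. The incoming `options` dict is mutated in place rather than copied:

openai_payload = {
    "model": "llama3",  # illustrative
    "options": {"max_tokens": 256, "system": "You are terse."},
    "stop": ["\n\n"],
}

# Relevant fields of convert_payload_openai_to_ollama(openai_payload):
# {
#     "model": "llama3",
#     "system": "You are terse.",
#     "options": {"num_predict": 256, "stop": ["\n\n"]},
# }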