Merge remote-tracking branch 'origin' into logit_bias

2025-12-16 11:57:51 +01:00 · 2025-02-27 23:48:22 -08:00
parent 90aa29528c 95cadaca72
commit f4bd094128
181 changed files with 10428 additions and 5218 deletions
--- a/backend/open_webui/utils/payload.py
+++ b/backend/open_webui/utils/payload.py
@@ -4,6 +4,7 @@ from open_webui.utils.misc import (
 )

 from typing import Callable, Optional
+import json


 # inplace function: form_data is modified
@@ -67,38 +68,49 @@ def apply_model_params_to_body_openai(params: dict, form_data: dict) -> dict:


 def apply_model_params_to_body_ollama(params: dict, form_data: dict) -> dict:
-    opts = [
-        "temperature",
-        "top_p",
-        "seed",
-        "mirostat",
-        "mirostat_eta",
-        "mirostat_tau",
-        "num_ctx",
-        "num_batch",
-        "num_keep",
-        "repeat_last_n",
-        "tfs_z",
-        "top_k",
-        "min_p",
-        "use_mmap",
-        "use_mlock",
-        "num_thread",
-        "num_gpu",
-    ]
-    mappings = {i: lambda x: x for i in opts}
-    form_data = apply_model_params_to_body(params, form_data, mappings)
-
+    # Convert OpenAI parameter names to Ollama parameter names if needed.
    name_differences = {
        "max_tokens": "num_predict",
-        "frequency_penalty": "repeat_penalty",
    }

    for key, value in name_differences.items():
        if (param := params.get(key, None)) is not None:
-            form_data[value] = param
+            # Copy the parameter to new name then delete it, to prevent Ollama warning of invalid option provided
+            params[value] = params[key]
+            del params[key]

-    return form_data
+    # See https://github.com/ollama/ollama/blob/main/docs/api.md#request-8
+    mappings = {
+        "temperature": float,
+        "top_p": float,
+        "seed": lambda x: x,
+        "mirostat": int,
+        "mirostat_eta": float,
+        "mirostat_tau": float,
+        "num_ctx": int,
+        "num_batch": int,
+        "num_keep": int,
+        "num_predict": int,
+        "repeat_last_n": int,
+        "top_k": int,
+        "min_p": float,
+        "typical_p": float,
+        "repeat_penalty": float,
+        "presence_penalty": float,
+        "frequency_penalty": float,
+        "penalize_newline": bool,
+        "stop": lambda x: [bytes(s, "utf-8").decode("unicode_escape") for s in x],
+        "numa": bool,
+        "num_gpu": int,
+        "main_gpu": int,
+        "low_vram": bool,
+        "vocab_only": bool,
+        "use_mmap": bool,
+        "use_mlock": bool,
+        "num_thread": int,
+    }
+
+    return apply_model_params_to_body(params, form_data, mappings)


 def convert_messages_openai_to_ollama(messages: list[dict]) -> list[dict]:
@@ -109,11 +121,38 @@ def convert_messages_openai_to_ollama(messages: list[dict]) -> list[dict]:
        new_message = {"role": message["role"]}

        content = message.get("content", [])
+        tool_calls = message.get("tool_calls", None)
+        tool_call_id = message.get("tool_call_id", None)

        # Check if the content is a string (just a simple message)
-        if isinstance(content, str):
+        if isinstance(content, str) and not tool_calls:
            # If the content is a string, it's pure text
            new_message["content"] = content
+
+            # If message is a tool call, add the tool call id to the message
+            if tool_call_id:
+                new_message["tool_call_id"] = tool_call_id
+
+        elif tool_calls:
+            # If tool calls are present, add them to the message
+            ollama_tool_calls = []
+            for tool_call in tool_calls:
+                ollama_tool_call = {
+                    "index": tool_call.get("index", 0),
+                    "id": tool_call.get("id", None),
+                    "function": {
+                        "name": tool_call.get("function", {}).get("name", ""),
+                        "arguments": json.loads(
+                            tool_call.get("function", {}).get("arguments", {})
+                        ),
+                    },
+                }
+                ollama_tool_calls.append(ollama_tool_call)
+            new_message["tool_calls"] = ollama_tool_calls
+
+            # Put the content to empty string (Ollama requires an empty string for tool calls)
+            new_message["content"] = ""
+
        else:
            # Otherwise, assume the content is a list of dicts, e.g., text followed by an image URL
            content_text = ""
@@ -174,33 +213,28 @@ def convert_payload_openai_to_ollama(openai_payload: dict) -> dict:
        ollama_payload["format"] = openai_payload["format"]

    # If there are advanced parameters in the payload, format them in Ollama's options field
-    ollama_options = {}
-
    if openai_payload.get("options"):
        ollama_payload["options"] = openai_payload["options"]
        ollama_options = openai_payload["options"]

-    # Handle parameters which map directly
-    for param in ["temperature", "top_p", "seed"]:
-        if param in openai_payload:
-            ollama_options[param] = openai_payload[param]
+        # Re-Mapping OpenAI's `max_tokens` -> Ollama's `num_predict`
+        if "max_tokens" in ollama_options:
+            ollama_options["num_predict"] = ollama_options["max_tokens"]
+            del ollama_options[
+                "max_tokens"
+            ]  # To prevent Ollama warning of invalid option provided

-    # Mapping OpenAI's `max_tokens` -> Ollama's `num_predict`
-    if "max_completion_tokens" in openai_payload:
-        ollama_options["num_predict"] = openai_payload["max_completion_tokens"]
-    elif "max_tokens" in openai_payload:
-        ollama_options["num_predict"] = openai_payload["max_tokens"]
+        # Ollama lacks a "system" prompt option. It has to be provided as a direct parameter, so we copy it down.
+        if "system" in ollama_options:
+            ollama_payload["system"] = ollama_options["system"]
+            del ollama_options[
+                "system"
+            ]  # To prevent Ollama warning of invalid option provided

-    # Handle frequency / presence_penalty, which needs renaming and checking
-    if "frequency_penalty" in openai_payload:
-        ollama_options["repeat_penalty"] = openai_payload["frequency_penalty"]
-
-    if "presence_penalty" in openai_payload and "penalty" not in ollama_options:
-        # We are assuming presence penalty uses a similar concept in Ollama, which needs custom handling if exists.
-        ollama_options["new_topic_penalty"] = openai_payload["presence_penalty"]
-
-    # Add options to payload if any have been set
-    if ollama_options:
+    # If there is the "stop" parameter in the openai_payload, remap it to the ollama_payload.options
+    if "stop" in openai_payload:
+        ollama_options = ollama_payload.get("options", {})
+        ollama_options["stop"] = openai_payload.get("stop")
        ollama_payload["options"] = ollama_options

    if "metadata" in openai_payload: