diff --git a/backend/open_webui/utils/response.py b/backend/open_webui/utils/response.py
index 241c598269..52539860aa 100644
--- a/backend/open_webui/utils/response.py
+++ b/backend/open_webui/utils/response.py
@@ -65,7 +65,19 @@ def convert_ollama_tool_call_to_openai(tool_calls: list) -> list:
 
 
 def convert_ollama_usage_to_openai(data: dict) -> dict:
+    input_tokens = int(data.get("prompt_eval_count", 0))
+    output_tokens = int(data.get("eval_count", 0))
+    total_tokens = input_tokens + output_tokens
+
     return {
+        # Standardized fields
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "total_tokens": total_tokens,
+        # OpenAI-compatible fields (for backward compatibility)
+        "prompt_tokens": input_tokens,
+        "completion_tokens": output_tokens,
+        # Ollama-specific metrics
         "response_token/s": (
             round(
                 (
@@ -97,22 +109,13 @@ def convert_ollama_usage_to_openai(data: dict) -> dict:
         "total_duration": data.get("total_duration", 0),
         "load_duration": data.get("load_duration", 0),
         "prompt_eval_count": data.get("prompt_eval_count", 0),
-        "prompt_tokens": int(
-            data.get("prompt_eval_count", 0)
-        ),  # This is the OpenAI compatible key
         "prompt_eval_duration": data.get("prompt_eval_duration", 0),
         "eval_count": data.get("eval_count", 0),
-        "completion_tokens": int(
-            data.get("eval_count", 0)
-        ),  # This is the OpenAI compatible key
         "eval_duration": data.get("eval_duration", 0),
         "approximate_total": (lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s")(
             (data.get("total_duration", 0) or 0) // 1_000_000_000
         ),
-        "total_tokens": int(  # This is the OpenAI compatible key
-            data.get("prompt_eval_count", 0) + data.get("eval_count", 0)
-        ),
-        "completion_tokens_details": {  # This is the OpenAI compatible key
+        "completion_tokens_details": {
             "reasoning_tokens": 0,
             "accepted_prediction_tokens": 0,
             "rejected_prediction_tokens": 0,
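
A minimal usage sketch (not part of the patch) of what the updated `convert_ollama_usage_to_openai` returns. The payload values and the import path are illustrative assumptions based on the file location; the returned dict also carries the Ollama-specific duration metrics and `completion_tokens_details`, which the assertions below don't cover.

```python
# Sketch, assuming the patched convert_ollama_usage_to_openai above.
# Sample values are illustrative; Ollama reports durations in nanoseconds.
from open_webui.utils.response import convert_ollama_usage_to_openai

ollama_usage = {
    "prompt_eval_count": 26,                # tokens consumed by the prompt
    "eval_count": 298,                      # tokens generated in the response
    "total_duration": 5_589_157_167,        # ns
    "load_duration": 3_013_701_500,         # ns
    "prompt_eval_duration": 1_160_282_000,  # ns
    "eval_duration": 1_325_948_000,         # ns
}

usage = convert_ollama_usage_to_openai(ollama_usage)

# The standardized fields and their OpenAI-compatible aliases agree,
# and total_tokens is now computed once instead of re-derived inline.
assert usage["input_tokens"] == usage["prompt_tokens"] == 26
assert usage["output_tokens"] == usage["completion_tokens"] == 298
assert usage["total_tokens"] == 26 + 298
```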