From 24044b42ea97f8fd855472b2c0abc497a813843b Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Sun, 11 Jan 2026 20:35:38 +0100
Subject: [PATCH] fix(db): release connection before LLM call in Ollama
 /v1/chat/completions (#20569)

Remove Depends(get_session) from the /v1/chat/completions endpoint to prevent database connections from being held during the entire duration of LLM calls.

Previously, the database session was acquired at request start and held until the streaming response completed. Under concurrent load, this exhausted the connection pool, causing QueuePool timeout errors.

The fix allows Models.get_model_by_id() and has_access() to manage their own short-lived sessions internally, releasing the connection immediately after authorization checks complete.
---
 backend/open_webui/routers/ollama.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/backend/open_webui/routers/ollama.py b/backend/open_webui/routers/ollama.py
index b269aa329a..9cfff0c9de 100644
--- a/backend/open_webui/routers/ollama.py
+++ b/backend/open_webui/routers/ollama.py
@@ -1464,8 +1464,11 @@ async def generate_openai_chat_completion(
     form_data: dict,
     url_idx: Optional[int] = None,
     user=Depends(get_verified_user),
-    db: Session = Depends(get_session),
 ):
+    # NOTE: We intentionally do NOT use Depends(get_session) here.
+    # Database operations (get_model_by_id, has_access) manage their own short-lived sessions.
+    # This prevents holding a connection during the entire LLM call (30-60+ seconds),
+    # which would exhaust the connection pool under concurrent load.
     metadata = form_data.pop("metadata", None)
 
     try:
@@ -1485,7 +1488,7 @@ async def generate_openai_chat_completion(
     if ":" not in model_id:
         model_id = f"{model_id}:latest"
 
-    model_info = Models.get_model_by_id(model_id, db=db)
+    model_info = Models.get_model_by_id(model_id)
     if model_info:
         if model_info.base_model_id:
             payload["model"] = model_info.base_model_id
@@ -1506,7 +1509,6 @@ async def generate_openai_chat_completion(
                     user.id,
                     type="read",
                     access_control=model_info.access_control,
-                    db=db,
                 )
             ):
                 raise HTTPException(