From d0c2bfdbff2b12e8190379cef8f442b1cf210470 Mon Sep 17 00:00:00 2001
From: Classic298 <27028174+Classic298@users.noreply.github.com>
Date: Sun, 11 Jan 2026 20:34:11 +0100
Subject: [PATCH] fix(db): release connection before LLM call in OpenAI
 /chat/completions (#20572)

Remove Depends(get_session) from the /chat/completions endpoint to prevent database connections from being held during the entire duration of LLM calls (30-60+ seconds for streaming responses).

Previously, the database session was acquired at request start and held until the streaming response completed. Under concurrent load, this exhausted the connection pool, causing QueuePool timeout errors for other database operations.

The fix allows Models.get_model_by_id() and has_access() to manage their own short-lived sessions internally, releasing the connection immediately after the quick authorization checks complete - before the slow external LLM API call begins.
---
 backend/open_webui/routers/openai.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/backend/open_webui/routers/openai.py b/backend/open_webui/routers/openai.py
index ec4ce2f4a8..44575e57f2 100644
--- a/backend/open_webui/routers/openai.py
+++ b/backend/open_webui/routers/openai.py
@@ -801,8 +801,11 @@ async def generate_chat_completion(
     user=Depends(get_verified_user),
     bypass_filter: Optional[bool] = False,
     bypass_system_prompt: bool = False,
-    db: Session = Depends(get_session),
 ):
+    # NOTE: We intentionally do NOT use Depends(get_session) here.
+    # Database operations (get_model_by_id, has_access) manage their own short-lived sessions.
+    # This prevents holding a connection during the entire LLM call (30-60+ seconds),
+    # which would exhaust the connection pool under concurrent load.
     if BYPASS_MODEL_ACCESS_CONTROL:
         bypass_filter = True
 
@@ -812,7 +815,7 @@ async def generate_chat_completion(
     metadata = payload.pop("metadata", None)
 
     model_id = form_data.get("model")
-    model_info = Models.get_model_by_id(model_id, db=db)
+    model_info = Models.get_model_by_id(model_id)
 
     # Check model info and override the payload
     if model_info:
@@ -842,7 +845,6 @@ async def generate_chat_completion(
                     user.id,
                     type="read",
                     access_control=model_info.access_control,
-                    db=db,
                 )
             ):
                 raise HTTPException(