mirror of
https://github.com/open-webui/open-webui.git
synced 2026-02-24 12:11:56 +01:00
fix(db): release connection before LLM call in OpenAI /chat/completions (#20572)
Remove Depends(get_session) from the /chat/completions endpoint to prevent database connections from being held during the entire duration of LLM calls (30-60+ seconds for streaming responses). Previously, the database session was acquired at request start and held until the streaming response completed. Under concurrent load, this exhausted the connection pool, causing QueuePool timeout errors for other database operations. The fix allows Models.get_model_by_id() and has_access() to manage their own short-lived sessions internally, releasing the connection immediately after the quick authorization checks complete — before the slow external LLM API call begins.
This commit is contained in:
@@ -801,8 +801,11 @@ async def generate_chat_completion(
|
||||
user=Depends(get_verified_user),
|
||||
bypass_filter: Optional[bool] = False,
|
||||
bypass_system_prompt: bool = False,
|
||||
- db: Session = Depends(get_session),
|
||||
):
|
||||
+ # NOTE: We intentionally do NOT use Depends(get_session) here.
|
||||
+ # Database operations (get_model_by_id, has_access) manage their own short-lived sessions.
|
||||
+ # This prevents holding a connection during the entire LLM call (30-60+ seconds),
|
||||
+ # which would exhaust the connection pool under concurrent load.
|
||||
if BYPASS_MODEL_ACCESS_CONTROL:
|
||||
bypass_filter = True
|
||||
|
||||
@@ -812,7 +815,7 @@ async def generate_chat_completion(
|
||||
metadata = payload.pop("metadata", None)
|
||||
|
||||
model_id = form_data.get("model")
|
||||
- model_info = Models.get_model_by_id(model_id, db=db)
|
||||
+ model_info = Models.get_model_by_id(model_id)
|
||||
|
||||
# Check model info and override the payload
|
||||
if model_info:
|
||||
@@ -842,7 +845,6 @@ async def generate_chat_completion(
|
||||
user.id,
|
||||
type="read",
|
||||
access_control=model_info.access_control,
|
||||
- db=db,
|
||||
)
|
||||
):
|
||||
raise HTTPException(
|
||||
|
||||
Reference in New Issue
Block a user