fix(db): release connection before LLM call in Ollama /v1/completions (#20570)

Remove Depends(get_session) from the /v1/completions endpoint to prevent database connections from being held during the entire duration of LLM calls.

Previously, the database session was acquired at request start and held until the response completed. Under concurrent load, this exhausted the connection pool, causing SQLAlchemy QueuePool timeout errors.

The fix lets Models.get_model_by_id() and has_access() each open and close their own short-lived session internally, so the connection is returned to the pool immediately after the authorization checks complete rather than being held for the full LLM call.
This commit is contained in:
Classic298
2026-01-11 20:35:46 +01:00
committed by GitHub
parent 24044b42ea
commit 9e596f8616

View File

@@ -1381,8 +1381,11 @@ async def generate_openai_completion(
form_data: dict,
url_idx: Optional[int] = None,
user=Depends(get_verified_user),
db: Session = Depends(get_session),
):
# NOTE: We intentionally do NOT use Depends(get_session) here.
# Database operations (get_model_by_id, has_access) manage their own short-lived sessions.
# This prevents holding a connection during the entire LLM call (30-60+ seconds),
# which would exhaust the connection pool under concurrent load.
metadata = form_data.pop("metadata", None)
try:
@@ -1402,7 +1405,7 @@ async def generate_openai_completion(
if ":" not in model_id:
model_id = f"{model_id}:latest"
model_info = Models.get_model_by_id(model_id, db=db)
model_info = Models.get_model_by_id(model_id)
if model_info:
if model_info.base_model_id:
payload["model"] = model_info.base_model_id
@@ -1419,7 +1422,6 @@ async def generate_openai_completion(
user.id,
type="read",
access_control=model_info.access_control,
db=db,
)
):
raise HTTPException(