diff --git a/backend/open_webui/routers/ollama.py b/backend/open_webui/routers/ollama.py
index 9cfff0c9de..cfbdbcac08 100644
--- a/backend/open_webui/routers/ollama.py
+++ b/backend/open_webui/routers/ollama.py
@@ -1381,8 +1381,11 @@ async def generate_openai_completion(
     form_data: dict,
     url_idx: Optional[int] = None,
     user=Depends(get_verified_user),
-    db: Session = Depends(get_session),
 ):
+    # NOTE: We intentionally do NOT use Depends(get_session) here.
+    # Database operations (get_model_by_id, has_access) manage their own short-lived sessions.
+    # This prevents holding a connection during the entire LLM call (30-60+ seconds),
+    # which would exhaust the connection pool under concurrent load.
     metadata = form_data.pop("metadata", None)

     try:
@@ -1402,7 +1405,7 @@ async def generate_openai_completion(
         if ":" not in model_id:
             model_id = f"{model_id}:latest"

-        model_info = Models.get_model_by_id(model_id, db=db)
+        model_info = Models.get_model_by_id(model_id)
         if model_info:
             if model_info.base_model_id:
                 payload["model"] = model_info.base_model_id
@@ -1419,7 +1422,6 @@ async def generate_openai_completion(
                 user.id,
                 type="read",
                 access_control=model_info.access_control,
-                db=db,
             )
         ):
             raise HTTPException(
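
The NOTE in the patch describes the pattern this change relies on: instead of a request-scoped `Depends(get_session)` pinning a pooled connection for the full duration of the LLM call, each table method opens and closes its own session around a single query. Below is a minimal sketch of that pattern, assuming SQLAlchemy 2.0; the `Model` class, `SessionLocal` factory, and `get_db` context manager are illustrative stand-ins, not Open WebUI's exact internals.

```python
# Sketch (assumptions: SQLAlchemy 2.0; `Model`, `SessionLocal`, and `get_db`
# are hypothetical stand-ins) of a table method that manages its own
# short-lived session, so the pooled connection is held for milliseconds
# rather than for the caller's entire 30-60+ second LLM request.
from contextlib import contextmanager
from typing import Optional

from sqlalchemy import String, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker


class Base(DeclarativeBase):
    pass


class Model(Base):  # hypothetical stand-in for the real models table
    __tablename__ = "model"
    id: Mapped[str] = mapped_column(String, primary_key=True)
    base_model_id: Mapped[Optional[str]] = mapped_column(String, nullable=True)


engine = create_engine("sqlite://")  # in-memory DB, just for the sketch
Base.metadata.create_all(engine)
SessionLocal = sessionmaker(bind=engine)


@contextmanager
def get_db():
    """Yield a session, then return its connection to the pool immediately."""
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()  # the connection goes back to the pool here


class ModelsTable:
    def get_model_by_id(self, id: str) -> Optional[Model]:
        # Session scope = this one lookup; the caller can then stream
        # from the LLM for as long as it likes without pinning a connection.
        with get_db() as db:
            return db.get(Model, id)


Models = ModelsTable()
```

With this shape, a pool of, say, 10 connections can serve far more than 10 concurrent completion requests, because each request only touches the pool for its brief metadata and access-control lookups, not for the streaming phase.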