diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index a6e55e0ac5..c7163a580d 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -591,6 +591,10 @@ https://github.com/open-webui/open-webui @asynccontextmanager async def lifespan(app: FastAPI): + # Store reference to main event loop for sync->async calls (e.g., embedding generation) + # This allows sync functions to schedule work on the main loop without blocking health checks + app.state.main_loop = asyncio.get_running_loop() + app.state.instance_id = INSTANCE_ID start_logger() diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 68f8a1ee9b..fe0ac652cc 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1590,14 +1590,20 @@ def save_docs_to_vector_db( enable_async=request.app.state.config.ENABLE_ASYNC_EMBEDDING, ) - # Run async embedding in sync context - embeddings = asyncio.run( + # Run async embedding in sync context using the main event loop + # This allows the main loop to stay responsive to health checks during long operations + embedding_timeout_str = os.environ.get("RAG_EMBEDDING_TIMEOUT") + embedding_timeout = int(embedding_timeout_str) if embedding_timeout_str else None + + future = asyncio.run_coroutine_threadsafe( embedding_function( list(map(lambda x: x.replace("\n", " "), texts)), prefix=RAG_EMBEDDING_CONTENT_PREFIX, user=user, - ) + ), + request.app.state.main_loop, ) + embeddings = future.result(timeout=embedding_timeout) log.info(f"embeddings generated {len(embeddings)} for {len(texts)} items") items = [