diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py
index a6e55e0ac5..c7163a580d 100644
--- a/backend/open_webui/main.py
+++ b/backend/open_webui/main.py
@@ -591,6 +591,10 @@ https://github.com/open-webui/open-webui
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    # Store reference to main event loop for sync->async calls (e.g., embedding generation)
+    # This allows sync functions to schedule work on the main loop without blocking health checks
+    app.state.main_loop = asyncio.get_running_loop()
+
     app.state.instance_id = INSTANCE_ID
     start_logger()
 
diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py
index 68f8a1ee9b..fe0ac652cc 100644
--- a/backend/open_webui/routers/retrieval.py
+++ b/backend/open_webui/routers/retrieval.py
@@ -1590,14 +1590,20 @@ def save_docs_to_vector_db(
             enable_async=request.app.state.config.ENABLE_ASYNC_EMBEDDING,
         )
 
-        # Run async embedding in sync context
-        embeddings = asyncio.run(
+        # Run the async embedding call from this sync context by scheduling it on the main event loop.
+        # This keeps the main loop responsive to health checks during long operations. Must run off the loop thread, or waiting on the result deadlocks.
+        embedding_timeout_str = os.environ.get("RAG_EMBEDDING_TIMEOUT")
+        embedding_timeout = int(embedding_timeout_str) if embedding_timeout_str else None
+
+        future = asyncio.run_coroutine_threadsafe(
             embedding_function(
                 list(map(lambda x: x.replace("\n", " "), texts)),
                 prefix=RAG_EMBEDDING_CONTENT_PREFIX,
                 user=user,
-            )
+            ),
+            request.app.state.main_loop,
         )
+        embeddings = future.result(timeout=embedding_timeout)
         log.info(f"embeddings generated {len(embeddings)} for {len(texts)} items")
 
         items = [