mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-16 11:57:51 +01:00
feat/enh: async embedding processing setting
Co-Authored-By: Classic298 <27028174+Classic298@users.noreply.github.com>
This commit is contained in:
@@ -2713,6 +2713,12 @@ RAG_EMBEDDING_BATCH_SIZE = PersistentConfig(
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Persistent setting stored under the config path "rag.enable_async_embedding".
# The initial value comes from the ENABLE_ASYNC_EMBEDDING environment variable
# (default "True"); it is enabled only when that value lower-cases to "true".
ENABLE_ASYNC_EMBEDDING = PersistentConfig(
|
||||||
|
"ENABLE_ASYNC_EMBEDDING",
|
||||||
|
"rag.enable_async_embedding",
|
||||||
|
os.environ.get("ENABLE_ASYNC_EMBEDDING", "True").lower() == "true",
|
||||||
|
)
|
||||||
|
|
||||||
RAG_EMBEDDING_QUERY_PREFIX = os.environ.get("RAG_EMBEDDING_QUERY_PREFIX", None)
|
RAG_EMBEDDING_QUERY_PREFIX = os.environ.get("RAG_EMBEDDING_QUERY_PREFIX", None)
|
||||||
|
|
||||||
RAG_EMBEDDING_CONTENT_PREFIX = os.environ.get("RAG_EMBEDDING_CONTENT_PREFIX", None)
|
RAG_EMBEDDING_CONTENT_PREFIX = os.environ.get("RAG_EMBEDDING_CONTENT_PREFIX", None)
|
||||||
|
|||||||
@@ -230,6 +230,7 @@ from open_webui.config import (
|
|||||||
RAG_RERANKING_MODEL_TRUST_REMOTE_CODE,
|
RAG_RERANKING_MODEL_TRUST_REMOTE_CODE,
|
||||||
RAG_EMBEDDING_ENGINE,
|
RAG_EMBEDDING_ENGINE,
|
||||||
RAG_EMBEDDING_BATCH_SIZE,
|
RAG_EMBEDDING_BATCH_SIZE,
|
||||||
|
ENABLE_ASYNC_EMBEDDING,
|
||||||
RAG_TOP_K,
|
RAG_TOP_K,
|
||||||
RAG_TOP_K_RERANKER,
|
RAG_TOP_K_RERANKER,
|
||||||
RAG_RELEVANCE_THRESHOLD,
|
RAG_RELEVANCE_THRESHOLD,
|
||||||
@@ -884,6 +885,7 @@ app.state.config.CHUNK_OVERLAP = CHUNK_OVERLAP
|
|||||||
app.state.config.RAG_EMBEDDING_ENGINE = RAG_EMBEDDING_ENGINE
|
app.state.config.RAG_EMBEDDING_ENGINE = RAG_EMBEDDING_ENGINE
|
||||||
app.state.config.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL
|
app.state.config.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL
|
||||||
app.state.config.RAG_EMBEDDING_BATCH_SIZE = RAG_EMBEDDING_BATCH_SIZE
|
app.state.config.RAG_EMBEDDING_BATCH_SIZE = RAG_EMBEDDING_BATCH_SIZE
|
||||||
|
app.state.config.ENABLE_ASYNC_EMBEDDING = ENABLE_ASYNC_EMBEDDING
|
||||||
|
|
||||||
app.state.config.RAG_RERANKING_ENGINE = RAG_RERANKING_ENGINE
|
app.state.config.RAG_RERANKING_ENGINE = RAG_RERANKING_ENGINE
|
||||||
app.state.config.RAG_RERANKING_MODEL = RAG_RERANKING_MODEL
|
app.state.config.RAG_RERANKING_MODEL = RAG_RERANKING_MODEL
|
||||||
|
|||||||
@@ -782,6 +782,7 @@ def get_embedding_function(
|
|||||||
key,
|
key,
|
||||||
embedding_batch_size,
|
embedding_batch_size,
|
||||||
azure_api_version=None,
|
azure_api_version=None,
|
||||||
|
enable_async=True,
|
||||||
) -> Awaitable:
|
) -> Awaitable:
|
||||||
if embedding_engine == "":
|
if embedding_engine == "":
|
||||||
# Sentence transformers: CPU-bound sync operation
|
# Sentence transformers: CPU-bound sync operation
|
||||||
@@ -816,16 +817,26 @@ def get_embedding_function(
|
|||||||
query[i : i + embedding_batch_size]
|
query[i : i + embedding_batch_size]
|
||||||
for i in range(0, len(query), embedding_batch_size)
|
for i in range(0, len(query), embedding_batch_size)
|
||||||
]
|
]
|
||||||
log.debug(
|
|
||||||
f"generate_multiple_async: Processing {len(batches)} batches in parallel"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Execute all batches in parallel
|
if enable_async:
|
||||||
tasks = [
|
log.debug(
|
||||||
embedding_function(batch, prefix=prefix, user=user)
|
f"generate_multiple_async: Processing {len(batches)} batches in parallel"
|
||||||
for batch in batches
|
)
|
||||||
]
|
# Execute all batches in parallel
|
||||||
batch_results = await asyncio.gather(*tasks)
|
tasks = [
|
||||||
|
embedding_function(batch, prefix=prefix, user=user)
|
||||||
|
for batch in batches
|
||||||
|
]
|
||||||
|
batch_results = await asyncio.gather(*tasks)
|
||||||
|
else:
|
||||||
|
log.debug(
|
||||||
|
f"generate_multiple_async: Processing {len(batches)} batches sequentially"
|
||||||
|
)
|
||||||
|
batch_results = []
|
||||||
|
for batch in batches:
|
||||||
|
batch_results.append(
|
||||||
|
await embedding_function(batch, prefix=prefix, user=user)
|
||||||
|
)
|
||||||
|
|
||||||
# Flatten results
|
# Flatten results
|
||||||
embeddings = []
|
embeddings = []
|
||||||
|
|||||||
@@ -248,6 +248,7 @@ async def get_status(request: Request):
|
|||||||
"embedding_model": request.app.state.config.RAG_EMBEDDING_MODEL,
|
"embedding_model": request.app.state.config.RAG_EMBEDDING_MODEL,
|
||||||
"reranking_model": request.app.state.config.RAG_RERANKING_MODEL,
|
"reranking_model": request.app.state.config.RAG_RERANKING_MODEL,
|
||||||
"embedding_batch_size": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE,
|
"embedding_batch_size": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE,
|
||||||
|
"ENABLE_ASYNC_EMBEDDING": request.app.state.config.ENABLE_ASYNC_EMBEDDING,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -258,6 +259,7 @@ async def get_embedding_config(request: Request, user=Depends(get_admin_user)):
|
|||||||
"embedding_engine": request.app.state.config.RAG_EMBEDDING_ENGINE,
|
"embedding_engine": request.app.state.config.RAG_EMBEDDING_ENGINE,
|
||||||
"embedding_model": request.app.state.config.RAG_EMBEDDING_MODEL,
|
"embedding_model": request.app.state.config.RAG_EMBEDDING_MODEL,
|
||||||
"embedding_batch_size": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE,
|
"embedding_batch_size": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE,
|
||||||
|
"ENABLE_ASYNC_EMBEDDING": request.app.state.config.ENABLE_ASYNC_EMBEDDING,
|
||||||
"openai_config": {
|
"openai_config": {
|
||||||
"url": request.app.state.config.RAG_OPENAI_API_BASE_URL,
|
"url": request.app.state.config.RAG_OPENAI_API_BASE_URL,
|
||||||
"key": request.app.state.config.RAG_OPENAI_API_KEY,
|
"key": request.app.state.config.RAG_OPENAI_API_KEY,
|
||||||
@@ -297,6 +299,7 @@ class EmbeddingModelUpdateForm(BaseModel):
|
|||||||
embedding_engine: str
|
embedding_engine: str
|
||||||
embedding_model: str
|
embedding_model: str
|
||||||
embedding_batch_size: Optional[int] = 1
|
embedding_batch_size: Optional[int] = 1
|
||||||
|
ENABLE_ASYNC_EMBEDDING: Optional[bool] = True
|
||||||
|
|
||||||
|
|
||||||
@router.post("/embedding/update")
|
@router.post("/embedding/update")
|
||||||
@@ -358,6 +361,10 @@ async def update_embedding_config(
|
|||||||
form_data.embedding_batch_size
|
form_data.embedding_batch_size
|
||||||
)
|
)
|
||||||
|
|
||||||
|
request.app.state.config.ENABLE_ASYNC_EMBEDDING = (
|
||||||
|
form_data.enable_async_embedding
|
||||||
|
)
|
||||||
|
|
||||||
request.app.state.ef = get_ef(
|
request.app.state.ef = get_ef(
|
||||||
request.app.state.config.RAG_EMBEDDING_ENGINE,
|
request.app.state.config.RAG_EMBEDDING_ENGINE,
|
||||||
request.app.state.config.RAG_EMBEDDING_MODEL,
|
request.app.state.config.RAG_EMBEDDING_MODEL,
|
||||||
@@ -391,6 +398,7 @@ async def update_embedding_config(
|
|||||||
if request.app.state.config.RAG_EMBEDDING_ENGINE == "azure_openai"
|
if request.app.state.config.RAG_EMBEDDING_ENGINE == "azure_openai"
|
||||||
else None
|
else None
|
||||||
),
|
),
|
||||||
|
enable_async=request.app.state.config.ENABLE_ASYNC_EMBEDDING,
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -398,6 +406,7 @@ async def update_embedding_config(
|
|||||||
"embedding_engine": request.app.state.config.RAG_EMBEDDING_ENGINE,
|
"embedding_engine": request.app.state.config.RAG_EMBEDDING_ENGINE,
|
||||||
"embedding_model": request.app.state.config.RAG_EMBEDDING_MODEL,
|
"embedding_model": request.app.state.config.RAG_EMBEDDING_MODEL,
|
||||||
"embedding_batch_size": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE,
|
"embedding_batch_size": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE,
|
||||||
|
"ENABLE_ASYNC_EMBEDDING": request.app.state.config.ENABLE_ASYNC_EMBEDDING,
|
||||||
"openai_config": {
|
"openai_config": {
|
||||||
"url": request.app.state.config.RAG_OPENAI_API_BASE_URL,
|
"url": request.app.state.config.RAG_OPENAI_API_BASE_URL,
|
||||||
"key": request.app.state.config.RAG_OPENAI_API_KEY,
|
"key": request.app.state.config.RAG_OPENAI_API_KEY,
|
||||||
|
|||||||
@@ -41,6 +41,8 @@
|
|||||||
let embeddingEngine = '';
|
let embeddingEngine = '';
|
||||||
let embeddingModel = '';
|
let embeddingModel = '';
|
||||||
let embeddingBatchSize = 1;
|
let embeddingBatchSize = 1;
|
||||||
|
let ENABLE_ASYNC_EMBEDDING = true;
|
||||||
|
|
||||||
let rerankingModel = '';
|
let rerankingModel = '';
|
||||||
|
|
||||||
let OpenAIUrl = '';
|
let OpenAIUrl = '';
|
||||||
@@ -105,6 +107,7 @@
|
|||||||
embedding_engine: embeddingEngine,
|
embedding_engine: embeddingEngine,
|
||||||
embedding_model: embeddingModel,
|
embedding_model: embeddingModel,
|
||||||
embedding_batch_size: embeddingBatchSize,
|
embedding_batch_size: embeddingBatchSize,
|
||||||
|
ENABLE_ASYNC_EMBEDDING: ENABLE_ASYNC_EMBEDDING,
|
||||||
ollama_config: {
|
ollama_config: {
|
||||||
key: OllamaKey,
|
key: OllamaKey,
|
||||||
url: OllamaUrl
|
url: OllamaUrl
|
||||||
@@ -237,6 +240,7 @@
|
|||||||
embeddingEngine = embeddingConfig.embedding_engine;
|
embeddingEngine = embeddingConfig.embedding_engine;
|
||||||
embeddingModel = embeddingConfig.embedding_model;
|
embeddingModel = embeddingConfig.embedding_model;
|
||||||
embeddingBatchSize = embeddingConfig.embedding_batch_size ?? 1;
|
embeddingBatchSize = embeddingConfig.embedding_batch_size ?? 1;
|
||||||
|
ENABLE_ASYNC_EMBEDDING = embeddingConfig.ENABLE_ASYNC_EMBEDDING ?? true;
|
||||||
|
|
||||||
OpenAIKey = embeddingConfig.openai_config.key;
|
OpenAIKey = embeddingConfig.openai_config.key;
|
||||||
OpenAIUrl = embeddingConfig.openai_config.url;
|
OpenAIUrl = embeddingConfig.openai_config.url;
|
||||||
@@ -927,6 +931,22 @@
|
|||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class=" mb-2.5 flex w-full justify-between">
|
||||||
|
<div class="self-center text-xs font-medium">
|
||||||
|
<Tooltip
|
||||||
|
content={$i18n.t(
|
||||||
|
'Runs embedding tasks concurrently to speed up processing. Turn off if rate limits become an issue.'
|
||||||
|
)}
|
||||||
|
placement="top-start"
|
||||||
|
>
|
||||||
|
{$i18n.t('Async Embedding Processing')}
|
||||||
|
</Tooltip>
|
||||||
|
</div>
|
||||||
|
<div class="flex items-center relative">
|
||||||
|
<Switch bind:state={ENABLE_ASYNC_EMBEDDING} />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user