mirror of
https://github.com/open-webui/open-webui.git
synced 2026-05-18 05:05:09 +02:00
feat: brave search llm context
This commit is contained in:
@@ -3345,6 +3345,12 @@ BRAVE_SEARCH_API_KEY = PersistentConfig(
|
||||
os.getenv('BRAVE_SEARCH_API_KEY', ''),
|
||||
)
|
||||
|
||||
# Token budget passed to the Brave LLM Context search as its maximum number
# of tokens. Seeded from the BRAVE_SEARCH_CONTEXT_TOKENS environment variable
# and falling back to 8192 when that variable is not set.
BRAVE_SEARCH_CONTEXT_TOKENS = PersistentConfig(
    'BRAVE_SEARCH_CONTEXT_TOKENS',
    'rag.web.search.brave_search_context_tokens',
    int(os.environ.get('BRAVE_SEARCH_CONTEXT_TOKENS', '8192')),
)
|
||||
|
||||
KAGI_SEARCH_API_KEY = PersistentConfig(
|
||||
'KAGI_SEARCH_API_KEY',
|
||||
'rag.web.search.kagi_search_api_key',
|
||||
|
||||
@@ -345,6 +345,7 @@ from open_webui.config import (
|
||||
BING_SEARCH_V7_ENDPOINT,
|
||||
BING_SEARCH_V7_SUBSCRIPTION_KEY,
|
||||
BRAVE_SEARCH_API_KEY,
|
||||
BRAVE_SEARCH_CONTEXT_TOKENS,
|
||||
EXA_API_KEY,
|
||||
PERPLEXITY_API_KEY,
|
||||
PERPLEXITY_MODEL,
|
||||
@@ -1108,6 +1109,7 @@ app.state.config.YACY_PASSWORD = YACY_PASSWORD
|
||||
app.state.config.GOOGLE_PSE_API_KEY = GOOGLE_PSE_API_KEY
|
||||
app.state.config.GOOGLE_PSE_ENGINE_ID = GOOGLE_PSE_ENGINE_ID
|
||||
app.state.config.BRAVE_SEARCH_API_KEY = BRAVE_SEARCH_API_KEY
|
||||
app.state.config.BRAVE_SEARCH_CONTEXT_TOKENS = BRAVE_SEARCH_CONTEXT_TOKENS
|
||||
app.state.config.KAGI_SEARCH_API_KEY = KAGI_SEARCH_API_KEY
|
||||
app.state.config.MOJEEK_SEARCH_API_KEY = MOJEEK_SEARCH_API_KEY
|
||||
app.state.config.BOCHA_SEARCH_API_KEY = BOCHA_SEARCH_API_KEY
|
||||
|
||||
@@ -135,6 +135,7 @@ class PptxLoader:
|
||||
]
|
||||
|
||||
|
||||
|
||||
class TikaLoader:
|
||||
def __init__(self, url, file_path, mime_type=None, extract_images=None):
|
||||
self.url = url
|
||||
|
||||
66
backend/open_webui/retrieval/web/brave_llm_context.py
Normal file
66
backend/open_webui/retrieval/web/brave_llm_context.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import logging
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def search_brave_llm_context(
    api_key: str,
    query: str,
    count: int,
    filter_list: Optional[list[str]] = None,
    context_tokens: int = 8192,
    timeout: float = 30.0,
) -> list[SearchResult]:
    """Search with Brave's LLM Context API and return extracted page content.

    Uses /res/v1/llm/context instead of /res/v1/web/search, authenticated with
    the same Brave Search API key. Each result carries the passages Brave
    extracted for that URL (joined with blank lines) rather than a short
    snippet.

    Args:
        api_key (str): A Brave Search API key (same key as web search)
        query (str): The query to search for
        count (int): Maximum number of results to return
        filter_list (list[str], optional): Domain filter list
        context_tokens (int): Maximum total tokens to retrieve
            (1024-32768, default 8192)
        timeout (float): Seconds to wait for each HTTP request before giving
            up, so a stalled connection cannot block the caller forever

    Returns:
        list[SearchResult]: At most ``count`` results, each with the page URL,
        its title (if any) and the joined extracted passages as the snippet.

    Raises:
        requests.HTTPError: If the API answers with an error status (including
            a second 429 after the single retry).
        requests.Timeout: If a request does not complete within ``timeout``.
    """
    url = 'https://api.search.brave.com/res/v1/llm/context'
    headers = {
        'Accept': 'application/json',
        'Accept-Encoding': 'gzip',
        'X-Subscription-Token': api_key,
    }
    params = {
        'q': query,
        'count': count,
        'maximum_number_of_tokens': context_tokens,
    }

    response = requests.get(url, headers=headers, params=params, timeout=timeout)

    # Handle 429 rate limiting - same rate limits as web search.
    # Only one retry is attempted; a second 429 surfaces via raise_for_status().
    if response.status_code == 429:
        log.info('Brave LLM Context API rate limited (429), retrying after 1 second...')
        time.sleep(1)
        response = requests.get(url, headers=headers, params=params, timeout=timeout)

    response.raise_for_status()

    json_response = response.json()
    # `or` fallbacks cover keys that are present but explicitly null, which
    # a plain .get(key, default) would pass through as None.
    results = (json_response.get('grounding') or {}).get('generic') or []
    if filter_list:
        results = get_filtered_results(results, filter_list)

    # Drop entries without a URL before truncating, so one malformed entry
    # neither raises KeyError nor uses up one of the `count` slots.
    usable_results = [result for result in results if result.get('url')]

    return [
        SearchResult(
            link=result['url'],
            title=result.get('title'),
            snippet='\n\n'.join(result.get('snippets') or []),
        )
        for result in usable_results[:count]
    ]
|
||||
@@ -57,6 +57,7 @@ from open_webui.retrieval.web.utils import get_web_loader
|
||||
from open_webui.retrieval.web.ollama import search_ollama_cloud
|
||||
from open_webui.retrieval.web.perplexity_search import search_perplexity_search
|
||||
from open_webui.retrieval.web.brave import search_brave
|
||||
from open_webui.retrieval.web.brave_llm_context import search_brave_llm_context
|
||||
from open_webui.retrieval.web.kagi import search_kagi
|
||||
from open_webui.retrieval.web.mojeek import search_mojeek
|
||||
from open_webui.retrieval.web.bocha import search_bocha
|
||||
@@ -531,6 +532,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
||||
'GOOGLE_PSE_API_KEY': request.app.state.config.GOOGLE_PSE_API_KEY,
|
||||
'GOOGLE_PSE_ENGINE_ID': request.app.state.config.GOOGLE_PSE_ENGINE_ID,
|
||||
'BRAVE_SEARCH_API_KEY': request.app.state.config.BRAVE_SEARCH_API_KEY,
|
||||
'BRAVE_SEARCH_CONTEXT_TOKENS': request.app.state.config.BRAVE_SEARCH_CONTEXT_TOKENS,
|
||||
'KAGI_SEARCH_API_KEY': request.app.state.config.KAGI_SEARCH_API_KEY,
|
||||
'MOJEEK_SEARCH_API_KEY': request.app.state.config.MOJEEK_SEARCH_API_KEY,
|
||||
'BOCHA_SEARCH_API_KEY': request.app.state.config.BOCHA_SEARCH_API_KEY,
|
||||
@@ -599,6 +601,7 @@ class WebConfig(BaseModel):
|
||||
GOOGLE_PSE_API_KEY: Optional[str] = None
|
||||
GOOGLE_PSE_ENGINE_ID: Optional[str] = None
|
||||
BRAVE_SEARCH_API_KEY: Optional[str] = None
|
||||
BRAVE_SEARCH_CONTEXT_TOKENS: Optional[int] = None
|
||||
KAGI_SEARCH_API_KEY: Optional[str] = None
|
||||
MOJEEK_SEARCH_API_KEY: Optional[str] = None
|
||||
BOCHA_SEARCH_API_KEY: Optional[str] = None
|
||||
@@ -1081,6 +1084,8 @@ async def update_rag_config(request: Request, form_data: ConfigForm, user=Depend
|
||||
request.app.state.config.GOOGLE_PSE_API_KEY = form_data.web.GOOGLE_PSE_API_KEY
|
||||
request.app.state.config.GOOGLE_PSE_ENGINE_ID = form_data.web.GOOGLE_PSE_ENGINE_ID
|
||||
request.app.state.config.BRAVE_SEARCH_API_KEY = form_data.web.BRAVE_SEARCH_API_KEY
|
||||
if form_data.web.BRAVE_SEARCH_CONTEXT_TOKENS is not None:
|
||||
request.app.state.config.BRAVE_SEARCH_CONTEXT_TOKENS = form_data.web.BRAVE_SEARCH_CONTEXT_TOKENS
|
||||
request.app.state.config.KAGI_SEARCH_API_KEY = form_data.web.KAGI_SEARCH_API_KEY
|
||||
request.app.state.config.MOJEEK_SEARCH_API_KEY = form_data.web.MOJEEK_SEARCH_API_KEY
|
||||
request.app.state.config.BOCHA_SEARCH_API_KEY = form_data.web.BOCHA_SEARCH_API_KEY
|
||||
@@ -1216,6 +1221,7 @@ async def update_rag_config(request: Request, form_data: ConfigForm, user=Depend
|
||||
'GOOGLE_PSE_API_KEY': request.app.state.config.GOOGLE_PSE_API_KEY,
|
||||
'GOOGLE_PSE_ENGINE_ID': request.app.state.config.GOOGLE_PSE_ENGINE_ID,
|
||||
'BRAVE_SEARCH_API_KEY': request.app.state.config.BRAVE_SEARCH_API_KEY,
|
||||
'BRAVE_SEARCH_CONTEXT_TOKENS': request.app.state.config.BRAVE_SEARCH_CONTEXT_TOKENS,
|
||||
'KAGI_SEARCH_API_KEY': request.app.state.config.KAGI_SEARCH_API_KEY,
|
||||
'MOJEEK_SEARCH_API_KEY': request.app.state.config.MOJEEK_SEARCH_API_KEY,
|
||||
'BOCHA_SEARCH_API_KEY': request.app.state.config.BOCHA_SEARCH_API_KEY,
|
||||
@@ -1963,6 +1969,17 @@ def search_web(request: Request, engine: str, query: str, user=None) -> list[Sea
|
||||
)
|
||||
else:
|
||||
raise Exception('No BRAVE_SEARCH_API_KEY found in environment variables')
|
||||
elif engine == 'brave_llm_context':
|
||||
if request.app.state.config.BRAVE_SEARCH_API_KEY:
|
||||
return search_brave_llm_context(
|
||||
request.app.state.config.BRAVE_SEARCH_API_KEY,
|
||||
query,
|
||||
request.app.state.config.WEB_SEARCH_RESULT_COUNT,
|
||||
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||
request.app.state.config.BRAVE_SEARCH_CONTEXT_TOKENS,
|
||||
)
|
||||
else:
|
||||
raise Exception('No BRAVE_SEARCH_API_KEY found in environment variables')
|
||||
elif engine == 'kagi':
|
||||
if request.app.state.config.KAGI_SEARCH_API_KEY:
|
||||
return search_kagi(
|
||||
|
||||
Reference in New Issue
Block a user