mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-16 11:57:51 +01:00
Merge branch 'dev' into logit_bias
This commit is contained in:
@@ -1977,6 +1977,12 @@ EXA_API_KEY = PersistentConfig(
|
||||
os.getenv("EXA_API_KEY", ""),
|
||||
)
|
||||
|
||||
PERPLEXITY_API_KEY = PersistentConfig(
|
||||
"PERPLEXITY_API_KEY",
|
||||
"rag.web.search.perplexity_api_key",
|
||||
os.getenv("PERPLEXITY_API_KEY", ""),
|
||||
)
|
||||
|
||||
RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig(
|
||||
"RAG_WEB_SEARCH_RESULT_COUNT",
|
||||
"rag.web.search.result_count",
|
||||
|
||||
@@ -215,6 +215,7 @@ from open_webui.config import (
|
||||
BING_SEARCH_V7_SUBSCRIPTION_KEY,
|
||||
BRAVE_SEARCH_API_KEY,
|
||||
EXA_API_KEY,
|
||||
PERPLEXITY_API_KEY,
|
||||
KAGI_SEARCH_API_KEY,
|
||||
MOJEEK_SEARCH_API_KEY,
|
||||
BOCHA_SEARCH_API_KEY,
|
||||
@@ -603,6 +604,7 @@ app.state.config.JINA_API_KEY = JINA_API_KEY
|
||||
app.state.config.BING_SEARCH_V7_ENDPOINT = BING_SEARCH_V7_ENDPOINT
|
||||
app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = BING_SEARCH_V7_SUBSCRIPTION_KEY
|
||||
app.state.config.EXA_API_KEY = EXA_API_KEY
|
||||
app.state.config.PERPLEXITY_API_KEY = PERPLEXITY_API_KEY
|
||||
|
||||
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT
|
||||
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS
|
||||
|
||||
@@ -414,6 +414,13 @@ def get_sources_from_files(
|
||||
]
|
||||
],
|
||||
}
|
||||
elif file.get("file").get("data"):
|
||||
context = {
|
||||
"documents": [[file.get("file").get("data", {}).get("content")]],
|
||||
"metadatas": [
|
||||
[file.get("file").get("data", {}).get("metadata", {})]
|
||||
],
|
||||
}
|
||||
else:
|
||||
collection_names = []
|
||||
if file.get("type") == "collection":
|
||||
|
||||
87
backend/open_webui/retrieval/web/perplexity.py
Normal file
87
backend/open_webui/retrieval/web/perplexity.py
Normal file
@@ -0,0 +1,87 @@
|
||||
import logging
|
||||
from typing import Optional, List
|
||||
import requests
|
||||
|
||||
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
|
||||
from open_webui.env import SRC_LOG_LEVELS
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||
|
||||
|
||||
def search_perplexity(
    api_key: str,
    query: str,
    count: int,
    filter_list: Optional[list[str]] = None,
) -> list[SearchResult]:
    """Search using the Perplexity API and return the cited sources.

    The query is sent to the Perplexity chat-completions endpoint (model
    ``sonar``). Every entry of the response's ``citations`` list becomes one
    result titled ``Source <n>``. The generated answer text is attached as the
    snippet of the first citation only; all other snippets are empty.

    Args:
        api_key (str): A Perplexity API key. The value is coerced with
            ``str()`` so config wrapper objects can be passed directly.
        query (str): The query to search for
        count (int): Maximum number of results to return
        filter_list (Optional[list[str]]): When non-empty, results are passed
            through ``get_filtered_results`` with this list before the
            ``count`` limit is applied.

    Returns:
        list[SearchResult]: At most ``count`` results. An empty list is
        returned (and the error is logged) if the request or the response
        parsing fails for any reason.
    """

    # Handle PersistentConfig-style wrappers: every object supports str(),
    # so the conversion is unconditional.
    api_key = str(api_key)

    try:
        url = "https://api.perplexity.ai/chat/completions"

        # Create payload for the API call
        payload = {
            "model": "sonar",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a search assistant. Provide factual information with citations.",
                },
                {"role": "user", "content": query},
            ],
            "temperature": 0.2,  # Lower temperature for more factual responses
            "stream": False,
        }

        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

        # (connect, read) timeout in seconds so an unresponsive endpoint
        # cannot block the caller forever.
        request_timeout = (10, 60)

        # Make the API request
        response = requests.request(
            "POST", url, json=payload, headers=headers, timeout=request_timeout
        )
        # Surface HTTP errors (bad key, rate limit, ...) in the log instead of
        # silently treating the error body as "no citations".
        response.raise_for_status()

        # Parse the JSON response
        json_response = response.json()

        # Extract citations from the response; tolerate a missing or null field
        citations = json_response.get("citations") or []

        # The generated answer is only needed when there is a citation to
        # attach it to.
        answer = ""
        if citations and json_response.get("choices"):
            answer = json_response["choices"][0]["message"]["content"]

        # Build one result per citation. Titles keep the citation's original
        # position so numbering is stable regardless of filtering.
        results = [
            {
                "link": citation,
                "title": f"Source {i+1}",
                "snippet": answer if i == 0 else "",
            }
            for i, citation in enumerate(citations)
        ]

        # Filter BEFORE truncating so that up to `count` matching results are
        # returned whenever enough citations pass the filter.
        if filter_list:
            results = get_filtered_results(results, filter_list)

        return [
            SearchResult(
                link=result["link"], title=result["title"], snippet=result["snippet"]
            )
            for result in results[:count]
        ]

    except Exception as e:
        # Best-effort search provider: log and fall back to "no results".
        log.error(f"Error searching with Perplexity API: {e}")
        return []
|
||||
@@ -59,7 +59,7 @@ from open_webui.retrieval.web.serpstack import search_serpstack
|
||||
from open_webui.retrieval.web.tavily import search_tavily
|
||||
from open_webui.retrieval.web.bing import search_bing
|
||||
from open_webui.retrieval.web.exa import search_exa
|
||||
|
||||
from open_webui.retrieval.web.perplexity import search_perplexity
|
||||
|
||||
from open_webui.retrieval.utils import (
|
||||
get_embedding_function,
|
||||
@@ -405,6 +405,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
||||
"bing_search_v7_endpoint": request.app.state.config.BING_SEARCH_V7_ENDPOINT,
|
||||
"bing_search_v7_subscription_key": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY,
|
||||
"exa_api_key": request.app.state.config.EXA_API_KEY,
|
||||
"perplexity_api_key": request.app.state.config.PERPLEXITY_API_KEY,
|
||||
"result_count": request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
||||
"trust_env": request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
|
||||
"concurrent_requests": request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
@@ -465,6 +466,7 @@ class WebSearchConfig(BaseModel):
|
||||
bing_search_v7_endpoint: Optional[str] = None
|
||||
bing_search_v7_subscription_key: Optional[str] = None
|
||||
exa_api_key: Optional[str] = None
|
||||
perplexity_api_key: Optional[str] = None
|
||||
result_count: Optional[int] = None
|
||||
concurrent_requests: Optional[int] = None
|
||||
trust_env: Optional[bool] = None
|
||||
@@ -617,6 +619,8 @@ async def update_rag_config(
|
||||
|
||||
request.app.state.config.EXA_API_KEY = form_data.web.search.exa_api_key
|
||||
|
||||
request.app.state.config.PERPLEXITY_API_KEY = form_data.web.search.perplexity_api_key
|
||||
|
||||
request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = (
|
||||
form_data.web.search.result_count
|
||||
)
|
||||
@@ -683,6 +687,7 @@ async def update_rag_config(
|
||||
"bing_search_v7_endpoint": request.app.state.config.BING_SEARCH_V7_ENDPOINT,
|
||||
"bing_search_v7_subscription_key": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY,
|
||||
"exa_api_key": request.app.state.config.EXA_API_KEY,
|
||||
"perplexity_api_key": request.app.state.config.PERPLEXITY_API_KEY,
|
||||
"result_count": request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
||||
"concurrent_requests": request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
"trust_env": request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
|
||||
@@ -1182,9 +1187,13 @@ def process_web(
|
||||
content = " ".join([doc.page_content for doc in docs])
|
||||
|
||||
log.debug(f"text_content: {content}")
|
||||
save_docs_to_vector_db(
|
||||
request, docs, collection_name, overwrite=True, user=user
|
||||
)
|
||||
|
||||
if not request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
|
||||
save_docs_to_vector_db(
|
||||
request, docs, collection_name, overwrite=True, user=user
|
||||
)
|
||||
else:
|
||||
collection_name = None
|
||||
|
||||
return {
|
||||
"status": True,
|
||||
@@ -1196,6 +1205,7 @@ def process_web(
|
||||
},
|
||||
"meta": {
|
||||
"name": form_data.url,
|
||||
"source": form_data.url,
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -1221,6 +1231,7 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]:
|
||||
- SERPLY_API_KEY
|
||||
- TAVILY_API_KEY
|
||||
- EXA_API_KEY
|
||||
- PERPLEXITY_API_KEY
|
||||
- SEARCHAPI_API_KEY + SEARCHAPI_ENGINE (by default `google`)
|
||||
- SERPAPI_API_KEY + SERPAPI_ENGINE (by default `google`)
|
||||
Args:
|
||||
@@ -1385,6 +1396,13 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]:
|
||||
request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
||||
request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||
)
|
||||
elif engine == "perplexity":
|
||||
return search_perplexity(
|
||||
request.app.state.config.PERPLEXITY_API_KEY,
|
||||
query,
|
||||
request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
|
||||
request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST,
|
||||
)
|
||||
else:
|
||||
raise Exception("No search engine API key found in environment variables")
|
||||
|
||||
|
||||
@@ -101,19 +101,33 @@ class LocalStorageProvider(StorageProvider):
|
||||
|
||||
class S3StorageProvider(StorageProvider):
|
||||
def __init__(self):
|
||||
self.s3_client = boto3.client(
|
||||
"s3",
|
||||
region_name=S3_REGION_NAME,
|
||||
endpoint_url=S3_ENDPOINT_URL,
|
||||
aws_access_key_id=S3_ACCESS_KEY_ID,
|
||||
aws_secret_access_key=S3_SECRET_ACCESS_KEY,
|
||||
config=Config(
|
||||
s3={
|
||||
"use_accelerate_endpoint": S3_USE_ACCELERATE_ENDPOINT,
|
||||
"addressing_style": S3_ADDRESSING_STYLE,
|
||||
},
|
||||
),
|
||||
config = Config(
|
||||
s3={
|
||||
"use_accelerate_endpoint": S3_USE_ACCELERATE_ENDPOINT,
|
||||
"addressing_style": S3_ADDRESSING_STYLE,
|
||||
},
|
||||
)
|
||||
|
||||
# If access key and secret are provided, use them for authentication
|
||||
if S3_ACCESS_KEY_ID and S3_SECRET_ACCESS_KEY:
|
||||
self.s3_client = boto3.client(
|
||||
"s3",
|
||||
region_name=S3_REGION_NAME,
|
||||
endpoint_url=S3_ENDPOINT_URL,
|
||||
aws_access_key_id=S3_ACCESS_KEY_ID,
|
||||
aws_secret_access_key=S3_SECRET_ACCESS_KEY,
|
||||
config=config,
|
||||
)
|
||||
else:
|
||||
# If no explicit credentials are provided, fall back to default AWS credentials
|
||||
# This supports workload identity (IAM roles for EC2, EKS, etc.)
|
||||
self.s3_client = boto3.client(
|
||||
"s3",
|
||||
region_name=S3_REGION_NAME,
|
||||
endpoint_url=S3_ENDPOINT_URL,
|
||||
config=config,
|
||||
)
|
||||
|
||||
self.bucket_name = S3_BUCKET_NAME
|
||||
self.key_prefix = S3_KEY_PREFIX if S3_KEY_PREFIX else ""
|
||||
|
||||
|
||||
@@ -187,6 +187,17 @@ class TestS3StorageProvider:
|
||||
assert not (upload_dir / self.filename).exists()
|
||||
assert not (upload_dir / self.filename_extra).exists()
|
||||
|
||||
def test_init_without_credentials(self, monkeypatch):
    """S3StorageProvider must be constructible without an explicit key/secret."""
    # Blank out the credential constants on the provider module (these are
    # module attributes, not environment variables) for this test only.
    for credential_name in ("S3_ACCESS_KEY_ID", "S3_SECRET_ACCESS_KEY"):
        monkeypatch.setattr(provider, credential_name, None)

    # Construction itself is part of the check: it should not raise.
    s3_storage = provider.S3StorageProvider()

    assert s3_storage.s3_client is not None
    assert s3_storage.bucket_name == provider.S3_BUCKET_NAME
|
||||
|
||||
|
||||
class TestGCSStorageProvider:
|
||||
Storage = provider.GCSStorageProvider()
|
||||
|
||||
Reference in New Issue
Block a user