mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-16 11:57:51 +01:00
Moving code out of playwright branch
This commit is contained in:
@@ -1,5 +1,3 @@
|
||||
import validators
|
||||
|
||||
from typing import Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
@@ -12,8 +10,6 @@ def get_filtered_results(results, filter_list):
|
||||
filtered_results = []
|
||||
for result in results:
|
||||
url = result.get("url") or result.get("link", "")
|
||||
if not validators.url(url):
|
||||
continue
|
||||
domain = urlparse(url).netloc
|
||||
if any(domain.endswith(filtered_domain) for filtered_domain in filter_list):
|
||||
filtered_results.append(result)
|
||||
|
||||
@@ -48,16 +48,6 @@ def validate_url(url: Union[str, Sequence[str]]):
|
||||
else:
|
||||
return False
|
||||
|
||||
def safe_validate_urls(url: Sequence[str]) -> Sequence[str]:
    """Return the entries of *url* that pass ``validate_url``.

    Each candidate is checked on its own. A candidate is kept when
    ``validate_url`` returns a truthy value for it, and dropped when the
    check returns a falsy value or raises ``ValueError``. The relative
    order of the surviving entries is preserved.
    """

    def _passes(candidate: str) -> bool:
        # A ValueError from the validator means "reject this one entry",
        # not "abort the whole batch".
        try:
            return bool(validate_url(candidate))
        except ValueError:
            return False

    return [candidate for candidate in url if _passes(candidate)]
|
||||
|
||||
def resolve_hostname(hostname):
|
||||
# Get address information
|
||||
addr_info = socket.getaddrinfo(hostname, None)
|
||||
@@ -253,11 +243,12 @@ def get_web_loader(
|
||||
verify_ssl: bool = True,
|
||||
requests_per_second: int = 2,
|
||||
):
|
||||
# Check if the URLs are valid
|
||||
safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls)
|
||||
# Check if the URL is valid
|
||||
if not validate_url(urls):
|
||||
raise ValueError(ERROR_MESSAGES.INVALID_URL)
|
||||
|
||||
web_loader_args = {
|
||||
"urls": safe_urls,
|
||||
"urls": urls,
|
||||
"verify_ssl": verify_ssl,
|
||||
"requests_per_second": requests_per_second,
|
||||
"continue_on_failure": True
|
||||
@@ -270,6 +261,6 @@ def get_web_loader(
|
||||
WebLoaderClass = RAG_WEB_LOADERS[RAG_WEB_LOADER.value]
|
||||
web_loader = WebLoaderClass(**web_loader_args)
|
||||
|
||||
log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(safe_urls))
|
||||
log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(urls))
|
||||
|
||||
return web_loader
|
||||
@@ -1239,10 +1239,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]:
|
||||
|
||||
@router.post("/process/web/search")
|
||||
async def process_web_search(
|
||||
request: Request, form_data: SearchForm, extra_params: dict, user=Depends(get_verified_user)
|
||||
request: Request, form_data: SearchForm, user=Depends(get_verified_user)
|
||||
):
|
||||
event_emitter = extra_params["__event_emitter__"]
|
||||
|
||||
try:
|
||||
logging.info(
|
||||
f"trying to web search with {request.app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query}"
|
||||
@@ -1260,18 +1258,6 @@ async def process_web_search(
|
||||
|
||||
log.debug(f"web_results: {web_results}")
|
||||
|
||||
await event_emitter(
|
||||
{
|
||||
"type": "status",
|
||||
"data": {
|
||||
"action": "web_search",
|
||||
"description": "Loading {{count}} sites",
|
||||
"urls": [result.link for result in web_results],
|
||||
"done": False
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
collection_name = form_data.collection_name
|
||||
if collection_name == "" or collection_name is None:
|
||||
|
||||
@@ -443,7 +443,6 @@ async def chat_web_search_handler(
|
||||
"query": searchQuery,
|
||||
}
|
||||
),
|
||||
extra_params=extra_params,
|
||||
user=user
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user