2025-02-03 17:37:20 -06:00
|
|
|
import validators
|
|
|
|
|
|
2024-05-06 12:27:46 +08:00
|
|
|
from typing import Optional
|
2024-06-13 07:14:48 +07:00
|
|
|
from urllib.parse import urlparse
|
2024-08-28 00:10:27 +02:00
|
|
|
|
2024-05-06 12:27:46 +08:00
|
|
|
from pydantic import BaseModel
|
|
|
|
|
|
2025-11-25 02:31:34 -05:00
|
|
|
from open_webui.retrieval.web.utils import resolve_hostname
|
|
|
|
|
from open_webui.utils.misc import is_string_allowed
|
2025-11-18 04:40:55 -05:00
|
|
|
|
2024-05-06 12:27:46 +08:00
|
|
|
|
2024-06-17 14:36:26 +07:00
|
|
|
def get_filtered_results(results, filter_list):
    """Return the search results whose host passes ``filter_list``.

    Each result is a mapping; its address is taken from the first truthy
    value among the ``"url"``, ``"link"`` and ``"href"`` keys. Results whose
    address fails ``validators.url`` or has no network location are dropped.

    For every remaining result a list of candidate names is built (the bare
    host, the raw netloc when it differs, and any addresses returned by
    ``resolve_hostname``) and handed to ``is_string_allowed`` together with
    ``filter_list``; the result is kept when that call returns a truthy value.

    :param results: iterable of dict-like search results.
    :param filter_list: filter entries forwarded to ``is_string_allowed``.
        When empty or ``None`` no filtering is performed.
    :return: ``results`` itself when ``filter_list`` is falsy, otherwise a
        new list containing the accepted results in their original order.
    """
    if not filter_list:
        return results

    filtered_results = []

    for result in results:
        url = result.get("url") or result.get("link", "") or result.get("href", "")
        if not validators.url(url):
            continue

        parsed = urlparse(url)
        netloc = parsed.netloc
        if not netloc:
            continue

        # ``netloc`` may still carry ``user:pass@`` and ``:port``; neither is
        # part of the host name. Resolve and match on the bare host so that
        # e.g. "https://example.com:8443/" is judged as "example.com".
        host = parsed.hostname or netloc

        hostnames = [host]
        if netloc != host:
            # Keep the raw netloc as an extra candidate so filter entries
            # that matched it before this fix continue to match.
            hostnames.append(netloc)

        try:
            ipv4_addresses, ipv6_addresses = resolve_hostname(host)
            hostnames.extend(ipv4_addresses)
            hostnames.extend(ipv6_addresses)
        except Exception:
            # Best effort: if the host cannot be resolved, the decision is
            # made on the names collected so far instead of failing the
            # whole search.
            pass

        if is_string_allowed(hostnames, filter_list):
            filtered_results.append(result)

    return filtered_results
|
|
|
|
|
|
2024-06-17 14:32:23 -07:00
|
|
|
|
2024-05-06 12:27:46 +08:00
|
|
|
class SearchResult(BaseModel):
    """A single web-search hit.

    ``link`` is annotated as a plain string; ``title`` and ``snippet`` are
    annotated ``Optional[str]`` and no defaults are declared on the model.
    """

    # Address of the hit.
    link: str
    # Title text; may be None.
    title: Optional[str]
    # Excerpt text; may be None.
    snippet: Optional[str]
|