Moving code out of playwright branch

This commit is contained in:
Rory
2025-02-03 18:47:26 -06:00
parent 22746c7a3f
commit 1b581b714f
54 changed files with 6 additions and 84 deletions

View File

@@ -48,16 +48,6 @@ def validate_url(url: Union[str, Sequence[str]]):
else:
return False
def safe_validate_urls(url: Sequence[str]) -> Sequence[str]:
    """Return the entries of *url* that pass ``validate_url``.

    Each entry is checked individually. An entry is kept only when
    ``validate_url`` returns a truthy value for it; an entry for which
    ``validate_url`` raises ``ValueError`` is skipped rather than aborting
    the whole batch. The relative order of the kept entries is preserved.

    Args:
        url: The candidate URLs to check.

    Returns:
        A new list holding the accepted entries (possibly empty).
    """
    accepted = []
    for candidate in url:
        try:
            is_valid = validate_url(candidate)
        except ValueError:
            # A rejected entry must not prevent the remaining ones from
            # being checked, so drop it and move on.
            continue
        if is_valid:
            accepted.append(candidate)
    return accepted
def resolve_hostname(hostname):
# Get address information
addr_info = socket.getaddrinfo(hostname, None)
@@ -253,11 +243,12 @@ def get_web_loader(
verify_ssl: bool = True,
requests_per_second: int = 2,
):
# Check if the URLs are valid
safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls)
# Check if the URL is valid
if not validate_url(urls):
raise ValueError(ERROR_MESSAGES.INVALID_URL)
web_loader_args = {
"urls": safe_urls,
"urls": urls,
"verify_ssl": verify_ssl,
"requests_per_second": requests_per_second,
"continue_on_failure": True
@@ -270,6 +261,6 @@ def get_web_loader(
WebLoaderClass = RAG_WEB_LOADERS[RAG_WEB_LOADER.value]
web_loader = WebLoaderClass(**web_loader_args)
log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(safe_urls))
log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(urls))
return web_loader