refac: web/rag config

This commit is contained in:
Timothy Jaeryang Baek
2025-04-12 16:33:36 -07:00
parent c3497da5dd
commit 48a23ce3fe
11 changed files with 1367 additions and 1530 deletions

View File

@@ -201,7 +201,10 @@ def save_config(config):
T = TypeVar("T")
ENABLE_PERSISTENT_CONFIG = os.environ.get("ENABLE_PERSISTENT_CONFIG", "True").lower() == "true"
ENABLE_PERSISTENT_CONFIG = (
os.environ.get("ENABLE_PERSISTENT_CONFIG", "True").lower() == "true"
)
class PersistentConfig(Generic[T]):
def __init__(self, env_name: str, config_path: str, env_value: T):
@@ -612,10 +615,16 @@ def load_oauth_providers():
"scope": OAUTH_SCOPES.value,
}
if OAUTH_CODE_CHALLENGE_METHOD.value and OAUTH_CODE_CHALLENGE_METHOD.value == "S256":
if (
OAUTH_CODE_CHALLENGE_METHOD.value
and OAUTH_CODE_CHALLENGE_METHOD.value == "S256"
):
client_kwargs["code_challenge_method"] = "S256"
elif OAUTH_CODE_CHALLENGE_METHOD.value:
raise Exception('Code challenge methods other than "%s" not supported. Given: "%s"' % ("S256", OAUTH_CODE_CHALLENGE_METHOD.value))
raise Exception(
'Code challenge methods other than "%s" not supported. Given: "%s"'
% ("S256", OAUTH_CODE_CHALLENGE_METHOD.value)
)
client.register(
name="oidc",
@@ -1820,12 +1829,6 @@ RAG_FILE_MAX_SIZE = PersistentConfig(
),
)
ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = PersistentConfig(
"ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION",
"rag.enable_web_loader_ssl_verification",
os.environ.get("ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION", "True").lower() == "true",
)
RAG_EMBEDDING_ENGINE = PersistentConfig(
"RAG_EMBEDDING_ENGINE",
"rag.embedding_engine",
@@ -1990,16 +1993,20 @@ YOUTUBE_LOADER_PROXY_URL = PersistentConfig(
)
ENABLE_RAG_WEB_SEARCH = PersistentConfig(
"ENABLE_RAG_WEB_SEARCH",
####################################
# Web Search (RAG)
####################################
ENABLE_WEB_SEARCH = PersistentConfig(
"ENABLE_WEB_SEARCH",
"rag.web.search.enable",
os.getenv("ENABLE_RAG_WEB_SEARCH", "False").lower() == "true",
os.getenv("ENABLE_WEB_SEARCH", "False").lower() == "true",
)
RAG_WEB_SEARCH_ENGINE = PersistentConfig(
"RAG_WEB_SEARCH_ENGINE",
WEB_SEARCH_ENGINE = PersistentConfig(
"WEB_SEARCH_ENGINE",
"rag.web.search.engine",
os.getenv("RAG_WEB_SEARCH_ENGINE", ""),
os.getenv("WEB_SEARCH_ENGINE", ""),
)
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
@@ -2008,10 +2015,18 @@ BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig(
os.getenv("BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL", "False").lower() == "true",
)
# Result-count setting for web search, registered under the config path
# "rag.web.search.result_count". The initial value is read from the
# WEB_SEARCH_RESULT_COUNT environment variable ("3" when unset) and parsed
# with int(), so a non-integer value raises ValueError when this runs.
WEB_SEARCH_RESULT_COUNT = PersistentConfig(
    env_name="WEB_SEARCH_RESULT_COUNT",
    config_path="rag.web.search.result_count",
    env_value=int(os.environ.get("WEB_SEARCH_RESULT_COUNT", "3")),
)
# You can provide a list of your own websites to filter after performing a web search.
# This ensures the highest level of safety and reliability of the information sources.
RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
"RAG_WEB_SEARCH_DOMAIN_FILTER_LIST",
WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
"WEB_SEARCH_DOMAIN_FILTER_LIST",
"rag.web.search.domain.filter_list",
[
# "wikipedia.com",
@@ -2020,6 +2035,30 @@ RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
],
)
# Concurrent-request setting for web search, registered under the config path
# "rag.web.search.concurrent_requests". The initial value is read from the
# WEB_SEARCH_CONCURRENT_REQUESTS environment variable ("10" when unset) and
# parsed with int(); a non-integer value raises ValueError when this runs.
WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig(
    env_name="WEB_SEARCH_CONCURRENT_REQUESTS",
    config_path="rag.web.search.concurrent_requests",
    env_value=int(os.environ.get("WEB_SEARCH_CONCURRENT_REQUESTS", "10")),
)
# Web loader engine selection, registered under the config path
# "rag.web.loader.engine". The initial value is the WEB_LOADER_ENGINE
# environment variable, or the empty string when it is unset.
WEB_LOADER_ENGINE = PersistentConfig(
    env_name="WEB_LOADER_ENGINE",
    config_path="rag.web.loader.engine",
    env_value=os.getenv("WEB_LOADER_ENGINE", ""),
)
# SSL-verification toggle for the web loader, registered under the config
# path "rag.web.loader.ssl_verification". The flag is True only when the
# ENABLE_WEB_LOADER_SSL_VERIFICATION environment variable lower-cases to
# exactly "true"; it defaults to "True" when the variable is unset.
ENABLE_WEB_LOADER_SSL_VERIFICATION = PersistentConfig(
    env_name="ENABLE_WEB_LOADER_SSL_VERIFICATION",
    config_path="rag.web.loader.ssl_verification",
    env_value=os.getenv("ENABLE_WEB_LOADER_SSL_VERIFICATION", "True").lower()
    == "true",
)
# trust_env toggle for web search, registered under the config path
# "rag.web.search.trust_env". The flag is True only when the
# WEB_SEARCH_TRUST_ENV environment variable lower-cases to exactly "true";
# it defaults to "False" when the variable is unset.
WEB_SEARCH_TRUST_ENV = PersistentConfig(
    env_name="WEB_SEARCH_TRUST_ENV",
    config_path="rag.web.search.trust_env",
    env_value=os.environ.get("WEB_SEARCH_TRUST_ENV", "False").lower() == "true",
)
SEARXNG_QUERY_URL = PersistentConfig(
"SEARXNG_QUERY_URL",
@@ -2155,34 +2194,22 @@ SOUGOU_API_SK = PersistentConfig(
os.getenv("SOUGOU_API_SK", ""),
)
RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig(
"RAG_WEB_SEARCH_RESULT_COUNT",
"rag.web.search.result_count",
int(os.getenv("RAG_WEB_SEARCH_RESULT_COUNT", "3")),
# Tavily API key, registered under the config path
# "rag.web.search.tavily_api_key". The initial value is the TAVILY_API_KEY
# environment variable, or the empty string when it is unset.
TAVILY_API_KEY = PersistentConfig(
    env_name="TAVILY_API_KEY",
    config_path="rag.web.search.tavily_api_key",
    env_value=os.environ.get("TAVILY_API_KEY", ""),
)
RAG_WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig(
"RAG_WEB_SEARCH_CONCURRENT_REQUESTS",
"rag.web.search.concurrent_requests",
int(os.getenv("RAG_WEB_SEARCH_CONCURRENT_REQUESTS", "10")),
# Tavily extract-depth setting, registered under the config path
# "rag.web.search.tavily_extract_depth". The initial value is the
# TAVILY_EXTRACT_DEPTH environment variable, or "basic" when it is unset.
TAVILY_EXTRACT_DEPTH = PersistentConfig(
    env_name="TAVILY_EXTRACT_DEPTH",
    config_path="rag.web.search.tavily_extract_depth",
    env_value=os.environ.get("TAVILY_EXTRACT_DEPTH", "basic"),
)
RAG_WEB_LOADER_ENGINE = PersistentConfig(
"RAG_WEB_LOADER_ENGINE",
"rag.web.loader.engine",
os.environ.get("RAG_WEB_LOADER_ENGINE", "safe_web"),
)
RAG_WEB_SEARCH_TRUST_ENV = PersistentConfig(
"RAG_WEB_SEARCH_TRUST_ENV",
"rag.web.search.trust_env",
os.getenv("RAG_WEB_SEARCH_TRUST_ENV", "False").lower() == "true",
)
PLAYWRIGHT_WS_URI = PersistentConfig(
"PLAYWRIGHT_WS_URI",
"rag.web.loader.playwright_ws_uri",
os.environ.get("PLAYWRIGHT_WS_URI", ""),
# Playwright WebSocket URL setting for the web loader (presumably the
# endpoint of a remote Playwright instance; confirm against the loader code).
# The initial value is the PLAYWRIGHT_WS_URL environment variable, or the
# empty string when it is unset.
#
# The config path is kept lower-case dotted snake_case like the other
# "rag.web.loader.*" keys; the upper-case "PLAYWRIGHT_WS_URL" segment was an
# artefact of renaming the setting from *_URI to *_URL.
# NOTE(review): the pre-rename key was "rag.web.loader.playwright_ws_uri";
# confirm whether values already stored under it need migrating.
PLAYWRIGHT_WS_URL = PersistentConfig(
    "PLAYWRIGHT_WS_URL",
    "rag.web.loader.playwright_ws_url",
    os.environ.get("PLAYWRIGHT_WS_URL", ""),
)
PLAYWRIGHT_TIMEOUT = PersistentConfig(
@@ -2203,17 +2230,6 @@ FIRECRAWL_API_BASE_URL = PersistentConfig(
os.environ.get("FIRECRAWL_API_BASE_URL", "https://api.firecrawl.dev"),
)
TAVILY_API_KEY = PersistentConfig(
"TAVILY_API_KEY",
"rag.web.loader.tavily_api_key",
os.getenv("TAVILY_API_KEY", ""),
)
TAVILY_EXTRACT_DEPTH = PersistentConfig(
"TAVILY_EXTRACT_DEPTH",
"rag.web.loader.tavily_extract_depth",
os.getenv("TAVILY_EXTRACT_DEPTH", "basic"),
)
####################################
# Images