Merge pull request #12517 from Ithanil/only_keep_retrieved_urls

fix: only keep URLs as sources for which the content was actually retrieved
This commit is contained in:
Timothy Jaeryang Baek
2025-04-06 15:08:47 -07:00
committed by GitHub

View File

@@ -1478,6 +1478,7 @@ async def process_web_search(
 trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
 )
 docs = await loader.aload()
+urls = [doc.metadata["source"] for doc in docs] # only keep URLs which could be retrieved
 if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
 return {