fix: handle unicode filenames in external document loader

Files with Unicode characters in their names (e.g., ü.pdf) could cause failures, because HTTP header values only allow Latin-1 characters.
This change URL-encodes the `X-Filename` value with `urllib.parse.quote` before adding it to the request headers, preventing failures when uploading or processing such files.

Fixes: #17000
This commit is contained in:
Athanasios Oikonomou
2025-08-28 22:19:25 +03:00
committed by Athanasios Oikonomou
parent 0ebe4f8f84
commit d735b036fe

View File

@@ -1,6 +1,7 @@
import requests import requests
import logging, os import logging, os
from typing import Iterator, List, Union from typing import Iterator, List, Union
from urllib.parse import quote
from langchain_core.document_loaders import BaseLoader from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document from langchain_core.documents import Document
@@ -37,7 +38,7 @@ class ExternalDocumentLoader(BaseLoader):
headers["Authorization"] = f"Bearer {self.api_key}" headers["Authorization"] = f"Bearer {self.api_key}"
try: try:
headers["X-Filename"] = os.path.basename(self.file_path) headers["X-Filename"] = quote(os.path.basename(self.file_path))
except: except:
pass pass