mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-16 20:07:49 +01:00
include html langchain loader for RAG
This commit is contained in:
@@ -21,6 +21,7 @@ from langchain_community.document_loaders import (
     TextLoader,
     PyPDFLoader,
     CSVLoader,
+    UnstructuredHTMLLoader,
     Docx2txtLoader,
     UnstructuredEPubLoader,
     UnstructuredWordDocumentLoader,
@@ -402,6 +403,8 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
         loader = UnstructuredRSTLoader(file_path, mode="elements")
     elif file_ext == "xml":
         loader = UnstructuredXMLLoader(file_path)
+    elif file_ext in ["htm", "html"]:
+        loader = UnstructuredHTMLLoader(file_path)
     elif file_ext == "md":
         loader = UnstructuredMarkdownLoader(file_path)
     elif file_content_type == "application/epub+zip":
Reference in New Issue
Block a user