enh: tiktoken/token splitter support

This commit is contained in:
Timothy J. Baek
2024-10-13 02:07:50 -07:00
parent 8ae605ec4b
commit dff3732fcd
4 changed files with 49 additions and 7 deletions

View File

@@ -1014,6 +1014,22 @@ RAG_RERANKING_MODEL_TRUST_REMOTE_CODE = (
os.environ.get("RAG_RERANKING_MODEL_TRUST_REMOTE_CODE", "").lower() == "true"
)
RAG_TEXT_SPLITTER = PersistentConfig(
"RAG_TEXT_SPLITTER",
"rag.text_splitter",
os.environ.get("RAG_TEXT_SPLITTER", ""),
)
TIKTOKEN_CACHE_DIR = os.environ.get("TIKTOKEN_CACHE_DIR", f"{CACHE_DIR}/tiktoken")
TIKTOKEN_ENCODING_NAME = PersistentConfig(
"TIKTOKEN_ENCODING_NAME",
"rag.tiktoken_encoding_name",
os.environ.get("TIKTOKEN_ENCODING_NAME", "cl100k_base"),
)
CHUNK_SIZE = PersistentConfig(
"CHUNK_SIZE", "rag.chunk_size", int(os.environ.get("CHUNK_SIZE", "1000"))
)