Merge pull request #4886 from kiosion/dev

feat: Add control for how message content is split for TTS generation requests
This commit is contained in:
Timothy Jaeryang Baek
2024-08-26 15:02:30 +02:00
committed by GitHub
14 changed files with 424 additions and 236 deletions

View File

@@ -37,6 +37,7 @@ from config import (
AUDIO_TTS_ENGINE,
AUDIO_TTS_MODEL,
AUDIO_TTS_VOICE,
AUDIO_TTS_SPLIT_ON,
AppConfig,
CORS_ALLOW_ORIGIN,
)
@@ -72,6 +73,7 @@ app.state.config.TTS_ENGINE = AUDIO_TTS_ENGINE
app.state.config.TTS_MODEL = AUDIO_TTS_MODEL
app.state.config.TTS_VOICE = AUDIO_TTS_VOICE
app.state.config.TTS_API_KEY = AUDIO_TTS_API_KEY
app.state.config.TTS_SPLIT_ON = AUDIO_TTS_SPLIT_ON
# setting device type for whisper model
whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
@@ -88,6 +90,7 @@ class TTSConfigForm(BaseModel):
ENGINE: str
MODEL: str
VOICE: str
SPLIT_ON: str
class STTConfigForm(BaseModel):
@@ -139,6 +142,7 @@ async def get_audio_config(user=Depends(get_admin_user)):
"ENGINE": app.state.config.TTS_ENGINE,
"MODEL": app.state.config.TTS_MODEL,
"VOICE": app.state.config.TTS_VOICE,
"SPLIT_ON": app.state.config.TTS_SPLIT_ON,
},
"stt": {
"OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
@@ -159,6 +163,7 @@ async def update_audio_config(
app.state.config.TTS_ENGINE = form_data.tts.ENGINE
app.state.config.TTS_MODEL = form_data.tts.MODEL
app.state.config.TTS_VOICE = form_data.tts.VOICE
app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON
app.state.config.STT_OPENAI_API_BASE_URL = form_data.stt.OPENAI_API_BASE_URL
app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
@@ -173,6 +178,7 @@ async def update_audio_config(
"ENGINE": app.state.config.TTS_ENGINE,
"MODEL": app.state.config.TTS_MODEL,
"VOICE": app.state.config.TTS_VOICE,
"SPLIT_ON": app.state.config.TTS_SPLIT_ON,
},
"stt": {
"OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,

View File

@@ -1484,3 +1484,9 @@ AUDIO_TTS_VOICE = PersistentConfig(
"audio.tts.voice",
os.getenv("AUDIO_TTS_VOICE", "alloy"), # OpenAI default voice
)
AUDIO_TTS_SPLIT_ON = PersistentConfig(
"AUDIO_TTS_SPLIT_ON",
"audio.tts.split_on",
os.getenv("AUDIO_TTS_SPLIT_ON", "punctuation"),
)

View File

@@ -1933,6 +1933,7 @@ async def get_app_config(request: Request):
"tts": {
"engine": audio_app.state.config.TTS_ENGINE,
"voice": audio_app.state.config.TTS_VOICE,
"split_on": audio_app.state.config.TTS_SPLIT_ON,
},
"stt": {
"engine": audio_app.state.config.STT_ENGINE,