feat: Add control for how message content is split for TTS generation reqs
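In short, the change introduces a `split_on` option (exposed in the admin Audio settings as "Response splitting") that controls how an assistant response is chunked before being sent to the TTS endpoint: per sentence (`punctuation`, the previous behaviour and the default), per paragraph (`paragraphs`), or not at all (`none`). Below is a minimal sketch of the selection logic, mirroring the switch this commit adds to the response message component; the standalone helper name `prepareContentForTTS` is illustrative and not part of the commit:

```typescript
import {
  extractParagraphsForAudio,
  extractSentencesForAudio,
  prepareTextForTTS
} from '$lib/utils';
import { TTS_RESPONSE_SPLIT } from '$lib/types';

// Illustrative helper: pick the chunks that will each become one TTS request.
const prepareContentForTTS = (content: string, splitOn: TTS_RESPONSE_SPLIT): string[] => {
  switch (splitOn) {
    case TTS_RESPONSE_SPLIT.PARAGRAPHS:
      // one request per paragraph (text split on newlines, code blocks left intact)
      return extractParagraphsForAudio(content);
    case TTS_RESPONSE_SPLIT.NONE:
      // a single request carrying the whole cleaned response
      return [prepareTextForTTS(content)];
    case TTS_RESPONSE_SPLIT.PUNCTUATION:
    default:
      // previous behaviour: one request per (merged) sentence
      return extractSentencesForAudio(content);
  }
};
```

The backend default comes from the new `AUDIO_TTS_SPLIT_ON` environment variable (falling back to `punctuation`) and is surfaced to the client as `config.audio.tts.split_on`.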
@@ -37,6 +37,7 @@ from config import (
     AUDIO_TTS_ENGINE,
     AUDIO_TTS_MODEL,
     AUDIO_TTS_VOICE,
+    AUDIO_TTS_SPLIT_ON,
     AppConfig,
     CORS_ALLOW_ORIGIN,
 )
@@ -72,6 +73,7 @@ app.state.config.TTS_ENGINE = AUDIO_TTS_ENGINE
 app.state.config.TTS_MODEL = AUDIO_TTS_MODEL
 app.state.config.TTS_VOICE = AUDIO_TTS_VOICE
 app.state.config.TTS_API_KEY = AUDIO_TTS_API_KEY
+app.state.config.TTS_SPLIT_ON = AUDIO_TTS_SPLIT_ON
 
 # setting device type for whisper model
 whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
@@ -88,6 +90,7 @@ class TTSConfigForm(BaseModel):
     ENGINE: str
     MODEL: str
     VOICE: str
+    SPLIT_ON: str
 
 
 class STTConfigForm(BaseModel):
@@ -139,6 +142,7 @@ async def get_audio_config(user=Depends(get_admin_user)):
             "ENGINE": app.state.config.TTS_ENGINE,
             "MODEL": app.state.config.TTS_MODEL,
             "VOICE": app.state.config.TTS_VOICE,
+            "SPLIT_ON": app.state.config.TTS_SPLIT_ON,
         },
         "stt": {
             "OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
@@ -159,6 +163,7 @@ async def update_audio_config(
     app.state.config.TTS_ENGINE = form_data.tts.ENGINE
     app.state.config.TTS_MODEL = form_data.tts.MODEL
     app.state.config.TTS_VOICE = form_data.tts.VOICE
+    app.state.config.TTS_SPLIT_ON = form_data.tts.SPLIT_ON
 
     app.state.config.STT_OPENAI_API_BASE_URL = form_data.stt.OPENAI_API_BASE_URL
     app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
@@ -173,6 +178,7 @@ async def update_audio_config(
             "ENGINE": app.state.config.TTS_ENGINE,
             "MODEL": app.state.config.TTS_MODEL,
             "VOICE": app.state.config.TTS_VOICE,
+            "SPLIT_ON": app.state.config.TTS_SPLIT_ON,
         },
         "stt": {
             "OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL,
@@ -1484,3 +1484,9 @@ AUDIO_TTS_VOICE = PersistentConfig(
     "audio.tts.voice",
     os.getenv("AUDIO_TTS_VOICE", "alloy"), # OpenAI default voice
 )
+
+AUDIO_TTS_SPLIT_ON = PersistentConfig(
+    "AUDIO_TTS_SPLIT_ON",
+    "audio.tts.split_on",
+    os.getenv("AUDIO_TTS_SPLIT_ON", "punctuation"),
+)
@@ -1924,6 +1924,7 @@ async def get_app_config(request: Request):
             "tts": {
                 "engine": audio_app.state.config.TTS_ENGINE,
                 "voice": audio_app.state.config.TTS_VOICE,
+                "split_on": audio_app.state.config.TTS_SPLIT_ON,
             },
             "stt": {
                 "engine": audio_app.state.config.STT_ENGINE,
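On the client, the new value arrives through the app config endpoint alongside the existing audio settings. Roughly the shape the frontend reads (only the fields shown in the hunks above; the exact enclosing payload is assumed):

```typescript
// Sketch of the audio section of the app config as read by the frontend;
// field names come from the diff, the surrounding payload is an assumption.
interface AudioAppConfig {
  tts: {
    engine: string;
    voice: string;
    split_on: string; // 'punctuation' (default) | 'paragraphs' | 'none'
  };
  stt: {
    engine: string;
  };
}
```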
@@ -132,7 +132,11 @@ export const synthesizeOpenAISpeech = async (
   return res;
 };
 
-export const getModels = async (token: string = '') => {
+interface AvailableModelsResponse {
+  models: { name: string; id: string }[] | { id: string }[];
+}
+
+export const getModels = async (token: string = ''): Promise<AvailableModelsResponse> => {
   let error = null;
 
   const res = await fetch(`${AUDIO_API_BASE_URL}/models`, {
@@ -10,31 +10,36 @@
     getModels as _getModels,
     getVoices as _getVoices
   } from '$lib/apis/audio';
-  import { user, settings, config } from '$lib/stores';
+  import { config } from '$lib/stores';
 
   import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
 
-  const i18n = getContext('i18n');
-
-  export let saveHandler: Function;
+  import { TTS_RESPONSE_SPLIT } from '$lib/types';
+
+  import type { Writable } from 'svelte/store';
+  import type { i18n as i18nType } from 'i18next';
+
+  const i18n = getContext<Writable<i18nType>>('i18n');
+
+  export let saveHandler: () => void;
 
   // Audio
 
   let TTS_OPENAI_API_BASE_URL = '';
   let TTS_OPENAI_API_KEY = '';
   let TTS_API_KEY = '';
   let TTS_ENGINE = '';
   let TTS_MODEL = '';
   let TTS_VOICE = '';
+  let TTS_SPLIT_ON: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;
 
   let STT_OPENAI_API_BASE_URL = '';
   let STT_OPENAI_API_KEY = '';
   let STT_ENGINE = '';
   let STT_MODEL = '';
 
-  let voices = [];
-  let models = [];
-  let nonLocalVoices = false;
+  // eslint-disable-next-line no-undef
+  let voices: SpeechSynthesisVoice[] = [];
+  let models: Awaited<ReturnType<typeof _getModels>>['models'] = [];
 
   const getModels = async () => {
     if (TTS_ENGINE === '') {
@@ -53,8 +58,8 @@
 
   const getVoices = async () => {
     if (TTS_ENGINE === '') {
-      const getVoicesLoop = setInterval(async () => {
-        voices = await speechSynthesis.getVoices();
+      const getVoicesLoop = setInterval(() => {
+        voices = speechSynthesis.getVoices();
 
         // do your loop
         if (voices.length > 0) {
@@ -81,7 +86,8 @@
         API_KEY: TTS_API_KEY,
         ENGINE: TTS_ENGINE,
         MODEL: TTS_MODEL,
-        VOICE: TTS_VOICE
+        VOICE: TTS_VOICE,
+        SPLIT_ON: TTS_SPLIT_ON
       },
       stt: {
         OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL,
@@ -92,9 +98,8 @@
     });
 
     if (res) {
-      toast.success($i18n.t('Audio settings updated successfully'));
-      config.set(await getBackendConfig());
+      saveHandler();
+      getBackendConfig().then(config.set).catch(() => {});
     }
   };
 
@@ -111,6 +116,8 @@
       TTS_MODEL = res.tts.MODEL;
       TTS_VOICE = res.tts.VOICE;
 
+      TTS_SPLIT_ON = res.tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;
+
       STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL;
       STT_OPENAI_API_KEY = res.stt.OPENAI_API_KEY;
 
@@ -139,7 +146,7 @@
       <div class=" self-center text-xs font-medium">{$i18n.t('Speech-to-Text Engine')}</div>
       <div class="flex items-center relative">
         <select
-          class="dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
+          class="dark:bg-gray-900 cursor-pointer w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
           bind:value={STT_ENGINE}
           placeholder="Select an engine"
         >
@@ -195,7 +202,7 @@
       <div class=" self-center text-xs font-medium">{$i18n.t('Text-to-Speech Engine')}</div>
       <div class="flex items-center relative">
         <select
-          class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
+          class=" dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
           bind:value={TTS_ENGINE}
           placeholder="Select a mode"
           on:change={async (e) => {
@@ -203,7 +210,7 @@
             await getVoices();
             await getModels();
 
-            if (e.target.value === 'openai') {
+            if (e.target?.value === 'openai') {
               TTS_VOICE = 'alloy';
               TTS_MODEL = 'tts-1';
             } else {
@@ -351,6 +358,28 @@
           </div>
         </div>
       {/if}
+
+      <hr class="dark:border-gray-850 my-2" />
+
+      <div class="pt-0.5 flex w-full justify-between">
+        <div class="self-center text-xs font-medium">{$i18n.t('Response splitting')}</div>
+        <div class="flex items-center relative">
+          <select
+            class="dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
+            placeholder="Select how to split response text"
+            bind:value={TTS_SPLIT_ON}
+          >
+            {#each Object.values(TTS_RESPONSE_SPLIT) as split}
+              <option value={split}>{$i18n.t(split.charAt(0).toUpperCase() + split.slice(1))}</option>
+            {/each}
+          </select>
+        </div>
+      </div>
+      <div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
+        {$i18n.t(
+          "Choose how to split response text for speech synthesis. 'Punctuation' splits by sentences, 'paragraphs' splits by paragraphs, and 'none' sends the response as a single string."
+        )}
+      </div>
     </div>
   </div>
 </div>
@@ -2,11 +2,10 @@
   import { toast } from 'svelte-sonner';
   import dayjs from 'dayjs';
 
-  import { fade } from 'svelte/transition';
   import { createEventDispatcher } from 'svelte';
   import { onMount, tick, getContext } from 'svelte';
 
-  const i18n = getContext('i18n');
+  const i18n = getContext<Writable<i18nType>>('i18n');
 
   const dispatch = createEventDispatcher();
 
@@ -15,20 +14,18 @@
   import { imageGenerations } from '$lib/apis/images';
   import {
     approximateToHumanReadable,
-    extractSentences,
-    replaceTokens,
-    processResponseContent
+    extractParagraphsForAudio,
+    extractSentencesForAudio,
+    prepareTextForTTS,
   } from '$lib/utils';
   import { WEBUI_BASE_URL } from '$lib/constants';
 
   import Name from './Name.svelte';
   import ProfileImage from './ProfileImage.svelte';
   import Skeleton from './Skeleton.svelte';
-  import CodeBlock from './CodeBlock.svelte';
   import Image from '$lib/components/common/Image.svelte';
   import Tooltip from '$lib/components/common/Tooltip.svelte';
   import RateComment from './RateComment.svelte';
-  import CitationsModal from '$lib/components/chat/Messages/CitationsModal.svelte';
   import Spinner from '$lib/components/common/Spinner.svelte';
   import WebSearchResults from './ResponseMessage/WebSearchResults.svelte';
   import Sparkles from '$lib/components/icons/Sparkles.svelte';
@@ -36,7 +33,38 @@
   import Error from './Error.svelte';
   import Citations from './Citations.svelte';
 
-  export let message;
+  import type { Writable } from 'svelte/store';
+  import type { i18n as i18nType } from 'i18next';
+  import { TTS_RESPONSE_SPLIT } from '$lib/types';
+
+  interface MessageType {
+    id: string;
+    model: string;
+    content: string;
+    files?: { type: string; url: string }[];
+    timestamp: number;
+    role: string;
+    statusHistory?: { done: boolean; action: string; description: string; urls?: string[]; query?: string; }[];
+    status?: { done: boolean; action: string; description: string; urls?: string[]; query?: string; };
+    done: boolean;
+    error?: boolean | { content: string };
+    citations?: string[];
+    info?: {
+      openai?: boolean;
+      prompt_tokens?: number;
+      completion_tokens?: number;
+      total_tokens?: number;
+      eval_count?: number;
+      eval_duration?: number;
+      prompt_eval_count?: number;
+      prompt_eval_duration?: number;
+      total_duration?: number;
+      load_duration?: number;
+    };
+    annotation?: { type: string; rating: number; };
+  }
+
+  export let message: MessageType;
   export let siblings;
 
   export let isLastMessage = true;
@@ -60,28 +88,33 @@
   let editedContent = '';
   let editTextAreaElement: HTMLTextAreaElement;
 
-  let sentencesAudio = {};
-  let speaking = null;
-  let speakingIdx = null;
+  let audioParts: Record<number, HTMLAudioElement | null> = {};
+  let speaking = false;
+  let speakingIdx: number | undefined;
 
   let loadingSpeech = false;
   let generatingImage = false;
 
   let showRateComment = false;
 
-  const playAudio = (idx) => {
-    return new Promise((res) => {
+  const playAudio = (idx: number) => {
+    return new Promise<void>((res) => {
       speakingIdx = idx;
-      const audio = sentencesAudio[idx];
+      const audio = audioParts[idx];
+
+      if (!audio) {
+        return res();
+      }
 
       audio.play();
-      audio.onended = async (e) => {
+      audio.onended = async () => {
         await new Promise((r) => setTimeout(r, 300));
 
-        if (Object.keys(sentencesAudio).length - 1 === idx) {
-          speaking = null;
+        if (Object.keys(audioParts).length - 1 === idx) {
+          speaking = false;
         }
 
-        res(e);
+        res();
       };
     });
   };
@@ -91,113 +124,119 @@
       try {
         speechSynthesis.cancel();
 
-        sentencesAudio[speakingIdx].pause();
-        sentencesAudio[speakingIdx].currentTime = 0;
+        if (speakingIdx !== undefined && audioParts[speakingIdx]) {
+          audioParts[speakingIdx]!.pause();
+          audioParts[speakingIdx]!.currentTime = 0;
+        }
       } catch {}
 
-      speaking = null;
-      speakingIdx = null;
-    } else {
-      if ((message?.content ?? '').trim() !== '') {
-        speaking = true;
-
-        if ($config.audio.tts.engine !== '') {
-          loadingSpeech = true;
-
-          const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
-            const lastIndex = mergedTexts.length - 1;
-            if (lastIndex >= 0) {
-              const previousText = mergedTexts[lastIndex];
-              const wordCount = previousText.split(/\s+/).length;
-              if (wordCount < 2) {
-                mergedTexts[lastIndex] = previousText + ' ' + currentText;
-              } else {
-                mergedTexts.push(currentText);
-              }
-            } else {
-              mergedTexts.push(currentText);
-            }
-            return mergedTexts;
-          }, []);
-
-          console.log(sentences);
-
-          if (sentences.length > 0) {
-            sentencesAudio = sentences.reduce((a, e, i, arr) => {
-              a[i] = null;
-              return a;
-            }, {});
-
-            let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
-
-            for (const [idx, sentence] of sentences.entries()) {
-              const res = await synthesizeOpenAISpeech(
-                localStorage.token,
-                $settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
-                  ? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
-                  : $config?.audio?.tts?.voice,
-                sentence
-              ).catch((error) => {
-                toast.error(error);
-
-                speaking = null;
-                loadingSpeech = false;
-
-                return null;
-              });
-
-              if (res) {
-                const blob = await res.blob();
-                const blobUrl = URL.createObjectURL(blob);
-                const audio = new Audio(blobUrl);
-                sentencesAudio[idx] = audio;
-                loadingSpeech = false;
-                lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
-              }
-            }
-          } else {
-            speaking = null;
-            loadingSpeech = false;
-          }
-        } else {
-          let voices = [];
-          const getVoicesLoop = setInterval(async () => {
-            voices = await speechSynthesis.getVoices();
-            if (voices.length > 0) {
-              clearInterval(getVoicesLoop);
-
-              const voice =
-                voices
-                  ?.filter(
-                    (v) =>
-                      v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
-                  )
-                  ?.at(0) ?? undefined;
-
-              console.log(voice);
-
-              const speak = new SpeechSynthesisUtterance(message.content);
-
-              console.log(speak);
-
-              speak.onend = () => {
-                speaking = null;
-                if ($settings.conversationMode) {
-                  document.getElementById('voice-input-button')?.click();
-                }
-              };
-
-              if (voice) {
-                speak.voice = voice;
-              }
-
-              speechSynthesis.speak(speak);
-            }
-          }, 100);
-        }
-      } else {
-        toast.error($i18n.t('No content to speak'));
+      speaking = false;
+      speakingIdx = undefined;
+      return;
+    }
+
+    if (!(message?.content ?? '').trim().length) {
+      toast.info($i18n.t('No content to speak'));
+      return;
+    }
+
+    speaking = true;
+
+    if ($config.audio.tts.engine !== '') {
+      loadingSpeech = true;
+
+      const preparedMessageContent: string[] = [];
+
+      switch ($config.audio.tts.split_on) {
+        default:
+        case TTS_RESPONSE_SPLIT.PUNCTUATION:
+          preparedMessageContent.push(...extractSentencesForAudio(message.content));
+          break;
+        case TTS_RESPONSE_SPLIT.PARAGRAPHS:
+          preparedMessageContent.push(...extractParagraphsForAudio(message.content));
+          break;
+        case TTS_RESPONSE_SPLIT.NONE:
+          preparedMessageContent.push(prepareTextForTTS(message.content));
+          break;
       }
+
+      if (!preparedMessageContent.length) {
+        console.log('No content to speak');
+        toast.info($i18n.t('No content to speak'));
+
+        speaking = false;
+        loadingSpeech = false;
+        return;
+      }
+
+      console.debug('Prepared message content for TTS', preparedMessageContent);
+
+      audioParts = preparedMessageContent.reduce((acc, _sentence, idx) => {
+        acc[idx] = null;
+        return acc;
+      }, {} as typeof audioParts);
+
+      let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
+
+      for (const [idx, sentence] of preparedMessageContent.entries()) {
+        const res = await synthesizeOpenAISpeech(
+          localStorage.token,
+          $settings?.audio?.tts?.defaultVoice === $config.audio.tts.voice
+            ? ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
+            : $config?.audio?.tts?.voice,
+          sentence
+        ).catch((error) => {
+          console.error(error);
+          toast.error(error);
+
+          speaking = false;
+          loadingSpeech = false;
+        });
+
+        if (res) {
+          const blob = await res.blob();
+          const blobUrl = URL.createObjectURL(blob);
+          const audio = new Audio(blobUrl);
+          audioParts[idx] = audio;
+          loadingSpeech = false;
+          lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
+        }
+      }
+    } else {
+      let voices = [];
+      const getVoicesLoop = setInterval(() => {
+        voices = speechSynthesis.getVoices();
+        if (voices.length > 0) {
+          clearInterval(getVoicesLoop);
+
+          const voice =
+            voices
+              ?.filter(
+                (v) =>
+                  v.voiceURI === ($settings?.audio?.tts?.voice ?? $config?.audio?.tts?.voice)
+              )
+              ?.at(0) ?? undefined;
+
+          console.log(voice);
+
+          const speak = new SpeechSynthesisUtterance(message.content);
+
+          console.log(speak);
+
+          speak.onend = () => {
+            speaking = false;
+            if ($settings.conversationMode) {
+              document.getElementById('voice-input-button')?.click();
+            }
+          };
+
+          if (voice) {
+            speak.voice = voice;
+          }
+
+          speechSynthesis.speak(speak);
+        }
+      }, 100);
     }
   };
 
@@ -230,7 +269,7 @@
     await tick();
   };
 
-  const generateImage = async (message) => {
+  const generateImage = async (message: MessageType) => {
     generatingImage = true;
     const res = await imageGenerations(localStorage.token, message.content).catch((error) => {
       toast.error(error);
@@ -285,7 +324,7 @@
       </Name>
 
       <div>
-        {#if (message?.files ?? []).filter((f) => f.type === 'image').length > 0}
+        {#if message?.files && message.files?.filter((f) => f.type === 'image').length > 0}
           <div class="my-2.5 w-full flex overflow-x-auto gap-2 flex-wrap">
             {#each message.files as file}
               <div>
@@ -304,7 +343,7 @@
             message?.statusHistory ?? [...(message?.status ? [message?.status] : [])]
           ).at(-1)}
           <div class="flex items-center gap-2 pt-0.5 pb-1">
-            {#if status.done === false}
+            {#if status?.done === false}
               <div class="">
                 <Spinner className="size-4" />
               </div>
@@ -521,7 +560,7 @@
             : 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition"
           on:click={() => {
             if (!loadingSpeech) {
-              toggleSpeakMessage(message);
+              toggleSpeakMessage();
             }
           }}
         >
@@ -661,7 +700,7 @@
           `${
             Math.round(
               ((message.info.eval_count ?? 0) /
-                (message.info.eval_duration / 1000000000)) *
+                ((message.info.eval_duration ?? 0) / 1000000000)) *
                 100
             ) / 100
           } tokens` ?? 'N/A'
@@ -669,7 +708,7 @@
           prompt_token/s: ${
             Math.round(
               ((message.info.prompt_eval_count ?? 0) /
-                (message.info.prompt_eval_duration / 1000000000)) *
+                ((message.info.prompt_eval_duration ?? 0) / 1000000000)) *
                 100
             ) / 100 ?? 'N/A'
           } tokens<br/>
@@ -688,7 +727,7 @@
           eval_duration: ${
             Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
           }ms<br/>
-          approximate_total: ${approximateToHumanReadable(message.info.total_duration)}`}
+          approximate_total: ${approximateToHumanReadable((message.info.total_duration ?? 0))}`}
           placement="top"
         >
           <Tooltip content={$i18n.t('Generation Info')} placement="bottom">
@@ -7,3 +7,9 @@ export type Banner = {
   dismissible?: boolean;
   timestamp: number;
 };
+
+export enum TTS_RESPONSE_SPLIT {
+  PUNCTUATION = 'punctuation',
+  PARAGRAPHS = 'paragraphs',
+  NONE = 'none',
+}
@@ -408,7 +408,7 @@ const convertOpenAIMessages = (convo) => {
   let currentId = '';
   let lastId = null;
 
-  for (let message_id in mapping) {
+  for (const message_id in mapping) {
     const message = mapping[message_id];
     currentId = message_id;
     try {
@@ -442,7 +442,7 @@ const convertOpenAIMessages = (convo) => {
     }
   }
 
-  let history = {};
+  const history: Record<PropertyKey, (typeof messages)[number]> = {};
   messages.forEach((obj) => (history[obj.id] = obj));
 
   const chat = {
@@ -481,7 +481,7 @@ const validateChat = (chat) => {
   }
 
   // Every message's content should be a string
-  for (let message of messages) {
+  for (const message of messages) {
     if (typeof message.content !== 'string') {
       return false;
     }
@@ -494,7 +494,7 @@ export const convertOpenAIChats = (_chats) => {
   // Create a list of dictionaries with each conversation from import
   const chats = [];
   let failed = 0;
-  for (let convo of _chats) {
+  for (const convo of _chats) {
     const chat = convertOpenAIMessages(convo);
 
     if (validateChat(chat)) {
@@ -513,7 +513,7 @@ export const convertOpenAIChats = (_chats) => {
   return chats;
 };
 
-export const isValidHttpUrl = (string) => {
+export const isValidHttpUrl = (string: string) => {
   let url;
 
   try {
@@ -525,7 +525,7 @@ export const isValidHttpUrl = (string) => {
   return url.protocol === 'http:' || url.protocol === 'https:';
 };
 
-export const removeEmojis = (str) => {
+export const removeEmojis = (str: string) => {
   // Regular expression to match emojis
   const emojiRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/g;
 
@@ -533,20 +533,24 @@ export const removeEmojis = (str) => {
   return str.replace(emojiRegex, '');
 };
 
-export const removeFormattings = (str) => {
+export const removeFormattings = (str: string) => {
   return str.replace(/(\*)(.*?)\1/g, '').replace(/(```)(.*?)\1/gs, '');
 };
 
-export const extractSentences = (text) => {
-  // This regular expression matches code blocks marked by triple backticks
-  const codeBlockRegex = /```[\s\S]*?```/g;
+export const prepareTextForTTS = (content: string) => {
+  return removeFormattings(removeEmojis(content.trim()));
+};
 
-  let codeBlocks = [];
+// This regular expression matches code blocks marked by triple backticks
+const codeBlockRegex = /```[\s\S]*?```/g;
+
+export const extractSentences = (text: string) => {
+  const codeBlocks: string[] = [];
   let index = 0;
 
   // Temporarily replace code blocks with placeholders and store the blocks separately
   text = text.replace(codeBlockRegex, (match) => {
-    let placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
+    const placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
     codeBlocks[index++] = match;
     return placeholder;
   });
@@ -561,11 +565,36 @@ export const extractSentences = (text) => {
   });
 
   return sentences
-    .map((sentence) => removeFormattings(removeEmojis(sentence.trim())))
-    .filter((sentence) => sentence);
+    .map(prepareTextForTTS)
+    .filter(Boolean);
 };
 
-export const extractSentencesForAudio = (text) => {
+export const extractParagraphsForAudio = (text: string) => {
+  const codeBlocks: string[] = [];
+  let index = 0;
+
+  // Temporarily replace code blocks with placeholders and store the blocks separately
+  text = text.replace(codeBlockRegex, (match) => {
+    const placeholder = `\u0000${index}\u0000`; // Use a unique placeholder
+    codeBlocks[index++] = match;
+    return placeholder;
+  });
+
+  // Split the modified text into paragraphs based on newlines, avoiding these blocks
+  let paragraphs = text.split(/\n+/);
+
+  // Restore code blocks and process paragraphs
+  paragraphs = paragraphs.map((paragraph) => {
+    // Check if the paragraph includes a placeholder for a code block
+    return paragraph.replace(/\u0000(\d+)\u0000/g, (_, idx) => codeBlocks[idx]);
+  });
+
+  return paragraphs
+    .map(prepareTextForTTS)
+    .filter(Boolean);
+};
+
+export const extractSentencesForAudio = (text: string) => {
   return extractSentences(text).reduce((mergedTexts, currentText) => {
     const lastIndex = mergedTexts.length - 1;
     if (lastIndex >= 0) {
@@ -580,7 +609,7 @@ export const extractSentencesForAudio = (text) => {
       mergedTexts.push(currentText);
     }
     return mergedTexts;
-  }, []);
+  }, [] as string[]);
 };
 
 export const blobToFile = (blob, fileName) => {
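For reference, a hedged usage sketch of the text helpers touched above; the sample reply and the exact chunk boundaries are illustrative, since very short sentences may be merged by the helper internals:

```typescript
import {
  extractParagraphsForAudio,
  extractSentencesForAudio,
  prepareTextForTTS
} from '$lib/utils';

const reply = 'Sure! Here is the plan.\n\nStep one: install it. Step two: run it.';

// 'punctuation' mode: sentence-sized chunks, with short sentences merged into a neighbour.
extractSentencesForAudio(reply);
// e.g. ['Sure! Here is the plan.', 'Step one: install it.', 'Step two: run it.']

// 'paragraphs' mode: split on newlines, code blocks preserved as single chunks.
extractParagraphsForAudio(reply);
// e.g. ['Sure! Here is the plan.', 'Step one: install it. Step two: run it.']

// 'none' mode: one cleaned string (trimmed, emojis and markdown formatting removed).
prepareTextForTTS(reply);
```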