mirror of
https://github.com/serp-ai/bark-with-voice-clone.git
synced 2025-12-15 03:07:58 +01:00
Add more settings
This commit is contained in:
@@ -9,10 +9,13 @@
|
||||
"from bark.generation import load_codec_model, generate_text_semantic\n",
|
||||
"from encodec.utils import convert_audio\n",
|
||||
"\n",
|
||||
"from transformers import BertTokenizer\n",
|
||||
"\n",
|
||||
"import torchaudio\n",
|
||||
"import torch\n",
|
||||
"\n",
|
||||
"model = load_codec_model(use_gpu=True)"
|
||||
"model = load_codec_model(use_gpu=True)\n",
|
||||
"tokenizer = BertTokenizer.from_pretrained(\"bert-base-multilingual-cased\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -59,7 +62,7 @@
|
||||
"# get seconds of audio\n",
|
||||
"seconds = wav.shape[-1] / model.sample_rate\n",
|
||||
"# generate semantic tokens\n",
|
||||
"semantic_tokens = generate_text_semantic(text, max_gen_duration_s=seconds)"
|
||||
"semantic_tokens = generate_text_semantic(text, max_gen_duration_s=seconds, top_k=50, top_p=.95, temp=0.7)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -117,12 +120,49 @@
|
||||
"source": [
|
||||
"from bark.api import generate_audio\n",
|
||||
"from bark.generation import SAMPLE_RATE\n",
|
||||
"text_prompt = \"\"\"\n",
|
||||
" Hello, my name is Suno. And, uh — and I like pizza. [laughs] \n",
|
||||
" But I also have other interests such as playing tic tac toe.\n",
|
||||
"\"\"\"\n",
|
||||
"voice_name = \"speaker_0\" # use your custom voice name here if you have one\n",
|
||||
"audio_array = generate_audio(text_prompt, history_prompt=voice_name)"
|
||||
"text_prompt = \"Hello, my name is Suno. And, uh — and I like pizza. [laughs]\"\n",
|
||||
"voice_name = \"speaker_0\" # use your custom voice name here if you have one"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# simple generation\n",
|
||||
"audio_array = generate_audio(text_prompt, history_prompt=voice_name, text_temp=0.7, waveform_temp=0.7)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# generation with more control\n",
|
||||
"from bark.generation import codec_decode, generate_coarse, generate_fine, generate_text_semantic\n",
|
||||
"x_semantic = generate_text_semantic(\n",
|
||||
" text_prompt,\n",
|
||||
" history_prompt=voice_name,\n",
|
||||
" temp=0.7,\n",
|
||||
" top_k=50,\n",
|
||||
" top_p=0.95,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"x_coarse_gen = generate_coarse(\n",
|
||||
" x_semantic,\n",
|
||||
" history_prompt=voice_name,\n",
|
||||
" temp=0.7,\n",
|
||||
" top_k=50,\n",
|
||||
" top_p=0.95,\n",
|
||||
")\n",
|
||||
"x_fine_gen = generate_fine(\n",
|
||||
" x_coarse_gen,\n",
|
||||
" history_prompt=voice_name,\n",
|
||||
" temp=0.5,\n",
|
||||
")\n",
|
||||
"audio_array = codec_decode(x_fine_gen)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -8,12 +8,49 @@
|
||||
"source": [
|
||||
"from bark.api import generate_audio\n",
|
||||
"from bark.generation import SAMPLE_RATE\n",
|
||||
"text_prompt = \"\"\"\n",
|
||||
" Hello, my name is Suno. And, uh — and I like pizza. [laughs] \n",
|
||||
" But I also have other interests such as playing tic tac toe.\n",
|
||||
"\"\"\"\n",
|
||||
"voice_name = \"speaker_0\" # use your custom voice name here if you have one\n",
|
||||
"audio_array = generate_audio(text_prompt, history_prompt=voice_name)"
|
||||
"text_prompt = \"Hello, my name is Suno. And, uh — and I like pizza. [laughs]\"\n",
|
||||
"voice_name = \"speaker_0\" # use your custom voice name here if you have one"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# simple generation\n",
|
||||
"audio_array = generate_audio(text_prompt, history_prompt=voice_name, text_temp=0.7, waveform_temp=0.7)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# generation with more control\n",
|
||||
"from bark.generation import codec_decode, generate_coarse, generate_fine, generate_text_semantic\n",
|
||||
"x_semantic = generate_text_semantic(\n",
|
||||
" text_prompt,\n",
|
||||
" history_prompt=voice_name,\n",
|
||||
" temp=0.7,\n",
|
||||
" top_k=50,\n",
|
||||
" top_p=0.95,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"x_coarse_gen = generate_coarse(\n",
|
||||
" x_semantic,\n",
|
||||
" history_prompt=voice_name,\n",
|
||||
" temp=0.7,\n",
|
||||
" top_k=50,\n",
|
||||
" top_p=0.95,\n",
|
||||
")\n",
|
||||
"x_fine_gen = generate_fine(\n",
|
||||
" x_coarse_gen,\n",
|
||||
" history_prompt=voice_name,\n",
|
||||
" temp=0.5,\n",
|
||||
")\n",
|
||||
"audio_array = codec_decode(x_fine_gen)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user