Add more settings

Francis LaBounty
2023-04-21 23:10:15 -06:00
parent e0c3b1989a
commit 527f9a910d
2 changed files with 91 additions and 14 deletions
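The "more settings" added here are sampling controls for Bark's generation calls: top_k, top_p, and temperature arguments on generate_text_semantic, plus separate text_temp / waveform_temp arguments on generate_audio. A minimal sketch of how the new knobs are passed, assembled from the calls shown in the diff below (the prompt text and voice name are illustrative):

from bark.api import generate_audio
from bark.generation import generate_text_semantic

text_prompt = "Hello, my name is Suno."  # illustrative prompt
voice_name = "speaker_0"                 # voice preset; use your own custom voice if you have one

# One-call path: separate temperatures for the text (semantic) and waveform stages.
audio_array = generate_audio(
    text_prompt,
    history_prompt=voice_name,
    text_temp=0.7,
    waveform_temp=0.7,
)

# Lower-level path: top-k / nucleus sampling when generating semantic tokens directly.
x_semantic = generate_text_semantic(
    text_prompt,
    history_prompt=voice_name,
    temp=0.7,
    top_k=50,
    top_p=0.95,
)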


@@ -9,10 +9,13 @@
"from bark.generation import load_codec_model, generate_text_semantic\n",
"from encodec.utils import convert_audio\n",
"\n",
"from transformers import BertTokenizer\n",
"\n",
"import torchaudio\n",
"import torch\n",
"\n",
"model = load_codec_model(use_gpu=True)"
"model = load_codec_model(use_gpu=True)\n",
"tokenizer = BertTokenizer.from_pretrained(\"bert-base-multilingual-cased\")"
]
},
{
@@ -59,7 +62,7 @@
"# get seconds of audio\n",
"seconds = wav.shape[-1] / model.sample_rate\n",
"# generate semantic tokens\n",
"semantic_tokens = generate_text_semantic(text, max_gen_duration_s=seconds)"
"semantic_tokens = generate_text_semantic(text, max_gen_duration_s=seconds, top_k=50, top_p=.95, temp=0.7)"
]
},
{
@@ -117,12 +120,49 @@
"source": [
"from bark.api import generate_audio\n",
"from bark.generation import SAMPLE_RATE\n",
"text_prompt = \"\"\"\n",
" Hello, my name is Suno. And, uh — and I like pizza. [laughs] \n",
" But I also have other interests such as playing tic tac toe.\n",
"\"\"\"\n",
"voice_name = \"speaker_0\" # use your custom voice name here if you have one\n",
"audio_array = generate_audio(text_prompt, history_prompt=voice_name)"
"text_prompt = \"Hello, my name is Suno. And, uh — and I like pizza. [laughs]\"\n",
"voice_name = \"speaker_0\" # use your custom voice name here if you have one"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# simple generation\n",
"audio_array = generate_audio(text_prompt, history_prompt=voice_name, text_temp=0.7, waveform_temp=0.7)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# generation with more control\n",
"from bark.generation import codec_decode, generate_coarse, generate_fine, generate_text_semantic\n",
"x_semantic = generate_text_semantic(\n",
" text_prompt,\n",
" history_prompt=voice_name,\n",
" temp=0.7,\n",
" top_k=50,\n",
" top_p=0.95,\n",
")\n",
"\n",
"x_coarse_gen = generate_coarse(\n",
" x_semantic,\n",
" history_prompt=voice_name,\n",
" temp=0.7,\n",
" top_k=50,\n",
" top_p=0.95,\n",
")\n",
"x_fine_gen = generate_fine(\n",
" x_coarse_gen,\n",
" history_prompt=voice_name,\n",
" temp=0.5,\n",
")\n",
"audio_array = codec_decode(x_fine_gen)"
]
},
{


@@ -8,12 +8,49 @@
"source": [
"from bark.api import generate_audio\n",
"from bark.generation import SAMPLE_RATE\n",
"text_prompt = \"\"\"\n",
" Hello, my name is Suno. And, uh — and I like pizza. [laughs] \n",
" But I also have other interests such as playing tic tac toe.\n",
"\"\"\"\n",
"voice_name = \"speaker_0\" # use your custom voice name here if you have one\n",
"audio_array = generate_audio(text_prompt, history_prompt=voice_name)"
"text_prompt = \"Hello, my name is Suno. And, uh — and I like pizza. [laughs]\"\n",
"voice_name = \"speaker_0\" # use your custom voice name here if you have one"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# simple generation\n",
"audio_array = generate_audio(text_prompt, history_prompt=voice_name, text_temp=0.7, waveform_temp=0.7)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# generation with more control\n",
"from bark.generation import codec_decode, generate_coarse, generate_fine, generate_text_semantic\n",
"x_semantic = generate_text_semantic(\n",
" text_prompt,\n",
" history_prompt=voice_name,\n",
" temp=0.7,\n",
" top_k=50,\n",
" top_p=0.95,\n",
")\n",
"\n",
"x_coarse_gen = generate_coarse(\n",
" x_semantic,\n",
" history_prompt=voice_name,\n",
" temp=0.7,\n",
" top_k=50,\n",
" top_p=0.95,\n",
")\n",
"x_fine_gen = generate_fine(\n",
" x_coarse_gen,\n",
" history_prompt=voice_name,\n",
" temp=0.5,\n",
")\n",
"audio_array = codec_decode(x_fine_gen)"
]
},
{
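For reference, one common way to listen to or save the resulting audio_array from either notebook, assuming a Jupyter environment with IPython and scipy available (this is not part of the commit):

from IPython.display import Audio
from scipy.io.wavfile import write as write_wav
from bark.generation import SAMPLE_RATE

# Play the generated waveform inline in the notebook
Audio(audio_array, rate=SAMPLE_RATE)

# Or write it to disk as a WAV file (the filename is illustrative)
write_wav("generated.wav", SAMPLE_RATE, audio_array)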