mirror of
https://github.com/serp-ai/bark-with-voice-clone.git
synced 2025-12-16 03:38:01 +01:00
184 lines
5.4 KiB
Plaintext
184 lines
5.4 KiB
Plaintext
|
|
{
|
||
|
|
"cells": [
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"from bark.api import generate_audio\n",
|
||
|
|
"from bark.generation import SAMPLE_RATE, preload_models, codec_decode, generate_coarse, generate_fine, generate_text_semantic"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
"# Checkpoint files for the semantic, coarse and fine models.\n",
"# These are machine-specific absolute paths; edit them for your own setup.\n",
"semantic_path, coarse_path, fine_path = (\n",
"    \"E:/Python/bark-with-voice-clone/semantic_output/pytorch_model.bin\",\n",
"    \"E:/Python/bark-with-voice-clone/coarse_output/pytorch_model.bin\",\n",
"    \"E:/Python/bark-with-voice-clone/fine_output/pytorch_model.bin\",\n",
")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
"# Preload the text, coarse, fine and codec models: GPU enabled, small variants\n",
"# disabled, and the custom checkpoint paths passed for the first three.\n",
"preload_models(\n",
"    text_model_path=semantic_path, text_use_gpu=True, text_use_small=False,\n",
"    coarse_model_path=coarse_path, coarse_use_gpu=True, coarse_use_small=False,\n",
"    fine_model_path=fine_path, fine_use_gpu=True, fine_use_small=False,\n",
"    codec_use_gpu=True,\n",
"    force_reload=False,\n",
"    path=\"models\",\n",
")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
"# simple generation: one call to generate_audio, no history prompt\n",
"text_prompt = \"I am Joe Biden... and this is the finetuned semantic, coarse and fine model! [laughs] A lot better than the original!\"\n",
"audio_array = generate_audio(\n",
"    text_prompt,\n",
"    history_prompt=None,\n",
"    text_temp=0.7,\n",
"    waveform_temp=0.7,\n",
")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
"from IPython.display import Audio\n",
"\n",
"# play audio (kept as the cell's final expression so the notebook displays the player)\n",
"Audio(data=audio_array, rate=SAMPLE_RATE)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
"import os\n",
"\n",
"from scipy.io.wavfile import write as write_wav\n",
"\n",
"# save audio\n",
"filepath = \"output/audio.wav\" # change this to your desired output path\n",
"# write_wav opens the file directly and does not create missing folders,\n",
"# so make sure the target directory exists before writing.\n",
"os.makedirs(os.path.dirname(filepath) or \".\", exist_ok=True)\n",
"write_wav(filepath, SAMPLE_RATE, audio_array)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
"def generate_with_settings(\n",
"    text_prompt,\n",
"    semantic_temp=0.7,\n",
"    semantic_top_k=50,\n",
"    semantic_top_p=0.95,\n",
"    coarse_temp=0.7,\n",
"    coarse_top_k=50,\n",
"    coarse_top_p=0.95,\n",
"    fine_temp=0.5,\n",
"    voice_name=None,\n",
"    use_semantic_history_prompt=True,\n",
"    use_coarse_history_prompt=True,\n",
"    use_fine_history_prompt=True,\n",
"    output_full=False,\n",
"):\n",
"    \"\"\"Generate audio by calling the semantic, coarse and fine stages explicitly.\n",
"\n",
"    Each stage receives `voice_name` as its history prompt only when the matching\n",
"    `use_*_history_prompt` flag is truthy; otherwise it receives None.\n",
"\n",
"    Returns `codec_decode(...)` of the fine-stage output. With `output_full=True`\n",
"    returns a `(full_generation, decoded)` tuple instead, where `full_generation`\n",
"    is a dict holding the three intermediate outputs under the keys\n",
"    'semantic_prompt', 'coarse_prompt' and 'fine_prompt'.\n",
"    \"\"\"\n",
"\n",
"    def _history_for(enabled):\n",
"        # A stage only sees the voice prompt when its flag is switched on.\n",
"        return voice_name if enabled else None\n",
"\n",
"    # Stage 1: text -> semantic output\n",
"    semantic_out = generate_text_semantic(\n",
"        text_prompt,\n",
"        history_prompt=_history_for(use_semantic_history_prompt),\n",
"        temp=semantic_temp,\n",
"        top_k=semantic_top_k,\n",
"        top_p=semantic_top_p,\n",
"    )\n",
"\n",
"    # Stage 2: semantic -> coarse output\n",
"    coarse_out = generate_coarse(\n",
"        semantic_out,\n",
"        history_prompt=_history_for(use_coarse_history_prompt),\n",
"        temp=coarse_temp,\n",
"        top_k=coarse_top_k,\n",
"        top_p=coarse_top_p,\n",
"    )\n",
"\n",
"    # Stage 3: coarse -> fine output (only a temperature is passed here)\n",
"    fine_out = generate_fine(\n",
"        coarse_out,\n",
"        history_prompt=_history_for(use_fine_history_prompt),\n",
"        temp=fine_temp,\n",
"    )\n",
"\n",
"    decoded = codec_decode(fine_out)\n",
"    if not output_full:\n",
"        return decoded\n",
"\n",
"    full_generation = {\n",
"        'semantic_prompt': semantic_out,\n",
"        'coarse_prompt': coarse_out,\n",
"        'fine_prompt': fine_out,\n",
"    }\n",
"    return full_generation, decoded"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
"text_prompt = \"I am Joe Biden... and this is the finetuned semantic, coarse and fine model! [laughs] A lot better than the original!\"\n",
"\n",
"# Same prompt as the simple example, with the per-stage sampling settings spelled out.\n",
"# With voice_name=None every stage gets no history prompt, whatever the use_* flags say.\n",
"audio_array = generate_with_settings(\n",
"    text_prompt,\n",
"    semantic_temp=0.7, semantic_top_k=50, semantic_top_p=0.99,\n",
"    coarse_temp=0.7, coarse_top_k=50, coarse_top_p=0.99,\n",
"    fine_temp=0.5,\n",
"    voice_name=None,\n",
"    use_semantic_history_prompt=True,\n",
"    use_coarse_history_prompt=True,\n",
"    use_fine_history_prompt=True,\n",
"    output_full=False,\n",
")\n",
"\n",
"from IPython.display import Audio\n",
"\n",
"# play audio (kept as the cell's final expression so the notebook displays the player)\n",
"Audio(data=audio_array, rate=SAMPLE_RATE)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
"import os\n",
"\n",
"from scipy.io.wavfile import write as write_wav\n",
"\n",
"# save audio\n",
"filepath = \"output/audio.wav\" # change this to your desired output path\n",
"# write_wav opens the file directly and does not create missing folders,\n",
"# so make sure the target directory exists before writing.\n",
"os.makedirs(os.path.dirname(filepath) or \".\", exist_ok=True)\n",
"write_wav(filepath, SAMPLE_RATE, audio_array)"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"kernelspec": {
|
||
|
|
"display_name": "Python 3",
|
||
|
|
"language": "python",
|
||
|
|
"name": "python3"
|
||
|
|
},
|
||
|
|
"language_info": {
|
||
|
|
"codemirror_mode": {
|
||
|
|
"name": "ipython",
|
||
|
|
"version": 3
|
||
|
|
},
|
||
|
|
"file_extension": ".py",
|
||
|
|
"mimetype": "text/x-python",
|
||
|
|
"name": "python",
|
||
|
|
"nbconvert_exporter": "python",
|
||
|
|
"pygments_lexer": "ipython3",
|
||
|
|
"version": "3.10.8"
|
||
|
|
},
|
||
|
|
"orig_nbformat": 4
|
||
|
|
},
|
||
|
|
"nbformat": 4,
|
||
|
|
"nbformat_minor": 2
|
||
|
|
}
|