mirror of
https://github.com/serp-ai/bark-with-voice-clone.git
synced 2025-12-16 03:38:01 +01:00
Add RVC support
This commit is contained in:
@@ -6,9 +6,29 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from IPython.display import Audio\n",
|
||||
"from scipy.io.wavfile import write as write_wav\n",
|
||||
"\n",
|
||||
"from bark.generation import SAMPLE_RATE, preload_models, codec_decode, generate_coarse, generate_fine, generate_text_semantic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"semantic_path = \"semantic_output/pytorch_model.bin\" # set to None if you don't want to use finetuned semantic\n",
|
||||
"coarse_path = \"coarse_output/pytorch_model.bin\" # set to None if you don't want to use finetuned coarse\n",
|
||||
"fine_path = \"fine_output/pytorch_model.bin\" # set to None if you don't want to use finetuned fine\n",
|
||||
"use_rvc = True # Set to False to use bark without RVC\n",
|
||||
"rvc_name = 'mi-test'\n",
|
||||
"rvc_path = f\"Retrieval-based-Voice-Conversion-WebUI/weights/{rvc_name}.pth\"\n",
|
||||
"index_path = f\"Retrieval-based-Voice-Conversion-WebUI/logs/{rvc_name}/added_IVF256_Flat_nprobe_1_{rvc_name}_v2.index\" \n",
|
||||
"device=\"cuda:0\"\n",
|
||||
"is_half=True"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -139,6 +159,33 @@
|
||||
"# - `♪` for song lyrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# download and load all models\n",
|
||||
"preload_models(\n",
|
||||
" text_use_gpu=True,\n",
|
||||
" text_use_small=False,\n",
|
||||
" text_model_path=semantic_path,\n",
|
||||
" coarse_use_gpu=True,\n",
|
||||
" coarse_use_small=False,\n",
|
||||
" coarse_model_path=coarse_path,\n",
|
||||
" fine_use_gpu=True,\n",
|
||||
" fine_use_small=False,\n",
|
||||
" fine_model_path=fine_path,\n",
|
||||
" codec_use_gpu=True,\n",
|
||||
" force_reload=False,\n",
|
||||
" path=\"models\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"if use_rvc:\n",
|
||||
" from rvc_infer import get_vc, vc_single\n",
|
||||
" get_vc(rvc_path, device, is_half)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -180,26 +227,6 @@
|
||||
"In conclusion, the human journey is one of discovery, driven by our innate curiosity and desire to understand the world around us. From the dawn of our species to the present day, we have continued to explore, learn, and adapt, pushing the boundaries of what is known and possible. As we continue to unravel the mysteries of the cosmos, our spirit.\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# download and load all models\n",
|
||||
"preload_models(\n",
|
||||
" text_use_gpu=True,\n",
|
||||
" text_use_small=False,\n",
|
||||
" coarse_use_gpu=True,\n",
|
||||
" coarse_use_small=False,\n",
|
||||
" fine_use_gpu=True,\n",
|
||||
" fine_use_small=False,\n",
|
||||
" codec_use_gpu=True,\n",
|
||||
" force_reload=False,\n",
|
||||
" path=\"models\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -215,7 +242,7 @@
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"# generation settings\n",
|
||||
"voice_name = 'speaker_4'\n",
|
||||
"voice_name = 'en_speaker_0'\n",
|
||||
"out_filepath = 'audio/audio.wav'\n",
|
||||
"\n",
|
||||
"semantic_temp = 0.7\n",
|
||||
@@ -234,6 +261,15 @@
|
||||
"\n",
|
||||
"use_last_generation_as_history = True\n",
|
||||
"\n",
|
||||
"if use_rvc:\n",
|
||||
" index_rate = 0.75\n",
|
||||
" f0up_key = -10\n",
|
||||
" filter_radius = 3\n",
|
||||
" rms_mix_rate = 0.25\n",
|
||||
" protect = 0.33\n",
|
||||
" resample_sr = SAMPLE_RATE\n",
|
||||
" f0method = \"harvest\" #harvest or pm\n",
|
||||
"\n",
|
||||
"texts = split_and_recombine_text(text)\n",
|
||||
"\n",
|
||||
"all_parts = []\n",
|
||||
@@ -263,6 +299,14 @@
|
||||
" fine_prompt=full_generation['fine_prompt'],\n",
|
||||
" )\n",
|
||||
" voice_name = '_temp/history.npz'\n",
|
||||
" write_wav(out_filepath.replace('.wav', f'_{i}') + '.wav', SAMPLE_RATE, audio_array)\n",
|
||||
"\n",
|
||||
" if use_rvc:\n",
|
||||
" try:\n",
|
||||
" audio_array = vc_single(0,out_filepath.replace('.wav', f'_{i}') + '.wav',f0up_key,None,f0method,index_path,index_rate, filter_radius=filter_radius, resample_sr=resample_sr, rms_mix_rate=rms_mix_rate, protect=protect)\n",
|
||||
" except:\n",
|
||||
" audio_array = vc_single(0,out_filepath.replace('.wav', f'_{i}') + '.wav',f0up_key,None,'pm',index_path,index_rate, filter_radius=filter_radius, resample_sr=resample_sr, rms_mix_rate=rms_mix_rate, protect=protect)\n",
|
||||
" write_wav(out_filepath.replace('.wav', f'_{i}') + '.wav', SAMPLE_RATE, audio_array)\n",
|
||||
" all_parts.append(audio_array)\n",
|
||||
"\n",
|
||||
"audio_array = np.concatenate(all_parts, axis=-1)\n",
|
||||
|
||||
Reference in New Issue
Block a user