diff --git a/README.md b/README.md index 7979072..7c1e738 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,16 @@ # 🐶 Bark...but with the ability to use voice cloning on custom audio/text pairs +If you want to clone a voice, follow the `clone_voice.ipynb` notebook. If you want to generate audio from text, follow the `generate.ipynb` notebook. + +To create a voice clone sample, you need an audio/text pair of around 5-10 seconds (the existing samples I decoded back to audio were around 7 seconds long). + +I haven't yet experimented with which kinds of audio/text pairs work best, but this will be updated as we find out more. + + + +------------------------------------------------------------------- +# Original README.md + [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/OnusFM.svg?style=social&label=@OnusFM)](https://twitter.com/OnusFM) [![](https://dcbadge.vercel.app/api/server/J2B2vsjKuE?compact=true&style=flat&)](https://discord.gg/J2B2vsjKuE) diff --git a/clone_voice.ipynb b/clone_voice.ipynb index 2f4b375..836afbb 100644 --- a/clone_voice.ipynb +++ b/clone_voice.ipynb @@ -23,9 +23,10 @@ "source": [ "# Load and pre-process the audio waveform\n", "audio_filepath = 'audio.wav' # the audio you want to clone (will get truncated so 5-10 seconds is probably fine, existing samples that I checked are around 7 seconds)\n", + "device = 'cuda' # or 'cpu'\n", "wav, sr = torchaudio.load(audio_filepath)\n", "wav = convert_audio(wav, sr, model.sample_rate, model.channels)\n", - "wav = wav.unsqueeze(0).to('cuda')" + "wav = wav.unsqueeze(0).to(device)" ] }, {