update readme and add support for cpu

2025-12-15 03:07:58 +01:00 · 2023-04-21 09:11:08 -06:00
parent 0aad680734
commit 6adb12603a
2 changed files with 13 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -1,5 +1,16 @@
 # 🐶 Bark...but with the ability to use voice cloning on custom audio/text pairs

+If you want to clone a voice just follow the `clone_voice.ipynb` notebook. If you want to generate audio from text, follow the `generate.ipynb` notebook.
+
+To create a voice clone sample, you need an audio/text pair of around 5-10 seconds (the samples I decoded back to the original audio were around 7 seconds long).
+
+I haven't yet experimented with which kinds of audio/text pairs work best, but this will be updated as we find out more.
+
+
+
+-------------------------------------------------------------------
+# Original README.md
+
 <a href="http://www.repostatus.org/#active"><img src="http://www.repostatus.org/badges/latest/active.svg" /></a>
 [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/OnusFM.svg?style=social&label=@OnusFM)](https://twitter.com/OnusFM)
 [![](https://dcbadge.vercel.app/api/server/J2B2vsjKuE?compact=true&style=flat&)](https://discord.gg/J2B2vsjKuE)
--- a/clone_voice.ipynb
+++ b/clone_voice.ipynb
@@ -23,9 +23,10 @@
   "source": [
    "# Load and pre-process the audio waveform\n",
    "audio_filepath = 'audio.wav' # the audio you want to clone (will get truncated so 5-10 seconds is probably fine, existing samples that I checked are around 7 seconds)\n",
+    "device = 'cuda' # or 'cpu'\n",
    "wav, sr = torchaudio.load(audio_filepath)\n",
    "wav = convert_audio(wav, sr, model.sample_rate, model.channels)\n",
-    "wav = wav.unsqueeze(0).to('cuda')"
+    "wav = wav.unsqueeze(0).to(device)"
   ]
  },
  {