diff --git a/sample_0.mp3 b/sample_0.mp3 new file mode 100644 index 0000000..4787405 Binary files /dev/null and b/sample_0.mp3 differ diff --git a/voice cloning AI.ipynb b/voice cloning AI.ipynb new file mode 100644 index 0000000..dbb1d4d --- /dev/null +++ b/voice cloning AI.ipynb @@ -0,0 +1,150 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "voice_generator_AI.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "lbdT4692nk7G" + }, + "source": [ + "%tensorflow_version 1.x\n", + "import os\n", + "from os.path import join\n", + "\n", + "# Getting the model (PS: this is a famous but not official model)\n", + "!git clone https://github.com/CorentinJ/Real-Time-Voice-Cloning.git\n", + "project_name = 'Real-Time-Voice-Cloning'\n", + "\n", + "#Install requirements with some utils\n", + "!cd {project_name} && pip install -q -r requirements.txt\n", + "!apt-get install -qq libportaudio2\n", + "!pip install -q https://github.com/tugstugi/dl-colab-notebooks/archive/colab_utils.zip\n", + "\n", + "#downloading pretrained weights\n", + "!cd {project_name} && wget https://github.com/blue-fish/Real-Time-Voice-Cloning/releases/download/v1.0/pretrained.zip && unzip -o pretrained.zip" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "34S28kEKnx3t" + }, + "source": [ + "#packages\n", + "import sys\n", + "sys.path.append(project_name)\n", + "\n", + "from IPython.display import display, Audio, clear_output\n", + "from IPython.utils import io\n", + "import ipywidgets as widgets\n", + "import numpy as np\n", + "from dl_colab_notebooks.audio import record_audio, upload_audio\n", + "\n", + "from synthesizer.inference import Synthesizer\n", + "from encoder import inference as encoder\n", + "from vocoder import inference as vocoder\n", + "from pathlib import Path\n", + "\n", + "#loading weights to the models\n", + "encoder.load_model(project_name / Path(\"encoder/saved_models/pretrained.pt\"))\n", + "synthesizer = Synthesizer(project_name / Path(\"synthesizer/saved_models/pretrained/pretrained.pt\"))\n", + "vocoder.load_model(project_name / Path(\"vocoder/saved_models/pretrained/pretrained.pt\"))" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "YYfgVfeX3zkU" + }, + "source": [ + "#params (changes these as you please :)\n", + "record_seconds = 10 # Record duration 1-10 seconds\n", + "record_or_upload = \"Upload\" # fill this with Record (record now) or Upload (upload an audio)\n", + "\n", + "\n", + "SAMPLE_RATE = 22050\n", + "embedding = None\n", + "def _compute_embedding(audio):\n", + " display(Audio(audio, rate=SAMPLE_RATE, autoplay=True))\n", + " global embedding\n", + " embedding = None\n", + " embedding = encoder.embed_utterance(encoder.preprocess_wav(audio, SAMPLE_RATE))\n", + "def _record_audio(b):\n", + " clear_output()\n", + " audio = record_audio(record_seconds, sample_rate=SAMPLE_RATE)\n", + " _compute_embedding(audio)\n", + "def _upload_audio(b):\n", + " clear_output()\n", + " audio = upload_audio(sample_rate=SAMPLE_RATE)\n", + " _compute_embedding(audio)\n", + "\n", + "if record_or_upload == \"Record\":\n", + " button = widgets.Button(description=\"Record Your Voice\")\n", + " button.on_click(_record_audio)\n", + " display(button)\n", + "else:\n", + " _upload_audio(\"\")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "BOV8CvVL4fl-" + }, + "source": [ + "# text to output ( Please dont use this to generate something that is going to get you in trouble :'( )\n", + "text = \"Hi people check this out this man Rida is doing something fun\" # text to say\n", + "\n", + "\n", + "if embedding is None:\n", + " print(\"first record or upload a voice file!\")\n", + "else:\n", + " print(\"Synthesizing new audio...\")\n", + " #with io.capture_output() as captured:\n", + " specs = synthesizer.synthesize_spectrograms([text], [embedding])\n", + " generated_wav = vocoder.infer_waveform(specs[0])\n", + " generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate), mode=\"constant\")\n", + " clear_output()\n", + " out_audio = Audio(generated_wav, rate=synthesizer.sample_rate, autoplay=True)\n", + " display(out_audio)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "aAhgj6Z_YixE" + }, + "source": [ + "# download output audio file ( Please dont use this to generate something that is going to get you in trouble :'( )\n", + "with open('out.wav', 'wb') as f:\n", + " f.write(out_audio.data)" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file