voice%20cloning%20AI.ipynb

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "voice_generator_AI.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "lbdT4692nk7G"
      },
      "source": [
        "%tensorflow_version 1.x\n",
        "import os\n",
        "from os.path import join\n",
        "\n",
        "# Getting the model (PS: this is a famous but not official model)\n",
        "!git clone https://github.com/CorentinJ/Real-Time-Voice-Cloning.git\n",
        "project_name = 'Real-Time-Voice-Cloning'\n",
        "\n",
        "#Install requirements with some utils\n",
        "!cd {project_name} && pip install -q -r requirements.txt\n",
        "!apt-get install -qq libportaudio2\n",
        "!pip install -q https://github.com/tugstugi/dl-colab-notebooks/archive/colab_utils.zip\n",
        "\n",
        "#downloading pretrained weights\n",
        "!cd {project_name} && wget https://github.com/blue-fish/Real-Time-Voice-Cloning/releases/download/v1.0/pretrained.zip && unzip -o pretrained.zip"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "34S28kEKnx3t"
      },
      "source": [
        "#packages\n",
        "import sys\n",
        "sys.path.append(project_name)\n",
        "\n",
        "from IPython.display import display, Audio, clear_output\n",
        "from IPython.utils import io\n",
        "import ipywidgets as widgets\n",
        "import numpy as np\n",
        "from dl_colab_notebooks.audio import record_audio, upload_audio\n",
        "\n",
        "from synthesizer.inference import Synthesizer\n",
        "from encoder import inference as encoder\n",
        "from vocoder import inference as vocoder\n",
        "from pathlib import Path\n",
        "\n",
        "#loading weights to the models\n",
        "encoder.load_model(project_name / Path(\"encoder/saved_models/pretrained.pt\"))\n",
        "synthesizer = Synthesizer(project_name / Path(\"synthesizer/saved_models/pretrained/pretrained.pt\"))\n",
        "vocoder.load_model(project_name / Path(\"vocoder/saved_models/pretrained/pretrained.pt\"))"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "YYfgVfeX3zkU"
      },
      "source": [
        "#params (changes these as you please :)\n",
        "record_seconds = 10 # Record duration 1-10 seconds\n",
        "record_or_upload = \"Upload\" # fill this with Record (record now) or Upload (upload an audio)\n",
        "\n",
        "\n",
        "SAMPLE_RATE = 22050\n",
        "embedding = None\n",
        "def _compute_embedding(audio):\n",
        "  display(Audio(audio, rate=SAMPLE_RATE, autoplay=True))\n",
        "  global embedding\n",
        "  embedding = None\n",
        "  embedding = encoder.embed_utterance(encoder.preprocess_wav(audio, SAMPLE_RATE))\n",
        "def _record_audio(b):\n",
        "  clear_output()\n",
        "  audio = record_audio(record_seconds, sample_rate=SAMPLE_RATE)\n",
        "  _compute_embedding(audio)\n",
        "def _upload_audio(b):\n",
        "  clear_output()\n",
        "  audio = upload_audio(sample_rate=SAMPLE_RATE)\n",
        "  _compute_embedding(audio)\n",
        "\n",
        "if record_or_upload == \"Record\":\n",
        "  button = widgets.Button(description=\"Record Your Voice\")\n",
        "  button.on_click(_record_audio)\n",
        "  display(button)\n",
        "else:\n",
        "  _upload_audio(\"\")"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "BOV8CvVL4fl-"
      },
      "source": [
        "# text to output ( Please dont use this to generate something that is going to get you in trouble :'( )\n",
        "text = \"Hi people check this out this man Rida is doing something fun\" # text to say\n",
        "\n",
        "\n",
        "if embedding is None:\n",
        "  print(\"first record or upload a voice file!\")\n",
        "else:\n",
        "  print(\"Synthesizing new audio...\")\n",
        "  #with io.capture_output() as captured:\n",
        "  specs = synthesizer.synthesize_spectrograms([text], [embedding])\n",
        "  generated_wav = vocoder.infer_waveform(specs[0])\n",
        "  generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate), mode=\"constant\")\n",
        "  clear_output()\n",
        "  out_audio = Audio(generated_wav, rate=synthesizer.sample_rate, autoplay=True)\n",
        "  display(out_audio)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "aAhgj6Z_YixE"
      },
      "source": [
        "# download output audio file ( Please dont use this to generate something that is going to get you in trouble :'( )\n",
        "with open('out.wav', 'wb') as f:\n",
        "    f.write(out_audio.data)"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}
adding the notebook 2021-10-18 17:15:22 +00:00			`{`
			`"nbformat": 4,`
			`"nbformat_minor": 0,`
			`"metadata": {`
			`"colab": {`
			`"name": "voice_generator_AI.ipynb",`
			`"provenance": [],`
			`"collapsed_sections": []`
			`},`
			`"kernelspec": {`
			`"name": "python3",`
			`"display_name": "Python 3"`
			`},`
			`"language_info": {`
			`"name": "python"`
			`},`
			`"accelerator": "GPU"`
			`},`
			`"cells": [`
			`{`
			`"cell_type": "code",`
			`"metadata": {`
			`"id": "lbdT4692nk7G"`
			`},`
			`"source": [`
			`"%tensorflow_version 1.x\n",`
			`"import os\n",`
			`"from os.path import join\n",`
			`"\n",`
			`"# Getting the model (PS: this is a famous but not official model)\n",`
			`"!git clone https://github.com/CorentinJ/Real-Time-Voice-Cloning.git\n",`
			`"project_name = 'Real-Time-Voice-Cloning'\n",`
			`"\n",`
			`"#Install requirements with some utils\n",`
			`"!cd {project_name} && pip install -q -r requirements.txt\n",`
			`"!apt-get install -qq libportaudio2\n",`
			`"!pip install -q https://github.com/tugstugi/dl-colab-notebooks/archive/colab_utils.zip\n",`
			`"\n",`
			`"#downloading pretrained weights\n",`
			`"!cd {project_name} && wget https://github.com/blue-fish/Real-Time-Voice-Cloning/releases/download/v1.0/pretrained.zip && unzip -o pretrained.zip"`
			`],`
			`"execution_count": null,`
			`"outputs": []`
			`},`
			`{`
			`"cell_type": "code",`
			`"metadata": {`
			`"id": "34S28kEKnx3t"`
			`},`
			`"source": [`
			`"#packages\n",`
			`"import sys\n",`
			`"sys.path.append(project_name)\n",`
			`"\n",`
			`"from IPython.display import display, Audio, clear_output\n",`
			`"from IPython.utils import io\n",`
			`"import ipywidgets as widgets\n",`
			`"import numpy as np\n",`
			`"from dl_colab_notebooks.audio import record_audio, upload_audio\n",`
			`"\n",`
			`"from synthesizer.inference import Synthesizer\n",`
			`"from encoder import inference as encoder\n",`
			`"from vocoder import inference as vocoder\n",`
			`"from pathlib import Path\n",`
			`"\n",`
			`"#loading weights to the models\n",`
			`"encoder.load_model(project_name / Path(\"encoder/saved_models/pretrained.pt\"))\n",`
			`"synthesizer = Synthesizer(project_name / Path(\"synthesizer/saved_models/pretrained/pretrained.pt\"))\n",`
			`"vocoder.load_model(project_name / Path(\"vocoder/saved_models/pretrained/pretrained.pt\"))"`
			`],`
			`"execution_count": null,`
			`"outputs": []`
			`},`
			`{`
			`"cell_type": "code",`
			`"metadata": {`
			`"id": "YYfgVfeX3zkU"`
			`},`
			`"source": [`
			`"#params (changes these as you please :)\n",`
			`"record_seconds = 10 # Record duration 1-10 seconds\n",`
			`"record_or_upload = \"Upload\" # fill this with Record (record now) or Upload (upload an audio)\n",`
			`"\n",`
			`"\n",`
			`"SAMPLE_RATE = 22050\n",`
			`"embedding = None\n",`
			`"def _compute_embedding(audio):\n",`
			`" display(Audio(audio, rate=SAMPLE_RATE, autoplay=True))\n",`
			`" global embedding\n",`
			`" embedding = None\n",`
			`" embedding = encoder.embed_utterance(encoder.preprocess_wav(audio, SAMPLE_RATE))\n",`
			`"def _record_audio(b):\n",`
			`" clear_output()\n",`
			`" audio = record_audio(record_seconds, sample_rate=SAMPLE_RATE)\n",`
			`" _compute_embedding(audio)\n",`
			`"def _upload_audio(b):\n",`
			`" clear_output()\n",`
			`" audio = upload_audio(sample_rate=SAMPLE_RATE)\n",`
			`" _compute_embedding(audio)\n",`
			`"\n",`
			`"if record_or_upload == \"Record\":\n",`
			`" button = widgets.Button(description=\"Record Your Voice\")\n",`
			`" button.on_click(_record_audio)\n",`
			`" display(button)\n",`
			`"else:\n",`
			`" _upload_audio(\"\")"`
			`],`
			`"execution_count": null,`
			`"outputs": []`
			`},`
			`{`
			`"cell_type": "code",`
			`"metadata": {`
			`"id": "BOV8CvVL4fl-"`
			`},`
			`"source": [`
			`"# text to output ( Please dont use this to generate something that is going to get you in trouble :'( )\n",`
			`"text = \"Hi people check this out this man Rida is doing something fun\" # text to say\n",`
			`"\n",`
			`"\n",`
			`"if embedding is None:\n",`
			`" print(\"first record or upload a voice file!\")\n",`
			`"else:\n",`
			`" print(\"Synthesizing new audio...\")\n",`
			`" #with io.capture_output() as captured:\n",`
			`" specs = synthesizer.synthesize_spectrograms([text], [embedding])\n",`
			`" generated_wav = vocoder.infer_waveform(specs[0])\n",`
			`" generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate), mode=\"constant\")\n",`
			`" clear_output()\n",`
			`" out_audio = Audio(generated_wav, rate=synthesizer.sample_rate, autoplay=True)\n",`
			`" display(out_audio)"`
			`],`
			`"execution_count": null,`
			`"outputs": []`
			`},`
			`{`
			`"cell_type": "code",`
			`"metadata": {`
			`"id": "aAhgj6Z_YixE"`
			`},`
			`"source": [`
			`"# download output audio file ( Please dont use this to generate something that is going to get you in trouble :'( )\n",`
			`"with open('out.wav', 'wb') as f:\n",`
			`" f.write(out_audio.data)"`
			`],`
			`"execution_count": null,`
			`"outputs": []`
			`}`
			`]`
			`}`