diff --git a/.gitignore b/.gitignore index 14b6901..aa11767 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,7 @@ outputs checkpoints trash examples* -.env \ No newline at end of file +.env +build +*.egg-info/ +*.zip \ No newline at end of file diff --git a/demo_part1.ipynb b/demo_part1.ipynb index 81484f8..40375fd 100644 --- a/demo_part1.ipynb +++ b/demo_part1.ipynb @@ -17,8 +17,8 @@ "source": [ "import os\n", "import torch\n", - "import se_extractor\n", - "from api import BaseSpeakerTTS, ToneColorConverter" + "from openvoice import se_extractor\n", + "from openvoice.api import BaseSpeakerTTS, ToneColorConverter" ] }, { @@ -233,4 +233,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/demo_part2.ipynb b/demo_part2.ipynb index 8cdb221..f0a6ee8 100644 --- a/demo_part2.ipynb +++ b/demo_part2.ipynb @@ -17,8 +17,8 @@ "source": [ "import os\n", "import torch\n", - "import se_extractor\n", - "from api import ToneColorConverter" + "from openvoice import se_extractor\n", + "from openvoice.api import ToneColorConverter" ] }, { @@ -192,4 +192,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/docs/USAGE.md b/docs/USAGE.md index f464399..27a166e 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -49,7 +49,7 @@ conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cud pip install -r requirements.txt ``` -Download the checkpoint from [here](https://myshell-public-repo-hosting.s3.amazonaws.com/checkpoints_1226.zip) and extract it to the `checkpoints` folder +Download the checkpoint from [here](https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_1226.zip) and extract it to the `checkpoints` folder **1. Flexible Voice Style Control.** Please see [`demo_part1.ipynb`](../demo_part1.ipynb) for an example usage of how OpenVoice enables flexible style control over the cloned voice. 
diff --git a/openvoice/__init__.py b/openvoice/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api.py b/openvoice/api.py similarity index 97% rename from api.py rename to openvoice/api.py index 41c6294..5ee45f2 100644 --- a/api.py +++ b/openvoice/api.py @@ -2,13 +2,13 @@ import torch import numpy as np import re import soundfile -import utils -import commons +from openvoice import utils +from openvoice import commons import os import librosa -from text import text_to_sequence -from mel_processing import spectrogram_torch -from models import SynthesizerTrn +from openvoice.text import text_to_sequence +from openvoice.mel_processing import spectrogram_torch +from openvoice.models import SynthesizerTrn class OpenVoiceBaseClass(object): diff --git a/attentions.py b/openvoice/attentions.py similarity index 99% rename from attentions.py rename to openvoice/attentions.py index 355a72e..5c410f0 100644 --- a/attentions.py +++ b/openvoice/attentions.py @@ -3,7 +3,7 @@ import torch from torch import nn from torch.nn import functional as F -import commons +from openvoice import commons import logging logger = logging.getLogger(__name__) diff --git a/commons.py b/openvoice/commons.py similarity index 100% rename from commons.py rename to openvoice/commons.py diff --git a/mel_processing.py b/openvoice/mel_processing.py similarity index 100% rename from mel_processing.py rename to openvoice/mel_processing.py diff --git a/models.py b/openvoice/models.py similarity index 99% rename from models.py rename to openvoice/models.py index a50d013..b1030fd 100644 --- a/models.py +++ b/openvoice/models.py @@ -3,14 +3,14 @@ import torch from torch import nn from torch.nn import functional as F -import commons -import modules -import attentions +from openvoice import commons +from openvoice import modules +from openvoice import attentions from torch.nn import Conv1d, ConvTranspose1d, Conv2d from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm -from 
commons import init_weights, get_padding +from openvoice.commons import init_weights, get_padding class TextEncoder(nn.Module): diff --git a/modules.py b/openvoice/modules.py similarity index 98% rename from modules.py rename to openvoice/modules.py index a046172..d659a32 100644 --- a/modules.py +++ b/openvoice/modules.py @@ -6,10 +6,10 @@ from torch.nn import functional as F from torch.nn import Conv1d from torch.nn.utils import weight_norm, remove_weight_norm -import commons -from commons import init_weights, get_padding -from transforms import piecewise_rational_quadratic_transform -from attentions import Encoder +from openvoice import commons +from openvoice.commons import init_weights, get_padding +from openvoice.transforms import piecewise_rational_quadratic_transform +from openvoice.attentions import Encoder LRELU_SLOPE = 0.1 diff --git a/openvoice_app.py b/openvoice/openvoice_app.py similarity index 99% rename from openvoice_app.py rename to openvoice/openvoice_app.py index f89b021..15b0b43 100644 --- a/openvoice_app.py +++ b/openvoice/openvoice_app.py @@ -4,8 +4,8 @@ import argparse import gradio as gr from zipfile import ZipFile import langid -import se_extractor -from api import BaseSpeakerTTS, ToneColorConverter +from openvoice import se_extractor +from openvoice.api import BaseSpeakerTTS, ToneColorConverter parser = argparse.ArgumentParser() parser.add_argument("--share", action='store_true', default=False, help="make link public") diff --git a/se_extractor.py b/openvoice/se_extractor.py similarity index 100% rename from se_extractor.py rename to openvoice/se_extractor.py diff --git a/text/__init__.py b/openvoice/text/__init__.py similarity index 92% rename from text/__init__.py rename to openvoice/text/__init__.py index 1d51bec..6494bcb 100644 --- a/text/__init__.py +++ b/openvoice/text/__init__.py @@ -1,6 +1,6 @@ """ from https://github.com/keithito/tacotron """ -from text import cleaners -from text.symbols import symbols +from openvoice.text import 
cleaners +from openvoice.text.symbols import symbols # Mappings from symbol to numeric ID and vice versa: @@ -43,7 +43,7 @@ def cleaned_text_to_sequence(cleaned_text, symbols): -from text.symbols import language_tone_start_map +from openvoice.text.symbols import language_tone_start_map def cleaned_text_to_sequence_vits2(cleaned_text, tones, language, symbols, languages): """Converts a string of text to a sequence of IDs corresponding to the symbols in the text. Args: diff --git a/text/cleaners.py b/openvoice/text/cleaners.py similarity index 67% rename from text/cleaners.py rename to openvoice/text/cleaners.py index 619ad47..16dd168 100644 --- a/text/cleaners.py +++ b/openvoice/text/cleaners.py @@ -1,6 +1,6 @@ import re -from text.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_ipa2 -from text.mandarin import number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo, chinese_to_romaji, chinese_to_lazy_ipa, chinese_to_ipa, chinese_to_ipa2 +from openvoice.text.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_ipa2 +from openvoice.text.mandarin import number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo, chinese_to_romaji, chinese_to_lazy_ipa, chinese_to_ipa, chinese_to_ipa2 def cjke_cleaners2(text): text = re.sub(r'\[ZH\](.*?)\[ZH\]', diff --git a/text/english.py b/openvoice/text/english.py similarity index 100% rename from text/english.py rename to openvoice/text/english.py diff --git a/text/mandarin.py b/openvoice/text/mandarin.py similarity index 100% rename from text/mandarin.py rename to openvoice/text/mandarin.py diff --git a/text/symbols.py b/openvoice/text/symbols.py similarity index 100% rename from text/symbols.py rename to openvoice/text/symbols.py diff --git a/transforms.py b/openvoice/transforms.py similarity index 100% rename from transforms.py rename to openvoice/transforms.py diff --git a/utils.py b/openvoice/utils.py similarity index 100% rename from utils.py rename to openvoice/utils.py diff --git 
a/setup.py b/setup.py
new file mode 100644
index 0000000..8f93fa8
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,52 @@
+"""Packaging script for the MyShell-OpenVoice distribution."""
+from pathlib import Path
+
+from setuptools import find_packages, setup
+
+# Locate README.md next to this script so the build does not depend on the
+# current working directory; it is decoded as UTF-8 explicitly below.
+README_PATH = Path(__file__).resolve().parent / 'README.md'
+
+setup(name='MyShell-OpenVoice',
+      version='0.0.0',
+      description='Instant voice cloning by MyShell.',
+      long_description=README_PATH.read_text(encoding='utf-8').strip(),
+      long_description_content_type='text/markdown',
+      keywords=[
+          'text-to-speech',
+          'tts',
+          'voice-clone',
+          'zero-shot-tts'
+      ],
+      url='https://github.com/myshell-ai/OpenVoice',
+      project_urls={
+          'Documentation': 'https://github.com/myshell-ai/OpenVoice/blob/main/docs/USAGE.md',
+          'Changes': 'https://github.com/myshell-ai/OpenVoice/releases',
+          'Code': 'https://github.com/myshell-ai/OpenVoice',
+          'Issue tracker': 'https://github.com/myshell-ai/OpenVoice/issues',
+      },
+      author='MyShell',
+      author_email='ethan@myshell.ai',
+      license='Creative Commons Attribution-NonCommercial 4.0 International Public License',
+      # Discover sub-packages too: the code imports openvoice.text, which a
+      # hard-coded ['openvoice'] list would leave out of built distributions.
+      packages=find_packages(include=['openvoice', 'openvoice.*']),
+      python_requires='>=3.9',
+      install_requires=[
+          'librosa==0.9.1',
+          'faster-whisper==0.9.0',
+          'pydub==0.25.1',
+          'wavmark==0.0.2',
+          'numpy==1.22.0',
+          'eng_to_ipa==0.0.2',
+          'inflect==7.0.0',
+          'unidecode==1.3.7',
+          'whisper-timestamped==1.14.2',
+          'pypinyin==0.50.0',
+          'cn2an==0.5.22',
+          'jieba==0.42.1',
+          'gradio==3.48.0',
+          'langid==1.1.6'
+      ],
+      zip_safe=False
+      )