Merge pull request #141 from myshell-ai/dev-0219

Dev 0219
This commit is contained in:
Zengyi Qin
2024-02-19 13:42:37 -05:00
committed by GitHub
21 changed files with 77 additions and 29 deletions

5
.gitignore vendored
View File

@@ -5,4 +5,7 @@ outputs
checkpoints checkpoints
trash trash
examples* examples*
.env .env
build
*.egg-info/
*.zip

View File

@@ -17,8 +17,8 @@
"source": [ "source": [
"import os\n", "import os\n",
"import torch\n", "import torch\n",
"import se_extractor\n", "from openvoice import se_extractor\n",
"from api import BaseSpeakerTTS, ToneColorConverter" "from openvoice.api import BaseSpeakerTTS, ToneColorConverter"
] ]
}, },
{ {
@@ -233,4 +233,4 @@
}, },
"nbformat": 4, "nbformat": 4,
"nbformat_minor": 5 "nbformat_minor": 5
} }

View File

@@ -17,8 +17,8 @@
"source": [ "source": [
"import os\n", "import os\n",
"import torch\n", "import torch\n",
"import se_extractor\n", "from openvoice import se_extractor\n",
"from api import ToneColorConverter" "from openvoice.api import ToneColorConverter"
] ]
}, },
{ {
@@ -192,4 +192,4 @@
}, },
"nbformat": 4, "nbformat": 4,
"nbformat_minor": 5 "nbformat_minor": 5
} }

View File

@@ -49,7 +49,7 @@ conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cud
pip install -r requirements.txt pip install -r requirements.txt
``` ```
Download the checkpoint from [here](https://myshell-public-repo-hosting.s3.amazonaws.com/checkpoints_1226.zip) and extract it to the `checkpoints` folder Download the checkpoint from [here](https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_1226.zip) and extract it to the `checkpoints` folder
**1. Flexible Voice Style Control.** **1. Flexible Voice Style Control.**
Please see [`demo_part1.ipynb`](../demo_part1.ipynb) for an example usage of how OpenVoice enables flexible style control over the cloned voice. Please see [`demo_part1.ipynb`](../demo_part1.ipynb) for an example usage of how OpenVoice enables flexible style control over the cloned voice.

0
openvoice/__init__.py Normal file
View File

View File

@@ -2,13 +2,13 @@ import torch
import numpy as np import numpy as np
import re import re
import soundfile import soundfile
import utils from openvoice import utils
import commons from openvoice import commons
import os import os
import librosa import librosa
from text import text_to_sequence from openvoice.text import text_to_sequence
from mel_processing import spectrogram_torch from openvoice.mel_processing import spectrogram_torch
from models import SynthesizerTrn from openvoice.models import SynthesizerTrn
class OpenVoiceBaseClass(object): class OpenVoiceBaseClass(object):

View File

@@ -3,7 +3,7 @@ import torch
from torch import nn from torch import nn
from torch.nn import functional as F from torch.nn import functional as F
import commons from openvoice import commons
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -3,14 +3,14 @@ import torch
from torch import nn from torch import nn
from torch.nn import functional as F from torch.nn import functional as F
import commons from openvoice import commons
import modules from openvoice import modules
import attentions from openvoice import attentions
from torch.nn import Conv1d, ConvTranspose1d, Conv2d from torch.nn import Conv1d, ConvTranspose1d, Conv2d
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
from commons import init_weights, get_padding from openvoice.commons import init_weights, get_padding
class TextEncoder(nn.Module): class TextEncoder(nn.Module):

View File

@@ -6,10 +6,10 @@ from torch.nn import functional as F
from torch.nn import Conv1d from torch.nn import Conv1d
from torch.nn.utils import weight_norm, remove_weight_norm from torch.nn.utils import weight_norm, remove_weight_norm
import commons from openvoice import commons
from commons import init_weights, get_padding from openvoice.commons import init_weights, get_padding
from transforms import piecewise_rational_quadratic_transform from openvoice.transforms import piecewise_rational_quadratic_transform
from attentions import Encoder from openvoice.attentions import Encoder
LRELU_SLOPE = 0.1 LRELU_SLOPE = 0.1

View File

@@ -4,8 +4,8 @@ import argparse
import gradio as gr import gradio as gr
from zipfile import ZipFile from zipfile import ZipFile
import langid import langid
import se_extractor from openvoice import se_extractor
from api import BaseSpeakerTTS, ToneColorConverter from openvoice.api import BaseSpeakerTTS, ToneColorConverter
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--share", action='store_true', default=False, help="make link public") parser.add_argument("--share", action='store_true', default=False, help="make link public")

View File

@@ -1,6 +1,6 @@
""" from https://github.com/keithito/tacotron """ """ from https://github.com/keithito/tacotron """
from text import cleaners from openvoice.text import cleaners
from text.symbols import symbols from openvoice.text.symbols import symbols
# Mappings from symbol to numeric ID and vice versa: # Mappings from symbol to numeric ID and vice versa:
@@ -43,7 +43,7 @@ def cleaned_text_to_sequence(cleaned_text, symbols):
from text.symbols import language_tone_start_map from openvoice.text.symbols import language_tone_start_map
def cleaned_text_to_sequence_vits2(cleaned_text, tones, language, symbols, languages): def cleaned_text_to_sequence_vits2(cleaned_text, tones, language, symbols, languages):
"""Converts a string of text to a sequence of IDs corresponding to the symbols in the text. """Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
Args: Args:

View File

@@ -1,6 +1,6 @@
import re import re
from text.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_ipa2 from openvoice.text.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_ipa2
from text.mandarin import number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo, chinese_to_romaji, chinese_to_lazy_ipa, chinese_to_ipa, chinese_to_ipa2 from openvoice.text.mandarin import number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo, chinese_to_romaji, chinese_to_lazy_ipa, chinese_to_ipa, chinese_to_ipa2
def cjke_cleaners2(text): def cjke_cleaners2(text):
text = re.sub(r'\[ZH\](.*?)\[ZH\]', text = re.sub(r'\[ZH\](.*?)\[ZH\]',

45
setup.py Normal file
View File

@@ -0,0 +1,45 @@
"""Packaging script for the MyShell-OpenVoice distribution."""
from setuptools import setup

# Read the long description up front: the context manager closes the file
# handle deterministically, and the explicit encoding keeps the build from
# depending on the platform's locale default. The path is resolved against
# the current working directory, so run this script from the repository root.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read().strip()

setup(
    name='MyShell-OpenVoice',
    version='0.0.0',
    description='Instant voice cloning by MyShell.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    keywords=[
        'text-to-speech',
        'tts',
        'voice-clone',
        'zero-shot-tts'
    ],
    url='https://github.com/myshell-ai/OpenVoice',
    project_urls={
        'Documentation': 'https://github.com/myshell-ai/OpenVoice/blob/main/docs/USAGE.md',
        'Changes': 'https://github.com/myshell-ai/OpenVoice/releases',
        'Code': 'https://github.com/myshell-ai/OpenVoice',
        'Issue tracker': 'https://github.com/myshell-ai/OpenVoice/issues',
    },
    author='MyShell',
    author_email='ethan@myshell.ai',
    license='Creative Commons Attribution-NonCommercial 4.0 International Public License',
    # An explicit package list is not searched recursively, so every
    # subpackage has to be named. The library modules import
    # `openvoice.text` (and its submodules); leaving it out would ship a
    # distribution that fails on import.
    packages=[
        'openvoice',
        'openvoice.text'
    ],
    python_requires='>=3.9',
    install_requires=[
        'librosa==0.9.1',
        'faster-whisper==0.9.0',
        'pydub==0.25.1',
        'wavmark==0.0.2',
        'numpy==1.22.0',
        'eng_to_ipa==0.0.2',
        'inflect==7.0.0',
        'unidecode==1.3.7',
        'whisper-timestamped==1.14.2',
        'pypinyin==0.50.0',
        'cn2an==0.5.22',
        'jieba==0.42.1',
        'gradio==3.48.0',
        'langid==1.1.6'
    ],
    zip_safe=False
)