Merge pull request #141 from myshell-ai/dev-0219

Dev 0219
This commit is contained in:
Zengyi Qin
2024-02-19 13:42:37 -05:00
committed by GitHub
21 changed files with 77 additions and 29 deletions

3
.gitignore vendored
View File

@@ -6,3 +6,6 @@ checkpoints
trash
examples*
.env
build
*.egg-info/
*.zip

View File

@@ -17,8 +17,8 @@
"source": [
"import os\n",
"import torch\n",
"import se_extractor\n",
"from api import BaseSpeakerTTS, ToneColorConverter"
"from openvoice import se_extractor\n",
"from openvoice.api import BaseSpeakerTTS, ToneColorConverter"
]
},
{

View File

@@ -17,8 +17,8 @@
"source": [
"import os\n",
"import torch\n",
"import se_extractor\n",
"from api import ToneColorConverter"
"from openvoice import se_extractor\n",
"from openvoice.api import ToneColorConverter"
]
},
{

View File

@@ -49,7 +49,7 @@ conda install pytorch==1.13.1 torchvision==0.14.1 torchaudio==0.13.1 pytorch-cud
pip install -r requirements.txt
```
Download the checkpoint from [here](https://myshell-public-repo-hosting.s3.amazonaws.com/checkpoints_1226.zip) and extract it to the `checkpoints` folder
Download the checkpoint from [here](https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/checkpoints_1226.zip) and extract it to the `checkpoints` folder
**1. Flexible Voice Style Control.**
Please see [`demo_part1.ipynb`](../demo_part1.ipynb) for an example usage of how OpenVoice enables flexible style control over the cloned voice.

0
openvoice/__init__.py Normal file
View File

View File

@@ -2,13 +2,13 @@ import torch
import numpy as np
import re
import soundfile
import utils
import commons
from openvoice import utils
from openvoice import commons
import os
import librosa
from text import text_to_sequence
from mel_processing import spectrogram_torch
from models import SynthesizerTrn
from openvoice.text import text_to_sequence
from openvoice.mel_processing import spectrogram_torch
from openvoice.models import SynthesizerTrn
class OpenVoiceBaseClass(object):

View File

@@ -3,7 +3,7 @@ import torch
from torch import nn
from torch.nn import functional as F
import commons
from openvoice import commons
import logging
logger = logging.getLogger(__name__)

View File

@@ -3,14 +3,14 @@ import torch
from torch import nn
from torch.nn import functional as F
import commons
import modules
import attentions
from openvoice import commons
from openvoice import modules
from openvoice import attentions
from torch.nn import Conv1d, ConvTranspose1d, Conv2d
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
from commons import init_weights, get_padding
from openvoice.commons import init_weights, get_padding
class TextEncoder(nn.Module):

View File

@@ -6,10 +6,10 @@ from torch.nn import functional as F
from torch.nn import Conv1d
from torch.nn.utils import weight_norm, remove_weight_norm
import commons
from commons import init_weights, get_padding
from transforms import piecewise_rational_quadratic_transform
from attentions import Encoder
from openvoice import commons
from openvoice.commons import init_weights, get_padding
from openvoice.transforms import piecewise_rational_quadratic_transform
from openvoice.attentions import Encoder
LRELU_SLOPE = 0.1

View File

@@ -4,8 +4,8 @@ import argparse
import gradio as gr
from zipfile import ZipFile
import langid
import se_extractor
from api import BaseSpeakerTTS, ToneColorConverter
from openvoice import se_extractor
from openvoice.api import BaseSpeakerTTS, ToneColorConverter
parser = argparse.ArgumentParser()
parser.add_argument("--share", action='store_true', default=False, help="make link public")

View File

@@ -1,6 +1,6 @@
""" from https://github.com/keithito/tacotron """
from text import cleaners
from text.symbols import symbols
from openvoice.text import cleaners
from openvoice.text.symbols import symbols
# Mappings from symbol to numeric ID and vice versa:
@@ -43,7 +43,7 @@ def cleaned_text_to_sequence(cleaned_text, symbols):
from text.symbols import language_tone_start_map
from openvoice.text.symbols import language_tone_start_map
def cleaned_text_to_sequence_vits2(cleaned_text, tones, language, symbols, languages):
"""Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
Args:

View File

@@ -1,6 +1,6 @@
import re
from text.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_ipa2
from text.mandarin import number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo, chinese_to_romaji, chinese_to_lazy_ipa, chinese_to_ipa, chinese_to_ipa2
from openvoice.text.english import english_to_lazy_ipa, english_to_ipa2, english_to_lazy_ipa2
from openvoice.text.mandarin import number_to_chinese, chinese_to_bopomofo, latin_to_bopomofo, chinese_to_romaji, chinese_to_lazy_ipa, chinese_to_ipa, chinese_to_ipa2
def cjke_cleaners2(text):
text = re.sub(r'\[ZH\](.*?)\[ZH\]',

45
setup.py Normal file
View File

@@ -0,0 +1,45 @@
"""Packaging script for the MyShell OpenVoice distribution."""
from setuptools import find_packages, setup

# Read the long description with an explicit encoding and close the handle
# deterministically; the platform default encoding is not guaranteed to be
# UTF-8, and a bare open(...).read() leaves the file open until GC.
with open('README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read().strip()

setup(
    name='MyShell-OpenVoice',
    version='0.0.0',
    description='Instant voice cloning by MyShell.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    keywords=[
        'text-to-speech',
        'tts',
        'voice-clone',
        'zero-shot-tts',
    ],
    url='https://github.com/myshell-ai/OpenVoice',
    project_urls={
        'Documentation': 'https://github.com/myshell-ai/OpenVoice/blob/main/docs/USAGE.md',
        'Changes': 'https://github.com/myshell-ai/OpenVoice/releases',
        'Code': 'https://github.com/myshell-ai/OpenVoice',
        'Issue tracker': 'https://github.com/myshell-ai/OpenVoice/issues',
    },
    author='MyShell',
    author_email='ethan@myshell.ai',
    license='Creative Commons Attribution-NonCommercial 4.0 International Public License',
    # The package imports its own subpackages (e.g. `openvoice.text`), so
    # listing only the top-level 'openvoice' would leave them out of a
    # regular (non-editable) install. Discover 'openvoice' and everything
    # beneath it, and nothing else that happens to sit in the repo root.
    packages=find_packages(include=['openvoice', 'openvoice.*']),
    python_requires='>=3.9',
    install_requires=[
        'librosa==0.9.1',
        'faster-whisper==0.9.0',
        'pydub==0.25.1',
        'wavmark==0.0.2',
        'numpy==1.22.0',
        'eng_to_ipa==0.0.2',
        'inflect==7.0.0',
        'unidecode==1.3.7',
        'whisper-timestamped==1.14.2',
        'pypinyin==0.50.0',
        'cn2an==0.5.22',
        'jieba==0.42.1',
        'gradio==3.48.0',
        'langid==1.1.6',
    ],
    zip_safe=False,
)