Merge remote-tracking branch 'ogrvc/main' into try-update

This commit is contained in:
kalomaze
2023-07-17 13:25:04 -05:00
109 changed files with 4786 additions and 463 deletions

1
.gitignore vendored
View File

@@ -4,3 +4,4 @@ __pycache__
*.pyd
hubert_base.pt
/logs
.venv

View File

@@ -1,6 +1,7 @@
MIT License
Copyright (c) 2023 liujing04
Copyright (c) 2023 源文雨
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +19,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.

View File

@@ -1,11 +1,9 @@
import soundfile as sf
import torch, pdb, time, argparse, os, warnings, sys, librosa
import torch, pdb, os, warnings, librosa
import numpy as np
import onnxruntime as ort
from scipy.io.wavfile import write
from tqdm import tqdm
import torch
import torch.nn as nn
dim_c = 4

View File

@@ -1,50 +1,45 @@
MIT License
Copyright (c) 2023 liujing04
Copyright (c) 2023 源文雨
本软件及其相关代码以MIT协议开源作者不对软件具备任何控制力使用软件者、传播软件导出的声音者自负全责
如不认可该条款,则不能使用或引用软件包内任何代码和文件。
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
特此授予任何获得本软件和相关文档文件(以下简称“软件”)副本的人免费使用、复制、修改、合并、出版、分发、再授权和/或销售本软件的权利,以及授予本软件所提供的人使用本软件的权利,但须符合以下条件:
上述版权声明和本许可声明应包含在软件的所有副本或实质部分中。
软件是“按原样”提供的,没有任何明示或暗示的保证,包括但不限于适销性、适用于特定目的和不侵权的保证。在任何情况下,作者或版权持有人均不承担因软件或软件的使用或其他交易而产生、产生或与之相关的任何索赔、损害赔偿或其他责任,无论是在合同诉讼、侵权诉讼还是其他诉讼中。
相关引用库协议如下:
#################
ContentVec
https://github.com/auspicious3000/contentvec/blob/main/LICENSE
MIT License
#################
VITS
https://github.com/jaywalnut310/vits/blob/main/LICENSE
MIT License
#################
HIFIGAN
https://github.com/jik876/hifi-gan/blob/master/LICENSE
MIT License
#################
gradio
https://github.com/gradio-app/gradio/blob/main/LICENSE
Apache License 2.0
#################
ffmpeg
https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv3
https://github.com/BtbN/FFmpeg-Builds/releases/download/autobuild-2021-02-28-12-32/ffmpeg-n4.3.2-160-gfbb9368226-win64-lgpl-4.3.zip
LPGLv3 License
MIT License
#################
ultimatevocalremovergui
https://github.com/Anjok07/ultimatevocalremovergui/blob/master/LICENSE
https://github.com/yang123qwe/vocal_separation_by_uvr5
MIT License
#################
audio-slicer
https://github.com/openvpi/audio-slicer/blob/main/LICENSE
MIT License
本软件及其相关代码以MIT协议开源作者不对软件具备任何控制力使用软件者、传播软件导出的声音者自负全责。
如不认可该条款,则不能使用或引用软件包内任何代码和文件。
特此授予任何获得本软件和相关文档文件(以下简称“软件”)副本的人免费使用、复制、修改、合并、出版、分发、再授权和/或销售本软件的权利,以及授予本软件所提供的人使用本软件的权利,但须符合以下条件:
上述版权声明和本许可声明应包含在软件的所有副本或实质部分中。
软件是“按原样”提供的,没有任何明示或暗示的保证,包括但不限于适销性、适用于特定目的和不侵权的保证。在任何情况下,作者或版权持有人均不承担因软件或软件的使用或其他交易而产生、产生或与之相关的任何索赔、损害赔偿或其他责任,无论是在合同诉讼、侵权诉讼还是其他诉讼中
The LICENCEs for related libraries are as follows.
相关引用库协议如下:
ContentVec
https://github.com/auspicious3000/contentvec/blob/main/LICENSE
MIT License
VITS
https://github.com/jaywalnut310/vits/blob/main/LICENSE
MIT License
HIFIGAN
https://github.com/jik876/hifi-gan/blob/master/LICENSE
MIT License
gradio
https://github.com/gradio-app/gradio/blob/main/LICENSE
Apache License 2.0
ffmpeg
https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv3
https://github.com/BtbN/FFmpeg-Builds/releases/download/autobuild-2021-02-28-12-32/ffmpeg-n4.3.2-160-gfbb9368226-win64-lgpl-4.3.zip
LPGLv3 License
MIT License
ultimatevocalremovergui
https://github.com/Anjok07/ultimatevocalremovergui/blob/master/LICENSE
https://github.com/yang123qwe/vocal_separation_by_uvr5
MIT License
audio-slicer
https://github.com/openvpi/audio-slicer/blob/main/LICENSE
MIT License
PySimpleGUI
https://github.com/PySimpleGUI/PySimpleGUI/blob/master/license.txt
LPGLv3 License

View File

@@ -20,7 +20,7 @@
{
"cell_type": "markdown",
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)"
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)"
],
"metadata": {
"id": "ZFFCx5J80SGa"

5
app.py
View File

@@ -1,4 +1,3 @@
import io
import os
import torch
@@ -6,14 +5,12 @@ import torch
import gradio as gr
import librosa
import numpy as np
import soundfile
import logging
from fairseq import checkpoint_utils
from my_utils import load_audio
from vc_infer_pipeline import VC
import traceback
from config import Config
from infer_pack.models import (
from lib.infer_pack.models import (
SynthesizerTrnMs256NSFsid,
SynthesizerTrnMs256NSFsid_nono,
SynthesizerTrnMs768NSFsid,

View File

@@ -1,9 +1,10 @@
import argparse
import sys
import torch
from multiprocessing import cpu_count
def config_file_change_fp32():
def use_fp32_config():
for config_file in ["32k.json", "40k.json", "48k.json"]:
with open(f"configs/{config_file}", "r") as f:
strr = f.read().replace("true", "false")
@@ -36,11 +37,10 @@ class Config:
@staticmethod
def arg_parse() -> tuple:
exe = sys.executable or "python"
parser = argparse.ArgumentParser()
parser.add_argument("--port", type=int, default=7865, help="Listen port")
parser.add_argument(
"--pycmd", type=str, default="python", help="Python command"
)
parser.add_argument("--pycmd", type=str, default=exe, help="Python command")
parser.add_argument("--colab", action="store_true", help="Launch in colab")
parser.add_argument(
"--noparallel", action="store_true", help="Disable parallel processing"
@@ -70,6 +70,18 @@ class Config:
cmd_opts.is_cli,
)
# has_mps is only available in nightly pytorch (for now) and MasOS 12.3+.
# check `getattr` and try it for compatibility
@staticmethod
def has_mps() -> bool:
if not torch.backends.mps.is_available():
return False
try:
torch.zeros(1).to(torch.device("mps"))
return True
except Exception:
return False
def device_config(self) -> tuple:
if torch.cuda.is_available():
i_device = int(self.device.split(":")[-1])
@@ -81,11 +93,11 @@ class Config:
or "1070" in self.gpu_name
or "1080" in self.gpu_name
):
print("16系/10系显卡和P40强制单精度")
print("Found GPU", self.gpu_name, ", force to fp32")
self.is_half = False
config_file_change_fp32()
use_fp32_config()
else:
self.gpu_name = None
print("Found GPU", self.gpu_name)
self.gpu_mem = int(
torch.cuda.get_device_properties(i_device).total_memory
/ 1024
@@ -98,16 +110,16 @@ class Config:
strr = f.read().replace("3.7", "3.0")
with open("trainset_preprocess_pipeline_print.py", "w") as f:
f.write(strr)
elif torch.backends.mps.is_available():
print("没有发现支持的N卡, 使用MPS进行推理")
elif self.has_mps():
print("No supported Nvidia GPU found, use MPS instead")
self.device = "mps"
self.is_half = False
config_file_change_fp32()
use_fp32_config()
else:
print("没有发现支持的N卡, 使用CPU进行推理")
print("No supported Nvidia GPU found, use CPU instead")
self.device = "cpu"
self.is_half = False
config_file_change_fp32()
use_fp32_config()
if self.n_cpu == 0:
self.n_cpu = cpu_count()

View File

@@ -7,7 +7,7 @@
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 4,
"fp16_run": true,
"fp16_run": false,
"lr_decay": 0.999875,
"segment_size": 12800,
"init_lr_ratio": 1,

View File

@@ -7,7 +7,7 @@
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 4,
"fp16_run": true,
"fp16_run": false,
"lr_decay": 0.999875,
"segment_size": 12800,
"init_lr_ratio": 1,

View File

@@ -7,7 +7,7 @@
"betas": [0.8, 0.99],
"eps": 1e-9,
"batch_size": 4,
"fp16_run": true,
"fp16_run": false,
"lr_decay": 0.999875,
"segment_size": 11520,
"init_lr_ratio": 1,

View File

@@ -29,7 +29,7 @@ todolist
- 废弃32k模型的训练
### 20230513更新
- 清除一键包内部老版本runtime内残留的infer_pack和uvr5_pack
- 清除一键包内部老版本runtime内残留的lib.infer_pack和uvr5_pack
- 修复训练集预处理伪多进程的bug
- 增加harvest识别音高可选通过中值滤波削弱哑音现象可调整中值滤波半径
- 导出音频增加后处理重采样

View File

@@ -27,7 +27,7 @@ todolist
- v1 32k model training is no more supported
### 2023-05-13
- Clear the redundant codes in the old version of runtime in the one-click-package: infer_pack and uvr5_pack
- Clear the redundant codes in the old version of runtime in the one-click-package: lib.infer_pack and uvr5_pack
- Fix pseudo multiprocessing bug in training set preprocessing
- Adding median filtering radius adjustment for harvest pitch recognize algorithm
- Support post processing resampling for exporting audio

View File

@@ -33,7 +33,7 @@
### 2023년 5월 13일 업데이트
- 원클릭 패키지의 이전 버전 런타임 내, 불필요한 코드(infer_pack 및 uvr5_pack) 제거.
- 원클릭 패키지의 이전 버전 런타임 내, 불필요한 코드(lib.infer_pack 및 uvr5_pack) 제거.
- 훈련 세트 전처리의 유사 다중 처리 버그 수정.
- Harvest 피치 인식 알고리즘에 대한 중위수 필터링 반경 조정 추가.
- 오디오 내보낼 때, 후처리 리샘플링 지원.

View File

@@ -3,12 +3,12 @@
<h1>Retrieval-based-Voice-Conversion-WebUI</h1>
An easy-to-use Voice Conversion framework based on VITS.<br><br>
[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
<img src="https://counter.seku.su/cmoe?name=rvc&theme=r34" /><br>
[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
[![Licence](https://img.shields.io/github/license/liujing04/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/%E4%BD%BF%E7%94%A8%E9%9C%80%E9%81%B5%E5%AE%88%E7%9A%84%E5%8D%8F%E8%AE%AE-LICENSE.txt)
[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
[![Licence](https://img.shields.io/github/license/RVC-Project/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/LICENSE)
[![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/)
[![Discord](https://img.shields.io/badge/RVC%20Developers-Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/HcsmBBGyVk)
@@ -16,7 +16,7 @@ An easy-to-use Voice Conversion framework based on VITS.<br><br>
</div>
------
[**Changelog**](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Changelog_CN.md) | [**FAQ (Frequently Asked Questions)**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/FAQ-(Frequently-Asked-Questions))
[**Changelog**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/Changelog_EN.md) | [**FAQ (Frequently Asked Questions)**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/FAQ-(Frequently-Asked-Questions))
[**English**](./README.en.md) | [**中文简体**](../README.md) | [**日本語**](./README.ja.md) | [**한국어**](./README.ko.md) ([**韓國語**](./README.ko.han.md))
@@ -50,7 +50,7 @@ The following commands need to be executed in the environment of Python version
# Reference: https://pytorch.org/get-started/locally/
pip install torch torchvision torchaudio
#For Windows + Nvidia Ampere Architecture(RTX30xx), you need to specify the cuda version corresponding to pytorch according to the experience of https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/issues/21
#For Windows + Nvidia Ampere Architecture(RTX30xx), you need to specify the cuda version corresponding to pytorch according to the experience of https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/issues/21
#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
# Install the Poetry dependency management tool, skip if installed
@@ -104,7 +104,7 @@ There's also a tutorial on RVC in Chinese and you can check it out if needed.
+ [audio-slicer](https://github.com/openvpi/audio-slicer)
## Thanks to all contributors for their efforts
<a href="https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/graphs/contributors" target="_blank">
<img src="https://contrib.rocks/image?repo=liujing04/Retrieval-based-Voice-Conversion-WebUI" />
<a href="https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/graphs/contributors" target="_blank">
<img src="https://contrib.rocks/image?repo=RVC-Project/Retrieval-based-Voice-Conversion-WebUI" />
</a>

View File

@@ -3,12 +3,12 @@
<h1>Retrieval-based-Voice-Conversion-WebUI</h1>
VITSに基づく使いやすい音声変換voice changerframework<br><br>
[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
<img src="https://counter.seku.su/cmoe?name=rvc&theme=r34" /><br>
[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
[![Licence](https://img.shields.io/github/license/liujing04/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/%E4%BD%BF%E7%94%A8%E9%9C%80%E9%81%B5%E5%AE%88%E7%9A%84%E5%8D%8F%E8%AE%AE-LICENSE.txt)
[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
[![Licence](https://img.shields.io/github/license/RVC-Project/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/LICENSE)
[![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/)
[![Discord](https://img.shields.io/badge/RVC%20Developers-Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/HcsmBBGyVk)
@@ -17,7 +17,7 @@ VITSに基づく使いやすい音声変換voice changerframework<br><br>
------
[**更新日誌**](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Changelog_CN.md)
[**更新日誌**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/Changelog_CN.md)
[**English**](./README.en.md) | [**中文简体**](../README.md) | [**日本語**](./README.ja.md) | [**한국어**](./README.ko.md) ([**韓國語**](./README.ko.han.md))
@@ -99,6 +99,6 @@ Windowsをお使いの方は、直接`RVC-beta.7z`をダウンロード後に展
+ [audio-slicer](https://github.com/openvpi/audio-slicer)
## 貢献者(contributor)の皆様の尽力に感謝します
<a href="https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/graphs/contributors" target="_blank">
<img src="https://contrib.rocks/image?repo=liujing04/Retrieval-based-Voice-Conversion-WebUI" />
<a href="https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/graphs/contributors" target="_blank">
<img src="https://contrib.rocks/image?repo=RVC-Project/Retrieval-based-Voice-Conversion-WebUI" />
</a>

View File

@@ -3,12 +3,12 @@
<h1>Retrieval-based-Voice-Conversion-WebUI</h1>
VITS基盤의 簡單하고使用하기 쉬운音聲變換틀<br><br>
[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
<img src="https://counter.seku.su/cmoe?name=rvc&theme=r34" /><br>
[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
[![Licence](https://img.shields.io/github/license/liujing04/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/%E4%BD%BF%E7%94%A8%E9%9C%80%E9%81%B5%E5%AE%88%E7%9A%84%E5%8D%8F%E8%AE%AE-LICENSE.txt)
[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
[![Licence](https://img.shields.io/github/license/RVC-Project/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/LICENSE)
[![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/)
[![Discord](https://img.shields.io/badge/RVC%20Developers-Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/HcsmBBGyVk)
@@ -16,7 +16,7 @@ VITS基盤의 簡單하고使用하기 쉬운音聲變換틀<br><br>
</div>
------
[**更新日誌**](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Changelog_CN.md)
[**更新日誌**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/Changelog_KO.md)
[**English**](./README.en.md) | [**中文简体**](../README.md) | [**日本語**](./README.ja.md) | [**한국어**](./README.ko.md) ([**韓國語**](./README.ko.han.md))
@@ -94,7 +94,7 @@ Windows를 使用하는境遇 `RVC-beta.7z`를 다운로드 및 壓縮解除하
+ [audio-slicer](https://github.com/openvpi/audio-slicer)
## 모든寄與者분들의勞力에感謝드립니다
<a href="https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/graphs/contributors" target="_blank">
<img src="https://contrib.rocks/image?repo=liujing04/Retrieval-based-Voice-Conversion-WebUI" />
<a href="https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/graphs/contributors" target="_blank">
<img src="https://contrib.rocks/image?repo=RVC-Project/Retrieval-based-Voice-Conversion-WebUI" />
</a>

View File

@@ -3,12 +3,12 @@
<h1>Retrieval-based-Voice-Conversion-WebUI</h1>
VITS 기반의 간단하고 사용하기 쉬운 음성 변환 프레임워크.<br><br>
[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
<img src="https://counter.seku.su/cmoe?name=rvc&theme=r34" /><br>
[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
[![Licence](https://img.shields.io/github/license/liujing04/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/%E4%BD%BF%E7%94%A8%E9%9C%80%E9%81%B5%E5%AE%88%E7%9A%84%E5%8D%8F%E8%AE%AE-LICENSE.txt)
[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
[![Licence](https://img.shields.io/github/license/RVC-Project/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/LICENSE)
[![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/)
[![Discord](https://img.shields.io/badge/RVC%20Developers-Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/HcsmBBGyVk)
@@ -17,7 +17,7 @@ VITS 기반의 간단하고 사용하기 쉬운 음성 변환 프레임워크.<b
---
[**업데이트 로그**](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/blob/main/Changelog_KO.md)
[**업데이트 로그**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/Changelog_KO.md)
[**English**](./README.en.md) | [**中文简体**](../README.md) | [**日本語**](./README.ja.md) | [**한국어**](./README.ko.md) ([**韓國語**](./README.ko.han.md))
@@ -51,7 +51,7 @@ poetry를 통해 dependecies를 설치하는 것을 권장합니다.
# 참조: https://pytorch.org/get-started/locally/
pip install torch torchvision torchaudio
# Windows + Nvidia Ampere Architecture(RTX30xx)를 사용하고 있다면, https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/issues/21 에서 명시된 것과 같이 PyTorch에 맞는 CUDA 버전을 지정해야 합니다.
# Windows + Nvidia Ampere Architecture(RTX30xx)를 사용하고 있다면, https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/issues/21 에서 명시된 것과 같이 PyTorch에 맞는 CUDA 버전을 지정해야 합니다.
#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
# Poetry 설치, 이미 설치되어 있는 경우 건너뛰기 가능
@@ -107,6 +107,6 @@ Windows를 사용하는 경우 `RVC-beta.7z`를 다운로드 및 압축 해제
## 모든 기여자 분들의 노력에 감사드립니다.
<a href="https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI/graphs/contributors" target="_blank">
<img src="https://contrib.rocks/image?repo=liujing04/Retrieval-based-Voice-Conversion-WebUI" />
<a href="https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/graphs/contributors" target="_blank">
<img src="https://contrib.rocks/image?repo=RVC-Project/Retrieval-based-Voice-Conversion-WebUI" />
</a>

186
environment_dml.yaml Normal file
View File

@@ -0,0 +1,186 @@
name: pydml
channels:
- pytorch
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/
- defaults
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/fastai/
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/
dependencies:
- abseil-cpp=20211102.0=hd77b12b_0
- absl-py=1.3.0=py310haa95532_0
- aiohttp=3.8.3=py310h2bbff1b_0
- aiosignal=1.2.0=pyhd3eb1b0_0
- async-timeout=4.0.2=py310haa95532_0
- attrs=22.1.0=py310haa95532_0
- blas=1.0=mkl
- blinker=1.4=py310haa95532_0
- bottleneck=1.3.5=py310h9128911_0
- brotli=1.0.9=h2bbff1b_7
- brotli-bin=1.0.9=h2bbff1b_7
- brotlipy=0.7.0=py310h2bbff1b_1002
- bzip2=1.0.8=he774522_0
- c-ares=1.19.0=h2bbff1b_0
- ca-certificates=2023.05.30=haa95532_0
- cachetools=4.2.2=pyhd3eb1b0_0
- certifi=2023.5.7=py310haa95532_0
- cffi=1.15.1=py310h2bbff1b_3
- charset-normalizer=2.0.4=pyhd3eb1b0_0
- click=8.0.4=py310haa95532_0
- colorama=0.4.6=py310haa95532_0
- contourpy=1.0.5=py310h59b6b97_0
- cryptography=39.0.1=py310h21b164f_0
- cycler=0.11.0=pyhd3eb1b0_0
- fonttools=4.25.0=pyhd3eb1b0_0
- freetype=2.12.1=ha860e81_0
- frozenlist=1.3.3=py310h2bbff1b_0
- giflib=5.2.1=h8cc25b3_3
- glib=2.69.1=h5dc1a3c_2
- google-auth=2.6.0=pyhd3eb1b0_0
- google-auth-oauthlib=0.4.4=pyhd3eb1b0_0
- grpc-cpp=1.48.2=hf108199_0
- grpcio=1.48.2=py310hf108199_0
- gst-plugins-base=1.18.5=h9e645db_0
- gstreamer=1.18.5=hd78058f_0
- icu=58.2=ha925a31_3
- idna=3.4=py310haa95532_0
- intel-openmp=2023.1.0=h59b6b97_46319
- jpeg=9e=h2bbff1b_1
- kiwisolver=1.4.4=py310hd77b12b_0
- krb5=1.19.4=h5b6d351_0
- lerc=3.0=hd77b12b_0
- libbrotlicommon=1.0.9=h2bbff1b_7
- libbrotlidec=1.0.9=h2bbff1b_7
- libbrotlienc=1.0.9=h2bbff1b_7
- libclang=14.0.6=default_hb5a9fac_1
- libclang13=14.0.6=default_h8e68704_1
- libdeflate=1.17=h2bbff1b_0
- libffi=3.4.4=hd77b12b_0
- libiconv=1.16=h2bbff1b_2
- libogg=1.3.5=h2bbff1b_1
- libpng=1.6.39=h8cc25b3_0
- libprotobuf=3.20.3=h23ce68f_0
- libtiff=4.5.0=h6c2663c_2
- libuv=1.44.2=h2bbff1b_0
- libvorbis=1.3.7=he774522_0
- libwebp=1.2.4=hbc33d0d_1
- libwebp-base=1.2.4=h2bbff1b_1
- libxml2=2.10.3=h0ad7f3c_0
- libxslt=1.1.37=h2bbff1b_0
- lz4-c=1.9.4=h2bbff1b_0
- markdown=3.4.1=py310haa95532_0
- markupsafe=2.1.1=py310h2bbff1b_0
- matplotlib=3.7.1=py310haa95532_1
- matplotlib-base=3.7.1=py310h4ed8f06_1
- mkl=2023.1.0=h8bd8f75_46356
- mkl-service=2.4.0=py310h2bbff1b_1
- mkl_fft=1.3.6=py310h4ed8f06_1
- mkl_random=1.2.2=py310h4ed8f06_1
- multidict=6.0.2=py310h2bbff1b_0
- munkres=1.1.4=py_0
- numexpr=2.8.4=py310h2cd9be0_1
- numpy=1.24.3=py310h055cbcc_1
- numpy-base=1.24.3=py310h65a83cf_1
- oauthlib=3.2.2=py310haa95532_0
- openssl=1.1.1t=h2bbff1b_0
- packaging=23.0=py310haa95532_0
- pandas=1.5.3=py310h4ed8f06_0
- pcre=8.45=hd77b12b_0
- pillow=9.4.0=py310hd77b12b_0
- pip=23.0.1=py310haa95532_0
- ply=3.11=py310haa95532_0
- protobuf=3.20.3=py310hd77b12b_0
- pyasn1=0.4.8=pyhd3eb1b0_0
- pyasn1-modules=0.2.8=py_0
- pycparser=2.21=pyhd3eb1b0_0
- pyjwt=2.4.0=py310haa95532_0
- pyopenssl=23.0.0=py310haa95532_0
- pyparsing=3.0.9=py310haa95532_0
- pyqt=5.15.7=py310hd77b12b_0
- pyqt5-sip=12.11.0=py310hd77b12b_0
- pysocks=1.7.1=py310haa95532_0
- python=3.10.11=h966fe2a_2
- python-dateutil=2.8.2=pyhd3eb1b0_0
- pytorch-mutex=1.0=cpu
- pytz=2022.7=py310haa95532_0
- pyyaml=6.0=py310h2bbff1b_1
- qt-main=5.15.2=he8e5bd7_8
- qt-webengine=5.15.9=hb9a9bb5_5
- qtwebkit=5.212=h2bbfb41_5
- re2=2022.04.01=hd77b12b_0
- requests=2.29.0=py310haa95532_0
- requests-oauthlib=1.3.0=py_0
- rsa=4.7.2=pyhd3eb1b0_1
- setuptools=67.8.0=py310haa95532_0
- sip=6.6.2=py310hd77b12b_0
- six=1.16.0=pyhd3eb1b0_1
- sqlite=3.41.2=h2bbff1b_0
- tbb=2021.8.0=h59b6b97_0
- tensorboard=2.10.0=py310haa95532_0
- tensorboard-data-server=0.6.1=py310haa95532_0
- tensorboard-plugin-wit=1.8.1=py310haa95532_0
- tk=8.6.12=h2bbff1b_0
- toml=0.10.2=pyhd3eb1b0_0
- tornado=6.2=py310h2bbff1b_0
- tqdm=4.65.0=py310h9909e9c_0
- typing_extensions=4.5.0=py310haa95532_0
- tzdata=2023c=h04d1e81_0
- urllib3=1.26.16=py310haa95532_0
- vc=14.2=h21ff451_1
- vs2015_runtime=14.27.29016=h5e58377_2
- werkzeug=2.2.3=py310haa95532_0
- wheel=0.38.4=py310haa95532_0
- win_inet_pton=1.1.0=py310haa95532_0
- xz=5.4.2=h8cc25b3_0
- yaml=0.2.5=he774522_0
- yarl=1.8.1=py310h2bbff1b_0
- zlib=1.2.13=h8cc25b3_0
- zstd=1.5.5=hd43e919_0
- pip:
- antlr4-python3-runtime==4.8
- appdirs==1.4.4
- audioread==3.0.0
- bitarray==2.7.4
- cython==0.29.35
- decorator==5.1.1
- fairseq==0.12.2
- faiss-cpu==1.7.4
- filelock==3.12.0
- hydra-core==1.0.7
- jinja2==3.1.2
- joblib==1.2.0
- lazy-loader==0.2
- librosa==0.10.0.post2
- llvmlite==0.40.0
- lxml==4.9.2
- mpmath==1.3.0
- msgpack==1.0.5
- networkx==3.1
- noisereduce==2.0.1
- numba==0.57.0
- omegaconf==2.0.6
- opencv-python==4.7.0.72
- pooch==1.6.0
- portalocker==2.7.0
- pysimplegui==4.60.5
- pywin32==306
- pyworld==0.3.3
- regex==2023.5.5
- sacrebleu==2.3.1
- scikit-learn==1.2.2
- scipy==1.10.1
- sounddevice==0.4.6
- soundfile==0.12.1
- soxr==0.3.5
- sympy==1.12
- tabulate==0.9.0
- threadpoolctl==3.1.0
- torch==2.0.0
- torch-directml==0.2.0.dev230426
- torchaudio==2.0.1
- torchvision==0.15.1
- wget==3.2
prefix: D:\ProgramData\anaconda3_\envs\pydml

View File

@@ -4,7 +4,6 @@ now_dir = os.getcwd()
sys.path.append(now_dir)
from my_utils import load_audio
import pyworld
from scipy.io import wavfile
import numpy as np, logging
import torchcrepe # Fork Feature. Crepe algo for training and preprocess
import torch

View File

@@ -1,9 +1,12 @@
import os, sys, traceback
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
# device=sys.argv[1]
n_part = int(sys.argv[2])
i_part = int(sys.argv[3])
if len(sys.argv) == 5:
if len(sys.argv) == 6:
exp_dir = sys.argv[4]
version = sys.argv[5]
else:
@@ -17,14 +20,11 @@ import soundfile as sf
import numpy as np
from fairseq import checkpoint_utils
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"
if torch.cuda.is_available():
device = "cuda"
elif torch.backends.mps.is_available():
device = "mps"
else:
device = "cpu"
f = open("%s/extract_f0_feature.log" % exp_dir, "a+")

View File

@@ -22,8 +22,11 @@ def process(fn: str):
print("processing infer-web.py")
process("infer-web.py")
print("processing gui.py")
process("gui.py")
print("processing gui_v0.py")
process("gui_v0.py")
print("processing gui_v1.py")
process("gui_v1.py")
# Save as a JSON file
with open("./i18n/zh_CN.json", "w", encoding="utf-8") as f:

View File

@@ -1,2 +1,2 @@
runtime\python.exe gui.py
runtime\python.exe gui_v1.py
pause

View File

@@ -1,15 +1,3 @@
"""
0416后的更新
引入config中half
重建npy而不用填写
v2支持
无f0模型支持
修复
int16
增加无索引支持
f0算法改harvest(怎么看就只有这个会影响CPU占用)但是不这么改效果不好
"""
import os, sys, traceback, re
import json
@@ -31,7 +19,7 @@ import scipy.signal as signal
import torchcrepe
# import matplotlib.pyplot as plt
from infer_pack.models import (
from lib.infer_pack.models import (
SynthesizerTrnMs256NSFsid,
SynthesizerTrnMs256NSFsid_nono,
SynthesizerTrnMs768NSFsid,
@@ -302,7 +290,12 @@ class GUI:
self.launcher()
def load(self):
input_devices, output_devices, _, _ = self.get_devices()
(
input_devices,
output_devices,
input_devices_indices,
output_devices_indices,
) = self.get_devices()
try:
with open("values1.json", "r") as j:
data = json.load(j)
@@ -310,10 +303,14 @@ class GUI:
# Injecting f0_method into the json data
with open("values1.json", "w") as j:
data = {
"pth_path": " ",
"index_path": " ",
"sg_input_device": input_devices[sd.default.device[0]],
"sg_output_device": output_devices[sd.default.device[1]],
"pth_path": "",
"index_path": "",
"sg_input_device": input_devices[
input_devices_indices.index(sd.default.device[0])
],
"sg_output_device": output_devices[
output_devices_indices.index(sd.default.device[1])
],
"threhold": "-45",
"pitch": "0",
"index_rate": "0",
@@ -349,7 +346,7 @@ class GUI:
sg.FileBrowse(
i18n("Hubert模型"),
initial_folder=os.path.join(os.getcwd()),
file_types=((". pt"),),
file_types=(("pt files", "*.pt"),),
),
],
[
@@ -360,7 +357,7 @@ class GUI:
sg.FileBrowse(
i18n("选择.pth文件"),
initial_folder=os.path.join(os.getcwd(), "weights"),
file_types=((". pth"),),
file_types=(("weight files", "*.pth"),),
),
],
[
@@ -371,7 +368,7 @@ class GUI:
sg.FileBrowse(
i18n("选择.index文件"),
initial_folder=os.path.join(os.getcwd(), "logs"),
file_types=((". index"),),
file_types=(("index files", "*.index"),),
),
],
[
@@ -383,7 +380,7 @@ class GUI:
sg.FileBrowse(
i18n("选择.npy文件"),
initial_folder=os.path.join(os.getcwd(), "logs"),
file_types=((". npy"),),
file_types=(("feature files", "*.npy"),),
),
],
],
@@ -639,6 +636,7 @@ class GUI:
接受音频输入
"""
with sd.Stream(
channels=2,
callback=self.audio_callback,
blocksize=self.block_frame,
samplerate=self.config.samplerate,

637
gui_v1.py Normal file
View File

@@ -0,0 +1,637 @@
import os, sys
if sys.platform == "darwin":
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
now_dir = os.getcwd()
sys.path.append(now_dir)
import multiprocessing
class Harvest(multiprocessing.Process):
def __init__(self, inp_q, opt_q):
multiprocessing.Process.__init__(self)
self.inp_q = inp_q
self.opt_q = opt_q
def run(self):
import numpy as np, pyworld
while 1:
idx, x, res_f0, n_cpu, ts = self.inp_q.get()
f0, t = pyworld.harvest(
x.astype(np.double),
fs=16000,
f0_ceil=1100,
f0_floor=50,
frame_period=10,
)
res_f0[idx] = f0
if len(res_f0.keys()) >= n_cpu:
self.opt_q.put(ts)
if __name__ == "__main__":
from multiprocessing import Queue
from queue import Empty
import numpy as np
import multiprocessing
import traceback, re
import json
import PySimpleGUI as sg
import sounddevice as sd
import noisereduce as nr
from multiprocessing import cpu_count
import librosa, torch, time, threading
import torch.nn.functional as F
import torchaudio.transforms as tat
from i18n import I18nAuto
i18n = I18nAuto()
device = torch.device(
"cuda"
if torch.cuda.is_available()
else ("mps" if torch.backends.mps.is_available() else "cpu")
)
current_dir = os.getcwd()
inp_q = Queue()
opt_q = Queue()
n_cpu = min(cpu_count(), 8)
for _ in range(n_cpu):
Harvest(inp_q, opt_q).start()
from rvc_for_realtime import RVC
class GUIConfig:
def __init__(self) -> None:
self.pth_path: str = ""
self.index_path: str = ""
self.pitch: int = 12
self.samplerate: int = 40000
self.block_time: float = 1.0 # s
self.buffer_num: int = 1
self.threhold: int = -30
self.crossfade_time: float = 0.08
self.extra_time: float = 0.04
self.I_noise_reduce = False
self.O_noise_reduce = False
self.index_rate = 0.3
self.n_cpu = min(n_cpu, 8)
self.f0method = "harvest"
class GUI:
def __init__(self) -> None:
self.config = GUIConfig()
self.flag_vc = False
self.launcher()
def load(self):
input_devices, output_devices, _, _ = self.get_devices()
try:
with open("values1.json", "r") as j:
data = json.load(j)
data["pm"] = data["f0method"] == "pm"
data["harvest"] = data["f0method"] == "harvest"
data["crepe"] = data["f0method"] == "crepe"
data["rmvpe"] = data["f0method"] == "rmvpe"
except:
with open("values1.json", "w") as j:
data = {
"pth_path": " ",
"index_path": " ",
"sg_input_device": input_devices[sd.default.device[0]],
"sg_output_device": output_devices[sd.default.device[1]],
"threhold": "-45",
"pitch": "0",
"index_rate": "0",
"block_time": "1",
"crossfade_length": "0.04",
"extra_time": "1",
"f0method": "rmvpe",
}
return data
def launcher(self):
data = self.load()
sg.theme("LightBlue3")
input_devices, output_devices, _, _ = self.get_devices()
layout = [
[
sg.Frame(
title=i18n("加载模型"),
layout=[
[
sg.Input(
default_text=data.get("pth_path", ""),
key="pth_path",
),
sg.FileBrowse(
i18n("选择.pth文件"),
initial_folder=os.path.join(os.getcwd(), "weights"),
file_types=((". pth"),),
),
],
[
sg.Input(
default_text=data.get("index_path", ""),
key="index_path",
),
sg.FileBrowse(
i18n("选择.index文件"),
initial_folder=os.path.join(os.getcwd(), "logs"),
file_types=((". index"),),
),
],
],
)
],
[
sg.Frame(
layout=[
[
sg.Text(i18n("输入设备")),
sg.Combo(
input_devices,
key="sg_input_device",
default_value=data.get("sg_input_device", ""),
),
],
[
sg.Text(i18n("输出设备")),
sg.Combo(
output_devices,
key="sg_output_device",
default_value=data.get("sg_output_device", ""),
),
],
],
title=i18n("音频设备(请使用同种类驱动)"),
)
],
[
sg.Frame(
layout=[
[
sg.Text(i18n("响应阈值")),
sg.Slider(
range=(-60, 0),
key="threhold",
resolution=1,
orientation="h",
default_value=data.get("threhold", ""),
),
],
[
sg.Text(i18n("音调设置")),
sg.Slider(
range=(-24, 24),
key="pitch",
resolution=1,
orientation="h",
default_value=data.get("pitch", ""),
),
],
[
sg.Text(i18n("Index Rate")),
sg.Slider(
range=(0.0, 1.0),
key="index_rate",
resolution=0.01,
orientation="h",
default_value=data.get("index_rate", ""),
),
],
[
sg.Text(i18n("音高算法")),
sg.Radio(
"pm",
"f0method",
key="pm",
default=data.get("pm", "") == True,
),
sg.Radio(
"harvest",
"f0method",
key="harvest",
default=data.get("harvest", "") == True,
),
sg.Radio(
"crepe",
"f0method",
key="crepe",
default=data.get("crepe", "") == True,
),
sg.Radio(
"rmvpe",
"f0method",
key="rmvpe",
default=data.get("rmvpe", "") == True,
),
],
],
title=i18n("常规设置"),
),
sg.Frame(
layout=[
[
sg.Text(i18n("采样长度")),
sg.Slider(
range=(0.12, 2.4),
key="block_time",
resolution=0.03,
orientation="h",
default_value=data.get("block_time", ""),
),
],
[
sg.Text(i18n("harvest进程数")),
sg.Slider(
range=(1, n_cpu),
key="n_cpu",
resolution=1,
orientation="h",
default_value=data.get(
"n_cpu", min(self.config.n_cpu, n_cpu)
),
),
],
[
sg.Text(i18n("淡入淡出长度")),
sg.Slider(
range=(0.01, 0.15),
key="crossfade_length",
resolution=0.01,
orientation="h",
default_value=data.get("crossfade_length", ""),
),
],
[
sg.Text(i18n("额外推理时长")),
sg.Slider(
range=(0.05, 3.00),
key="extra_time",
resolution=0.01,
orientation="h",
default_value=data.get("extra_time", ""),
),
],
[
sg.Checkbox(i18n("输入降噪"), key="I_noise_reduce"),
sg.Checkbox(i18n("输出降噪"), key="O_noise_reduce"),
],
],
title=i18n("性能设置"),
),
],
[
sg.Button(i18n("开始音频转换"), key="start_vc"),
sg.Button(i18n("停止音频转换"), key="stop_vc"),
sg.Text(i18n("推理时间(ms):")),
sg.Text("0", key="infer_time"),
],
]
self.window = sg.Window("RVC - GUI", layout=layout)
self.event_handler()
def event_handler(self):
while True:
event, values = self.window.read()
if event == sg.WINDOW_CLOSED:
self.flag_vc = False
exit()
if event == "start_vc" and self.flag_vc == False:
if self.set_values(values) == True:
print("using_cuda:" + str(torch.cuda.is_available()))
self.start_vc()
settings = {
"pth_path": values["pth_path"],
"index_path": values["index_path"],
"sg_input_device": values["sg_input_device"],
"sg_output_device": values["sg_output_device"],
"threhold": values["threhold"],
"pitch": values["pitch"],
"index_rate": values["index_rate"],
"block_time": values["block_time"],
"crossfade_length": values["crossfade_length"],
"extra_time": values["extra_time"],
"n_cpu": values["n_cpu"],
"f0method": ["pm", "harvest", "crepe", "rmvpe"][
[
values["pm"],
values["harvest"],
values["crepe"],
values["rmvpe"],
].index(True)
],
}
with open("values1.json", "w") as j:
json.dump(settings, j)
if event == "stop_vc" and self.flag_vc == True:
self.flag_vc = False
def set_values(self, values):
if len(values["pth_path"].strip()) == 0:
sg.popup(i18n("请选择pth文件"))
return False
if len(values["index_path"].strip()) == 0:
sg.popup(i18n("请选择index文件"))
return False
pattern = re.compile("[^\x00-\x7F]+")
if pattern.findall(values["pth_path"]):
sg.popup(i18n("pth文件路径不可包含中文"))
return False
if pattern.findall(values["index_path"]):
sg.popup(i18n("index文件路径不可包含中文"))
return False
self.set_devices(values["sg_input_device"], values["sg_output_device"])
self.config.pth_path = values["pth_path"]
self.config.index_path = values["index_path"]
self.config.threhold = values["threhold"]
self.config.pitch = values["pitch"]
self.config.block_time = values["block_time"]
self.config.crossfade_time = values["crossfade_length"]
self.config.extra_time = values["extra_time"]
self.config.I_noise_reduce = values["I_noise_reduce"]
self.config.O_noise_reduce = values["O_noise_reduce"]
self.config.index_rate = values["index_rate"]
self.config.n_cpu = values["n_cpu"]
self.config.f0method = ["pm", "harvest", "crepe", "rmvpe"][
[
values["pm"],
values["harvest"],
values["crepe"],
values["rmvpe"],
].index(True)
]
return True
def start_vc(self):
torch.cuda.empty_cache()
self.flag_vc = True
self.rvc = RVC(
self.config.pitch,
self.config.pth_path,
self.config.index_path,
self.config.index_rate,
self.config.n_cpu,
inp_q,
opt_q,
device,
)
self.config.samplerate = self.rvc.tgt_sr
self.config.crossfade_time = min(
self.config.crossfade_time, self.config.block_time
)
self.block_frame = int(self.config.block_time * self.config.samplerate)
self.crossfade_frame = int(
self.config.crossfade_time * self.config.samplerate
)
self.sola_search_frame = int(0.01 * self.config.samplerate)
self.extra_frame = int(self.config.extra_time * self.config.samplerate)
self.zc = self.rvc.tgt_sr // 100
self.input_wav: np.ndarray = np.zeros(
int(
np.ceil(
(
self.extra_frame
+ self.crossfade_frame
+ self.sola_search_frame
+ self.block_frame
)
/ self.zc
)
* self.zc
),
dtype="float32",
)
self.output_wav_cache: torch.Tensor = torch.zeros(
int(
np.ceil(
(
self.extra_frame
+ self.crossfade_frame
+ self.sola_search_frame
+ self.block_frame
)
/ self.zc
)
* self.zc
),
device=device,
dtype=torch.float32,
)
self.pitch: np.ndarray = np.zeros(
self.input_wav.shape[0] // self.zc,
dtype="int32",
)
self.pitchf: np.ndarray = np.zeros(
self.input_wav.shape[0] // self.zc,
dtype="float64",
)
self.output_wav: torch.Tensor = torch.zeros(
self.block_frame, device=device, dtype=torch.float32
)
self.sola_buffer: torch.Tensor = torch.zeros(
self.crossfade_frame, device=device, dtype=torch.float32
)
self.fade_in_window: torch.Tensor = torch.linspace(
0.0, 1.0, steps=self.crossfade_frame, device=device, dtype=torch.float32
)
self.fade_out_window: torch.Tensor = 1 - self.fade_in_window
self.resampler = tat.Resample(
orig_freq=self.config.samplerate, new_freq=16000, dtype=torch.float32
).to(device)
thread_vc = threading.Thread(target=self.soundinput)
thread_vc.start()
def soundinput(self):
"""
接受音频输入
"""
channels = 1 if sys.platform == "darwin" else 2
with sd.Stream(
channels=channels,
callback=self.audio_callback,
blocksize=self.block_frame,
samplerate=self.config.samplerate,
dtype="float32",
):
while self.flag_vc:
time.sleep(self.config.block_time)
print("Audio block passed.")
print("ENDing VC")
def audio_callback(
self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
):
"""
音频处理
"""
start_time = time.perf_counter()
indata = librosa.to_mono(indata.T)
if self.config.I_noise_reduce:
indata[:] = nr.reduce_noise(y=indata, sr=self.config.samplerate)
"""noise gate"""
frame_length = 2048
hop_length = 1024
rms = librosa.feature.rms(
y=indata, frame_length=frame_length, hop_length=hop_length
)
if self.config.threhold > -60:
db_threhold = (
librosa.amplitude_to_db(rms, ref=1.0)[0] < self.config.threhold
)
for i in range(db_threhold.shape[0]):
if db_threhold[i]:
indata[i * hop_length : (i + 1) * hop_length] = 0
self.input_wav[:] = np.append(self.input_wav[self.block_frame :], indata)
# infer
inp = torch.from_numpy(self.input_wav).to(device)
##0
res1 = self.resampler(inp)
###55%
rate1 = self.block_frame / (
self.extra_frame
+ self.crossfade_frame
+ self.sola_search_frame
+ self.block_frame
)
rate2 = (
self.crossfade_frame + self.sola_search_frame + self.block_frame
) / (
self.extra_frame
+ self.crossfade_frame
+ self.sola_search_frame
+ self.block_frame
)
res2 = self.rvc.infer(
res1,
res1[-self.block_frame :].cpu().numpy(),
rate1,
rate2,
self.pitch,
self.pitchf,
self.config.f0method,
)
self.output_wav_cache[-res2.shape[0] :] = res2
infer_wav = self.output_wav_cache[
-self.crossfade_frame - self.sola_search_frame - self.block_frame :
]
# SOLA algorithm from https://github.com/yxlllc/DDSP-SVC
cor_nom = F.conv1d(
infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame],
self.sola_buffer[None, None, :],
)
cor_den = torch.sqrt(
F.conv1d(
infer_wav[
None, None, : self.crossfade_frame + self.sola_search_frame
]
** 2,
torch.ones(1, 1, self.crossfade_frame, device=device),
)
+ 1e-8
)
if sys.platform == "darwin":
cor_nom = cor_nom.cpu()
cor_den = cor_den.cpu()
sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
print("sola offset: " + str(int(sola_offset)))
self.output_wav[:] = infer_wav[sola_offset : sola_offset + self.block_frame]
self.output_wav[: self.crossfade_frame] *= self.fade_in_window
self.output_wav[: self.crossfade_frame] += self.sola_buffer[:]
# crossfade
if sola_offset < self.sola_search_frame:
self.sola_buffer[:] = (
infer_wav[
-self.sola_search_frame
- self.crossfade_frame
+ sola_offset : -self.sola_search_frame
+ sola_offset
]
* self.fade_out_window
)
else:
self.sola_buffer[:] = (
infer_wav[-self.crossfade_frame :] * self.fade_out_window
)
if self.config.O_noise_reduce:
if sys.platform == "darwin":
noise_reduced_signal = nr.reduce_noise(
y=self.output_wav[:].cpu().numpy(), sr=self.config.samplerate
)
outdata[:] = noise_reduced_signal[:, np.newaxis]
else:
outdata[:] = np.tile(
nr.reduce_noise(
y=self.output_wav[:].cpu().numpy(),
sr=self.config.samplerate,
),
(2, 1),
).T
else:
if sys.platform == "darwin":
outdata[:] = self.output_wav[:].cpu().numpy()[:, np.newaxis]
else:
outdata[:] = self.output_wav[:].repeat(2, 1).t().cpu().numpy()
total_time = time.perf_counter() - start_time
self.window["infer_time"].update(int(total_time * 1000))
print("infer time:" + str(total_time))
def get_devices(self, update: bool = True):
"""获取设备列表"""
if update:
sd._terminate()
sd._initialize()
devices = sd.query_devices()
hostapis = sd.query_hostapis()
for hostapi in hostapis:
for device_idx in hostapi["devices"]:
devices[device_idx]["hostapi_name"] = hostapi["name"]
input_devices = [
f"{d['name']} ({d['hostapi_name']})"
for d in devices
if d["max_input_channels"] > 0
]
output_devices = [
f"{d['name']} ({d['hostapi_name']})"
for d in devices
if d["max_output_channels"] > 0
]
input_devices_indices = [
d["index"] if "index" in d else d["name"]
for d in devices
if d["max_input_channels"] > 0
]
output_devices_indices = [
d["index"] if "index" in d else d["name"]
for d in devices
if d["max_output_channels"] > 0
]
return (
input_devices,
output_devices,
input_devices_indices,
output_devices_indices,
)
def set_devices(self, input_device, output_device):
"""设置输出设备"""
(
input_devices,
output_devices,
input_device_indices,
output_device_indices,
) = self.get_devices()
sd.default.device[0] = input_device_indices[
input_devices.index(input_device)
]
sd.default.device[1] = output_device_indices[
output_devices.index(output_device)
]
print("input device:" + str(sd.default.device[0]) + ":" + str(input_device))
print(
"output device:" + str(sd.default.device[1]) + ":" + str(output_device)
)
gui = GUI()

710
guidml.py Normal file
View File

@@ -0,0 +1,710 @@
"""
0416后的更新
引入config中half
重建npy而不用填写
v2支持
无f0模型支持
修复
int16
增加无索引支持
f0算法改harvest(怎么看就只有这个会影响CPU占用),但是不这么改效果不好
"""
import os, sys, traceback, re
import json
now_dir = os.getcwd()
sys.path.append(now_dir)
from config import Config
Config = Config()
import torch_directml
import PySimpleGUI as sg
import sounddevice as sd
import noisereduce as nr
import numpy as np
from fairseq import checkpoint_utils
import librosa, torch, pyworld, faiss, time, threading
import torch.nn.functional as F
import torchaudio.transforms as tat
import scipy.signal as signal
# import matplotlib.pyplot as plt
from lib.infer_pack.models import (
SynthesizerTrnMs256NSFsid,
SynthesizerTrnMs256NSFsid_nono,
SynthesizerTrnMs768NSFsid,
SynthesizerTrnMs768NSFsid_nono,
)
from i18n import I18nAuto
i18n = I18nAuto()
device = torch_directml.device(torch_directml.default_device())
current_dir = os.getcwd()
class RVC:
def __init__(
self, key, hubert_path, pth_path, index_path, npy_path, index_rate
) -> None:
"""
初始化
"""
try:
self.f0_up_key = key
self.time_step = 160 / 16000 * 1000
self.f0_min = 50
self.f0_max = 1100
self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
self.sr = 16000
self.window = 160
if index_rate != 0:
self.index = faiss.read_index(index_path)
# self.big_npy = np.load(npy_path)
self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
print("index search enabled")
self.index_rate = index_rate
model_path = hubert_path
print("load model(s) from {}".format(model_path))
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
[model_path],
suffix="",
)
self.model = models[0]
self.model = self.model.to(device)
if Config.is_half:
self.model = self.model.half()
else:
self.model = self.model.float()
self.model.eval()
cpt = torch.load(pth_path, map_location="cpu")
self.tgt_sr = cpt["config"][-1]
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
self.if_f0 = cpt.get("f0", 1)
self.version = cpt.get("version", "v1")
if self.version == "v1":
if self.if_f0 == 1:
self.net_g = SynthesizerTrnMs256NSFsid(
*cpt["config"], is_half=Config.is_half
)
else:
self.net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
elif self.version == "v2":
if self.if_f0 == 1:
self.net_g = SynthesizerTrnMs768NSFsid(
*cpt["config"], is_half=Config.is_half
)
else:
self.net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
del self.net_g.enc_q
print(self.net_g.load_state_dict(cpt["weight"], strict=False))
self.net_g.eval().to(device)
if Config.is_half:
self.net_g = self.net_g.half()
else:
self.net_g = self.net_g.float()
except:
print(traceback.format_exc())
def get_f0(self, x, f0_up_key, inp_f0=None):
x_pad = 1
f0_min = 50
f0_max = 1100
f0_mel_min = 1127 * np.log(1 + f0_min / 700)
f0_mel_max = 1127 * np.log(1 + f0_max / 700)
f0, t = pyworld.harvest(
x.astype(np.double),
fs=self.sr,
f0_ceil=f0_max,
f0_floor=f0_min,
frame_period=10,
)
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
f0 = signal.medfilt(f0, 3)
f0 *= pow(2, f0_up_key / 12)
# with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
tf0 = self.sr // self.window # 每秒f0点数
if inp_f0 is not None:
delta_t = np.round(
(inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1
).astype("int16")
replace_f0 = np.interp(
list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1]
)
shape = f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)].shape[0]
f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)] = replace_f0[:shape]
# with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
f0bak = f0.copy()
f0_mel = 1127 * np.log(1 + f0 / 700)
f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
f0_mel_max - f0_mel_min
) + 1
f0_mel[f0_mel <= 1] = 1
f0_mel[f0_mel > 255] = 255
f0_coarse = np.rint(f0_mel).astype(np.int)
return f0_coarse, f0bak # 1-0
def infer(self, feats: torch.Tensor) -> np.ndarray:
"""
推理函数
"""
audio = feats.clone().cpu().numpy()
assert feats.dim() == 1, feats.dim()
feats = feats.view(1, -1)
padding_mask = torch.BoolTensor(feats.shape).fill_(False)
if Config.is_half:
feats = feats.half()
else:
feats = feats.float()
inputs = {
"source": feats.to(device),
"padding_mask": padding_mask.to(device),
"output_layer": 9 if self.version == "v1" else 12,
}
torch.cuda.synchronize()
with torch.no_grad():
logits = self.model.extract_features(**inputs)
feats = (
self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
)
####索引优化
try:
if (
hasattr(self, "index")
and hasattr(self, "big_npy")
and self.index_rate != 0
):
npy = feats[0].cpu().numpy().astype("float32")
score, ix = self.index.search(npy, k=8)
weight = np.square(1 / score)
weight /= weight.sum(axis=1, keepdims=True)
npy = np.sum(self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
if Config.is_half:
npy = npy.astype("float16")
feats = (
torch.from_numpy(npy).unsqueeze(0).to(device) * self.index_rate
+ (1 - self.index_rate) * feats
)
else:
print("index search FAIL or disabled")
except:
traceback.print_exc()
print("index search FAIL")
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
torch.cuda.synchronize()
print(feats.shape)
if self.if_f0 == 1:
pitch, pitchf = self.get_f0(audio, self.f0_up_key)
p_len = min(feats.shape[1], 13000, pitch.shape[0]) # 太大了爆显存
else:
pitch, pitchf = None, None
p_len = min(feats.shape[1], 13000) # 太大了爆显存
torch.cuda.synchronize()
# print(feats.shape,pitch.shape)
feats = feats[:, :p_len, :]
if self.if_f0 == 1:
pitch = pitch[:p_len]
pitchf = pitchf[:p_len]
pitch = torch.LongTensor(pitch).unsqueeze(0).to(device)
pitchf = torch.FloatTensor(pitchf).unsqueeze(0).to(device)
p_len = torch.LongTensor([p_len]).to(device)
ii = 0 # sid
sid = torch.LongTensor([ii]).to(device)
with torch.no_grad():
if self.if_f0 == 1:
infered_audio = (
self.net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
.data.cpu()
.float()
)
else:
infered_audio = (
self.net_g.infer(feats, p_len, sid)[0][0, 0].data.cpu().float()
)
torch.cuda.synchronize()
return infered_audio
class GUIConfig:
def __init__(self) -> None:
self.hubert_path: str = ""
self.pth_path: str = ""
self.index_path: str = ""
self.npy_path: str = ""
self.pitch: int = 12
self.samplerate: int = 44100
self.block_time: float = 1.0 # s
self.buffer_num: int = 1
self.threhold: int = -30
self.crossfade_time: float = 0.08
self.extra_time: float = 0.04
self.I_noise_reduce = False
self.O_noise_reduce = False
self.index_rate = 0.3
class GUI:
def __init__(self) -> None:
self.config = GUIConfig()
self.flag_vc = False
self.launcher()
def load(self):
(
input_devices,
output_devices,
input_devices_indices,
output_devices_indices,
) = self.get_devices()
try:
with open("values1.json", "r") as j:
data = json.load(j)
except:
with open("values1.json", "w") as j:
data = {
"pth_path": "",
"index_path": "",
"sg_input_device": input_devices[
input_devices_indices.index(sd.default.device[0])
],
"sg_output_device": output_devices[
output_devices_indices.index(sd.default.device[1])
],
"threhold": "-45",
"pitch": "0",
"index_rate": "0",
"block_time": "1",
"crossfade_length": "0.04",
"extra_time": "1",
}
return data
def launcher(self):
data = self.load()
sg.theme("LightBlue3")
input_devices, output_devices, _, _ = self.get_devices()
layout = [
[
sg.Frame(
title=i18n("加载模型"),
layout=[
[
sg.Input(
default_text="hubert_base.pt",
key="hubert_path",
disabled=True,
),
sg.FileBrowse(
i18n("Hubert模型"),
initial_folder=os.path.join(os.getcwd()),
file_types=(("pt files", "*.pt"),),
),
],
[
sg.Input(
default_text=data.get("pth_path", ""),
key="pth_path",
),
sg.FileBrowse(
i18n("选择.pth文件"),
initial_folder=os.path.join(os.getcwd(), "weights"),
file_types=(("weight files", "*.pth"),),
),
],
[
sg.Input(
default_text=data.get("index_path", ""),
key="index_path",
),
sg.FileBrowse(
i18n("选择.index文件"),
initial_folder=os.path.join(os.getcwd(), "logs"),
file_types=(("index files", "*.index"),),
),
],
[
sg.Input(
default_text="你不需要填写这个You don't need write this.",
key="npy_path",
disabled=True,
),
sg.FileBrowse(
i18n("选择.npy文件"),
initial_folder=os.path.join(os.getcwd(), "logs"),
file_types=(("feature files", "*.npy"),),
),
],
],
)
],
[
sg.Frame(
layout=[
[
sg.Text(i18n("输入设备")),
sg.Combo(
input_devices,
key="sg_input_device",
default_value=data.get("sg_input_device", ""),
),
],
[
sg.Text(i18n("输出设备")),
sg.Combo(
output_devices,
key="sg_output_device",
default_value=data.get("sg_output_device", ""),
),
],
],
title=i18n("音频设备(请使用同种类驱动)"),
)
],
[
sg.Frame(
layout=[
[
sg.Text(i18n("响应阈值")),
sg.Slider(
range=(-60, 0),
key="threhold",
resolution=1,
orientation="h",
default_value=data.get("threhold", ""),
),
],
[
sg.Text(i18n("音调设置")),
sg.Slider(
range=(-24, 24),
key="pitch",
resolution=1,
orientation="h",
default_value=data.get("pitch", ""),
),
],
[
sg.Text(i18n("Index Rate")),
sg.Slider(
range=(0.0, 1.0),
key="index_rate",
resolution=0.01,
orientation="h",
default_value=data.get("index_rate", ""),
),
],
],
title=i18n("常规设置"),
),
sg.Frame(
layout=[
[
sg.Text(i18n("采样长度")),
sg.Slider(
range=(0.1, 3.0),
key="block_time",
resolution=0.1,
orientation="h",
default_value=data.get("block_time", ""),
),
],
[
sg.Text(i18n("淡入淡出长度")),
sg.Slider(
range=(0.01, 0.15),
key="crossfade_length",
resolution=0.01,
orientation="h",
default_value=data.get("crossfade_length", ""),
),
],
[
sg.Text(i18n("额外推理时长")),
sg.Slider(
range=(0.05, 3.00),
key="extra_time",
resolution=0.01,
orientation="h",
default_value=data.get("extra_time", ""),
),
],
[
sg.Checkbox(i18n("输入降噪"), key="I_noise_reduce"),
sg.Checkbox(i18n("输出降噪"), key="O_noise_reduce"),
],
],
title=i18n("性能设置"),
),
],
[
sg.Button(i18n("开始音频转换"), key="start_vc"),
sg.Button(i18n("停止音频转换"), key="stop_vc"),
sg.Text(i18n("推理时间(ms):")),
sg.Text("0", key="infer_time"),
],
]
self.window = sg.Window("RVC - GUI", layout=layout)
self.event_handler()
def event_handler(self):
while True:
event, values = self.window.read()
if event == sg.WINDOW_CLOSED:
self.flag_vc = False
exit()
if event == "start_vc" and self.flag_vc == False:
if self.set_values(values) == True:
print("using_cuda:" + str(torch.cuda.is_available()))
self.start_vc()
settings = {
"pth_path": values["pth_path"],
"index_path": values["index_path"],
"sg_input_device": values["sg_input_device"],
"sg_output_device": values["sg_output_device"],
"threhold": values["threhold"],
"pitch": values["pitch"],
"index_rate": values["index_rate"],
"block_time": values["block_time"],
"crossfade_length": values["crossfade_length"],
"extra_time": values["extra_time"],
}
with open("values1.json", "w") as j:
json.dump(settings, j)
if event == "stop_vc" and self.flag_vc == True:
self.flag_vc = False
def set_values(self, values):
if len(values["pth_path"].strip()) == 0:
sg.popup(i18n("请选择pth文件"))
return False
if len(values["index_path"].strip()) == 0:
sg.popup(i18n("请选择index文件"))
return False
pattern = re.compile("[^\x00-\x7F]+")
if pattern.findall(values["hubert_path"]):
sg.popup(i18n("hubert模型路径不可包含中文"))
return False
if pattern.findall(values["pth_path"]):
sg.popup(i18n("pth文件路径不可包含中文"))
return False
if pattern.findall(values["index_path"]):
sg.popup(i18n("index文件路径不可包含中文"))
return False
self.set_devices(values["sg_input_device"], values["sg_output_device"])
self.config.hubert_path = os.path.join(current_dir, "hubert_base.pt")
self.config.pth_path = values["pth_path"]
self.config.index_path = values["index_path"]
self.config.npy_path = values["npy_path"]
self.config.threhold = values["threhold"]
self.config.pitch = values["pitch"]
self.config.block_time = values["block_time"]
self.config.crossfade_time = values["crossfade_length"]
self.config.extra_time = values["extra_time"]
self.config.I_noise_reduce = values["I_noise_reduce"]
self.config.O_noise_reduce = values["O_noise_reduce"]
self.config.index_rate = values["index_rate"]
return True
def start_vc(self):
torch.cuda.empty_cache()
self.flag_vc = True
self.block_frame = int(self.config.block_time * self.config.samplerate)
self.crossfade_frame = int(self.config.crossfade_time * self.config.samplerate)
self.sola_search_frame = int(0.012 * self.config.samplerate)
self.delay_frame = int(0.01 * self.config.samplerate) # 往前预留0.02s
self.extra_frame = int(self.config.extra_time * self.config.samplerate)
self.rvc = None
self.rvc = RVC(
self.config.pitch,
self.config.hubert_path,
self.config.pth_path,
self.config.index_path,
self.config.npy_path,
self.config.index_rate,
)
self.input_wav: np.ndarray = np.zeros(
self.extra_frame
+ self.crossfade_frame
+ self.sola_search_frame
+ self.block_frame,
dtype="float32",
)
self.output_wav: torch.Tensor = torch.zeros(
self.block_frame, device=device, dtype=torch.float32
)
self.sola_buffer: torch.Tensor = torch.zeros(
self.crossfade_frame, device=device, dtype=torch.float32
)
self.fade_in_window: torch.Tensor = torch.linspace(
0.0, 1.0, steps=self.crossfade_frame, device=device, dtype=torch.float32
)
self.fade_out_window: torch.Tensor = 1 - self.fade_in_window
self.resampler1 = tat.Resample(
orig_freq=self.config.samplerate, new_freq=16000, dtype=torch.float32
)
self.resampler2 = tat.Resample(
orig_freq=self.rvc.tgt_sr,
new_freq=self.config.samplerate,
dtype=torch.float32,
)
thread_vc = threading.Thread(target=self.soundinput)
thread_vc.start()
def soundinput(self):
"""
接受音频输入
"""
with sd.Stream(
channels=2,
callback=self.audio_callback,
blocksize=self.block_frame,
samplerate=self.config.samplerate,
dtype="float32",
):
while self.flag_vc:
time.sleep(self.config.block_time)
print("Audio block passed.")
print("ENDing VC")
def audio_callback(
self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
):
"""
音频处理
"""
start_time = time.perf_counter()
indata = librosa.to_mono(indata.T)
if self.config.I_noise_reduce:
indata[:] = nr.reduce_noise(y=indata, sr=self.config.samplerate)
"""noise gate"""
frame_length = 2048
hop_length = 1024
rms = librosa.feature.rms(
y=indata, frame_length=frame_length, hop_length=hop_length
)
db_threhold = librosa.amplitude_to_db(rms, ref=1.0)[0] < self.config.threhold
# print(rms.shape,db.shape,db)
for i in range(db_threhold.shape[0]):
if db_threhold[i]:
indata[i * hop_length : (i + 1) * hop_length] = 0
self.input_wav[:] = np.append(self.input_wav[self.block_frame :], indata)
# infer
print("input_wav:" + str(self.input_wav.shape))
# print('infered_wav:'+str(infer_wav.shape))
infer_wav: torch.Tensor = self.resampler2(
self.rvc.infer(self.resampler1(torch.from_numpy(self.input_wav)))
)[-self.crossfade_frame - self.sola_search_frame - self.block_frame :].to(
device
)
print("infer_wav:" + str(infer_wav.shape))
# SOLA algorithm from https://github.com/yxlllc/DDSP-SVC
cor_nom = F.conv1d(
infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame],
self.sola_buffer[None, None, :],
)
cor_den = torch.sqrt(
F.conv1d(
infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame]
** 2,
torch.ones(1, 1, self.crossfade_frame, device=device),
)
+ 1e-8
)
sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
print("sola offset: " + str(int(sola_offset)))
# crossfade
self.output_wav[:] = infer_wav[sola_offset : sola_offset + self.block_frame]
self.output_wav[: self.crossfade_frame] *= self.fade_in_window
self.output_wav[: self.crossfade_frame] += self.sola_buffer[:]
if sola_offset < self.sola_search_frame:
self.sola_buffer[:] = (
infer_wav[
-self.sola_search_frame
- self.crossfade_frame
+ sola_offset : -self.sola_search_frame
+ sola_offset
]
* self.fade_out_window
)
else:
self.sola_buffer[:] = (
infer_wav[-self.crossfade_frame :] * self.fade_out_window
)
if self.config.O_noise_reduce:
outdata[:] = np.tile(
nr.reduce_noise(
y=self.output_wav[:].cpu().numpy(), sr=self.config.samplerate
),
(2, 1),
).T
else:
outdata[:] = self.output_wav[:].repeat(2, 1).t().cpu().numpy()
total_time = time.perf_counter() - start_time
self.window["infer_time"].update(int(total_time * 1000))
print("infer time:" + str(total_time))
def get_devices(self, update: bool = True):
"""获取设备列表"""
if update:
sd._terminate()
sd._initialize()
devices = sd.query_devices()
hostapis = sd.query_hostapis()
for hostapi in hostapis:
for device_idx in hostapi["devices"]:
devices[device_idx]["hostapi_name"] = hostapi["name"]
input_devices = [
f"{d['name']} ({d['hostapi_name']})"
for d in devices
if d["max_input_channels"] > 0
]
output_devices = [
f"{d['name']} ({d['hostapi_name']})"
for d in devices
if d["max_output_channels"] > 0
]
input_devices_indices = [
d["index"] if "index" in d else d["name"]
for d in devices
if d["max_input_channels"] > 0
]
output_devices_indices = [
d["index"] if "index" in d else d["name"]
for d in devices
if d["max_output_channels"] > 0
]
return (
input_devices,
output_devices,
input_devices_indices,
output_devices_indices,
)
def set_devices(self, input_device, output_device):
"""设置输出设备"""
(
input_devices,
output_devices,
input_device_indices,
output_device_indices,
) = self.get_devices()
sd.default.device[0] = input_device_indices[input_devices.index(input_device)]
sd.default.device[1] = output_device_indices[
output_devices.index(output_device)
]
print("input device:" + str(sd.default.device[0]) + ":" + str(input_device))
print("output device:" + str(sd.default.device[1]) + ":" + str(output_device))
gui = GUI()

View File

@@ -7,7 +7,7 @@
"step3a:正在训练模型": "Paso 3a: Entrenando el modelo",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "Entrenamiento finalizado, puede ver el registro de entrenamiento en la consola o en el archivo train.log en la carpeta del experimento",
"全流程结束!": "¡Todo el proceso ha terminado!",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>使用需遵守的协议-LICENSE.txt</b>.": "Este software es de código abierto bajo la licencia MIT, el autor no tiene ningún control sobre el software, y aquellos que usan el software y difunden los sonidos exportados por el software son los únicos responsables.<br>Si no está de acuerdo con esta cláusula , no puede utilizar ni citar ningún código ni archivo del paquete de software Consulte el directorio raíz <b>Agreement-LICENSE.txt</b> para obtener más información.",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.": "Este software es de código abierto bajo la licencia MIT, el autor no tiene ningún control sobre el software, y aquellos que usan el software y difunden los sonidos exportados por el software son los únicos responsables.<br>Si no está de acuerdo con esta cláusula , no puede utilizar ni citar ningún código ni archivo del paquete de software Consulte el directorio raíz <b>Agreement-LICENSE.txt</b> para obtener más información.",
"模型推理": "inferencia del modelo",
"推理音色": "inferencia de voz",
"刷新音色列表和索引路径": "Actualizar la lista de timbres e índice de rutas",
@@ -37,6 +37,7 @@
"也可批量输入音频文件, 二选一, 优先读文件夹": "También se pueden ingresar múltiples archivos de audio, cualquiera de las dos opciones, con prioridad dada a la carpeta",
"导出文件格式": "Formato de archivo de exportación",
"伴奏人声分离&去混响&去回声": "Separación de voz acompañante & eliminación de reverberación & eco",
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。": "Procesamiento por lotes para la separación de acompañamiento vocal utilizando el modelo UVR5.<br>Ejemplo de formato de ruta de carpeta válido: D:\\ruta\\a\\la\\carpeta\\de\\entrada (copiar desde la barra de direcciones del administrador de archivos).<br>El modelo se divide en tres categorías:<br>1. Preservar voces: Elija esta opción para audio sin armonías. Preserva las voces mejor que HP5. Incluye dos modelos incorporados: HP2 y HP3. HP3 puede filtrar ligeramente el acompañamiento pero conserva las voces un poco mejor que HP2.<br>2. Preservar solo voces principales: Elija esta opción para audio con armonías. Puede debilitar las voces principales. Incluye un modelo incorporado: HP5.<br>3. Modelos de des-reverberación y des-retardo (por FoxJoy):<br>(1) MDX-Net: La mejor opción para la eliminación de reverberación estéreo pero no puede eliminar la reverberación mono;<br>&emsp;(234) DeEcho: Elimina efectos de retardo. El modo Agresivo elimina más a fondo que el modo Normal. DeReverb adicionalmente elimina la reverberación y puede eliminar la reverberación mono, pero no muy efectivamente para contenido de alta frecuencia fuertemente reverberado.<br>Notas de des-reverberación/des-retardo:<br>1. El tiempo de procesamiento para el modelo DeEcho-DeReverb es aproximadamente el doble que los otros dos modelos DeEcho.<br>2. El modelo MDX-Net-Dereverb es bastante lento.<br>3. La configuración más limpia recomendada es aplicar primero MDX-Net y luego DeEcho-Agresivo.",
"输入待处理音频文件夹路径": "Ingrese la ruta a la carpeta de audio que se procesará",
"模型": "Modelo",
"指定输出主人声文件夹": "Especifique la carpeta de salida para la voz principal",
@@ -94,7 +95,6 @@
"Onnx导出": "Exportar Onnx",
"RVC模型路径": "Ruta del modelo RVC",
"Onnx输出路径": "Ruta de salida Onnx",
"MoeVS模型": "Modelo MoeVS",
"导出Onnx模型": "Exportar modelo Onnx",
"常见问题解答": "Preguntas frecuentes",
"招募音高曲线前端编辑器": "Reclutar editores front-end para curvas de tono",
@@ -122,5 +122,11 @@
"开始音频转换": "Iniciar conversión de audio",
"停止音频转换": "Detener la conversión de audio",
"推理时间(ms):": "Inferir tiempo (ms):",
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。":"Procesamiento por lotes para la separación de acompañamiento vocal utilizando el modelo UVR5.<br>Ejemplo de formato de ruta de carpeta válido: D:\\ruta\\a\\la\\carpeta\\de\\entrada (copiar desde la barra de direcciones del administrador de archivos).<br>El modelo se divide en tres categorías:<br>1. Preservar voces: Elija esta opción para audio sin armonías. Preserva las voces mejor que HP5. Incluye dos modelos incorporados: HP2 y HP3. HP3 puede filtrar ligeramente el acompañamiento pero conserva las voces un poco mejor que HP2.<br>2. Preservar solo voces principales: Elija esta opción para audio con armonías. Puede debilitar las voces principales. Incluye un modelo incorporado: HP5.<br>3. Modelos de des-reverberación y des-retardo (por FoxJoy):<br>(1) MDX-Net: La mejor opción para la eliminación de reverberación estéreo pero no puede eliminar la reverberación mono;<br>&emsp;(234) DeEcho: Elimina efectos de retardo. El modo Agresivo elimina más a fondo que el modo Normal. DeReverb adicionalmente elimina la reverberación y puede eliminar la reverberación mono, pero no muy efectivamente para contenido de alta frecuencia fuertemente reverberado.<br>Notas de des-reverberación/des-retardo:<br>1. El tiempo de procesamiento para el modelo DeEcho-DeReverb es aproximadamente el doble que los otros dos modelos DeEcho.<br>2. El modelo MDX-Net-Dereverb es bastante lento.<br>3. La configuración más limpia recomendada es aplicar primero MDX-Net y luego DeEcho-Agresivo."
"请选择pth文件": "请选择pth文件",
"请选择index文件": "请选择index文件",
"hubert模型路径不可包含中文": "hubert模型路径不可包含中文",
"pth文件路径不可包含中文": "pth文件路径不可包含中文",
"index文件路径不可包含中文": "index文件路径不可包含中文",
"音高算法": "音高算法",
"harvest进程数": "harvest进程数"
}

130
i18n/it_IT.json Normal file
View File

@@ -0,0 +1,130 @@
{
"很遗憾您这没有能用的显卡来支持您训练": "Sfortunatamente, non è disponibile alcuna GPU compatibile per supportare l'addestramento.",
"是": "SÌ",
"step1:正在处理数据": "Passaggio 1: elaborazione dei dati",
"step2a:无需提取音高": "Step 2a: Saltare l'estrazione del tono",
"step2b:正在提取特征": "Passaggio 2b: estrazione delle funzionalità",
"step3a:正在训练模型": "Passaggio 3a: è iniziato l'addestramento del modello",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "Addestramento completato. ",
"全流程结束!": "Tutti i processi sono stati completati!",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.": "Questo software è open source con licenza MIT. <br>Se non si accetta questa clausola, non è possibile utilizzare o fare riferimento a codici e file all'interno del pacchetto software. <b>Contratto-LICENZA.txt</b> per dettagli.",
"模型推理": "Inferenza del modello",
"推理音色": "Voce di inferenza:",
"刷新音色列表和索引路径": "Aggiorna l'elenco delle voci e il percorso dell'indice",
"卸载音色省显存": "Scarica la voce per risparmiare memoria della GPU:",
"请选择说话人id": "Seleziona ID locutore/cantante:",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "Tonalità +12 consigliata per la conversione da maschio a femmina e tonalità -12 per la conversione da femmina a maschio. ",
"变调(整数, 半音数量, 升八度12降八度-12)": "Trasposizione (numero intero, numero di semitoni, alza di un'ottava: 12, abbassa di un'ottava: -12):",
"输入待处理音频文件路径(默认是正确格式示例)": "Immettere il percorso del file audio da elaborare (l'impostazione predefinita è l'esempio di formato corretto):",
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU": "Seleziona l'algoritmo di estrazione del tono (\"pm\": estrazione più veloce ma risultato di qualità inferiore; \"harvest\": bassi migliori ma estremamente lenti; \"crepe\": qualità migliore ma utilizzo intensivo della GPU):",
">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音": "Se >=3: applica il filtro mediano ai risultati del pitch raccolto. ",
"特征检索库文件路径,为空则使用下拉的选择结果": "Percorso del file di indice delle caratteristiche. ",
"自动检测index路径,下拉式选择(dropdown)": "Rileva automaticamente il percorso dell'indice e seleziona dal menu a tendina:",
"特征文件路径": "Percorso del file delle caratteristiche:",
"检索特征占比": "Rapporto funzionalità di ricerca (controlla la forza dell'accento, troppo alto ha artefatti):",
"后处理重采样至最终采样率0为不进行重采样": "Ricampiona l'audio di output in post-elaborazione alla frequenza di campionamento finale. ",
"输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络": "Regola il ridimensionamento dell'inviluppo del volume. ",
"保护清辅音和呼吸声防止电音撕裂等artifact拉满0.5不开启,调低加大保护力度但可能降低索引效果": "Proteggi le consonanti senza voce e i suoni del respiro per evitare artefatti come il tearing nella musica elettronica. ",
"F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调": "File curva F0 (opzionale). ",
"转换": "Convertire",
"输出信息": "Informazioni sull'uscita",
"输出音频(右下角三个点,点了可以下载)": "Esporta audio (clicca sui tre puntini in basso a destra per scaricarlo)",
"批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ": "Conversione massiva. Inserisci il percorso della cartella che contiene i file da convertire o carica più file audio. I file convertiti finiranno nella cartella specificata. (default: opt) ",
"指定输出文件夹": "Specifica la cartella di output:",
"输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)": "Immettere il percorso della cartella audio da elaborare (copiarlo dalla barra degli indirizzi del file manager):",
"也可批量输入音频文件, 二选一, 优先读文件夹": "Puoi anche inserire file audio in massa. ",
"导出文件格式": "Formato file di esportazione",
"伴奏人声分离&去混响&去回声": "Separazione voce/accompagnamento",
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。": "Elaborazione batch per la separazione dell'accompagnamento vocale utilizzando il modello UVR5.<br>Esempio di un formato di percorso di cartella valido: D:\\path\\to\\input\\folder (copialo dalla barra degli indirizzi del file manager).<br>Il modello è suddiviso in tre categorie:<br>1. Conserva la voce: scegli questa opzione per l'audio senza armonie. <br>2. Mantieni solo la voce principale: scegli questa opzione per l'audio con armonie. <br>3. Modelli di de-riverbero e de-delay (di FoxJoy):<br>(1) MDX-Net: la scelta migliore per la rimozione del riverbero stereo ma non può rimuovere il riverbero mono;<br><br>Note di de-riverbero/de-delay:<br>1. Il tempo di elaborazione per il modello DeEcho-DeReverb è circa il doppio rispetto agli altri due modelli DeEcho.<br>2. Il modello MDX-Net-Dereverb è piuttosto lento.<br>3. La configurazione più pulita consigliata consiste nell'applicare prima MDX-Net e poi DeEcho-Aggressive.",
"输入待处理音频文件夹路径": "Immettere il percorso della cartella audio da elaborare:",
"模型": "Modello",
"指定输出主人声文件夹": "Specifica la cartella di output per le voci:",
"指定输出非主人声文件夹": "Specificare la cartella di output per l'accompagnamento:",
"训练": "Addestramento",
"step1: 填写实验配置. 实验数据放在logs下, 每个实验一个文件夹, 需手工输入实验名路径, 内含实验配置, 日志, 训练得到的模型文件. ": "Passaggio 1: compilare la configurazione sperimentale. ",
"输入实验名": "Inserisci il nome dell'esperimento:",
"目标采样率": "Frequenza di campionamento target:",
"模型是否带音高指导(唱歌一定要, 语音可以不要)": "Se il modello ha una guida del tono (necessario per il canto, facoltativo per il parlato):",
"版本": "Versione",
"提取音高和处理数据使用的CPU进程数": "Numero di processi CPU utilizzati per l'estrazione del tono e l'elaborazione dei dati:",
"step2a: 自动遍历训练文件夹下所有可解码成音频的文件并进行切片归一化, 在实验目录下生成2个wav文件夹; 暂时只支持单人训练. ": "Passaggio 2a: attraversa automaticamente tutti i file nella cartella di addestramento che possono essere decodificati in audio ed esegui la normalizzazione delle sezioni. ",
"输入训练文件夹路径": "Inserisci il percorso della cartella di addestramento:",
"请指定说话人id": "Si prega di specificare l'ID del locutore/cantante:",
"处理数据": "Processa dati",
"step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)": "Passaggio 2b: utilizzare la CPU per estrarre il tono (se il modello ha il tono), utilizzare la GPU per estrarre le caratteristiche (selezionare l'indice GPU):",
"以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2": "Inserisci gli indici GPU separati da '-', ad esempio 0-1-2 per utilizzare GPU 0, 1 e 2:",
"显卡信息": "Informazioni GPU",
"选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢": "Seleziona l'algoritmo di estrazione del tono (\"pm\": estrazione più rapida ma parlato di qualità inferiore; \"dio\": parlato migliorato ma estrazione più lenta; \"harvest\": migliore qualità ma estrazione più lenta):",
"特征提取": "Estrazione delle caratteristiche",
"step3: 填写训练设置, 开始训练模型和索引": "Passaggio 3: compilare le impostazioni di addestramento e avviare l'addestramento del modello e dell'indice",
"保存频率save_every_epoch": "Frequenza di salvataggio (save_every_epoch):",
"总训练轮数total_epoch": "Epoch totali di addestramento (total_epoch):",
"每张显卡的batch_size": "Dimensione batch per GPU:",
"是否仅保存最新的ckpt文件以节省硬盘空间": "Salva solo l'ultimo file '.ckpt' per risparmiare spazio su disco:",
"否": "NO",
"是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速": "Memorizza nella cache tutti i set di addestramento nella memoria della GPU. ",
"是否在每次保存时间点将最终小模型保存至weights文件夹": "Salva un piccolo modello finale nella cartella \"weights\" in ogni punto di salvataggio:",
"加载预训练底模G路径": "Carica il percorso G del modello base pre-addestrato:",
"加载预训练底模D路径": "Carica il percorso D del modello base pre-addestrato:",
"训练模型": "Addestra modello",
"训练特征索引": "Addestra indice delle caratteristiche",
"一键训练": "Addestramento con un clic",
"ckpt处理": "Elaborazione ckpt",
"模型融合, 可用于测试音色融合": "Model fusion, può essere utilizzato per testare la fusione timbrica",
"A模型路径": "Percorso per il modello A:",
"B模型路径": "Percorso per il modello B:",
"A模型权重": "Peso (w) per il modello A:",
"模型是否带音高指导": "Se il modello ha una guida del tono:",
"要置入的模型信息": "Informazioni sul modello da posizionare:",
"保存的模型名不带后缀": "Nome del modello salvato (senza estensione):",
"模型版本型号": "Versione dell'architettura del modello:",
"融合": "Fusione",
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Modifica le informazioni sul modello (supportato solo per i file di modello di piccole dimensioni estratti dalla cartella 'weights')",
"模型路径": "Percorso al modello:",
"要改的模型信息": "Informazioni sul modello da modificare:",
"保存的文件名, 默认空为和源文件同名": "Salva il nome del file (predefinito: uguale al file di origine):",
"修改": "Modificare",
"查看模型信息(仅支持weights文件夹下提取的小模型文件)": "Visualizza le informazioni sul modello (supportato solo per file di modello piccoli estratti dalla cartella 'weights')",
"查看": "Visualizzazione",
"模型提取(输入logs文件夹下大文件模型路径),适用于训一半不想训了模型没有自动提取保存小文件模型,或者想测试中间模型的情况": "Estrazione del modello (inserire il percorso del modello di file di grandi dimensioni nella cartella \"logs\"). ",
"保存名": "Salva nome:",
"模型是否带音高指导,1是0否": "Se il modello ha una guida del tono (1: sì, 0: no):",
"提取": "Estrai",
"Onnx导出": "Esporta Onnx",
"RVC模型路径": "Percorso modello RVC:",
"Onnx输出路径": "Percorso di esportazione Onnx:",
"导出Onnx模型": "Esporta modello Onnx",
"常见问题解答": "FAQ (Domande frequenti)",
"招募音高曲线前端编辑器": "Reclutamento di redattori front-end per curve di tono",
"加开发群联系我xxxxx": "Unisciti al gruppo di sviluppo e contattami a xxxxx",
"点击查看交流、问题反馈群号": "Fare clic per visualizzare il numero del gruppo di comunicazione e feedback sui problemi",
"xxxxx": "xxxxx",
"加载模型": "Carica modello",
"Hubert模型": "Modello Hubert",
"选择.pth文件": "Seleziona il file .pth",
"选择.index文件": "Seleziona il file .index",
"选择.npy文件": "Seleziona il file .npy",
"输入设备": "Dispositivo di input",
"输出设备": "Dispositivo di uscita",
"音频设备(请使用同种类驱动)": "Dispositivo audio (utilizzare lo stesso tipo di driver)",
"响应阈值": "Soglia di risposta",
"音调设置": "Impostazioni del tono",
"Index Rate": "Tasso di indice",
"常规设置": "Impostazioni generali",
"采样长度": "Lunghezza del campione",
"淡入淡出长度": "Lunghezza dissolvenza",
"额外推理时长": "Tempo di inferenza extra",
"输入降噪": "Riduzione del rumore in ingresso",
"输出降噪": "Riduzione del rumore in uscita",
"性能设置": "Impostazioni delle prestazioni",
"开始音频转换": "Avvia la conversione audio",
"停止音频转换": "Arresta la conversione audio",
"推理时间(ms):": "Tempo di inferenza (ms):",
"请选择pth文件": "请选择pth 文件",
"请选择index文件": "请选择index文件",
"hubert模型路径不可包含中文": "hubert 模型路径不可包含中文",
"pth文件路径不可包含中文": "pth è un'app per il futuro",
"index文件路径不可包含中文": "index文件路径不可包含中文",
"音高算法": "音高算法",
"harvest进程数": "harvest进程数"
}

130
i18n/ru-RU.json Normal file
View File

@@ -0,0 +1,130 @@
{
"很遗憾您这没有能用的显卡来支持您训练": "К сожалению у вас нету видеокарты, которая поддерживает тренировку модели.",
"是": "Да",
"step1:正在处理数据": "Шаг 1: Переработка данных",
"step2a:无需提取音高": "Шаг 2а: Пропуск вытаскивания тональности",
"step2b:正在提取特征": "Шаг 2б: Вытаскивание черт",
"step3a:正在训练模型": "Шаг 3а: Тренировка модели начата",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "Тренировка завершена. Вы можете проверить логи тренировки в консоли или в файле 'train.log' в папке модели.",
"全流程结束!": "Все процессы завершены!",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.": "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.",
"模型推理": "Обработка модели",
"推理音色": "Обработка голоса:",
"刷新音色列表和索引路径": "Обновить список голосов и индексов",
"卸载音色省显存": "Выгрузить голос для сохранения памяти видеокарты:",
"请选择说话人id": "Выбери айди голоса:",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "Рекомендованно +12 для конвертирования мужского голоса в женский и -12 для конвертирования женского в мужской. Если диапазон голоса слищком велик и голос искажается, значение можно изменить на свой вкус.",
"变调(整数, 半音数量, 升八度12降八度-12)": "Высота голоса (число, полутоны, поднять на октаву: 12, понизить на октаву: -12):",
"输入待处理音频文件路径(默认是正确格式示例)": "Введите путь к аудиофайлу, который хотите переработать (по умолчанию введён правильный формат):",
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU": "Выберите алгоритм вытаскивания тональности ('pm': быстрое извлечение но качество речи хуже; 'harvest': бассы лучше но очень медленный; 'crepe': лучшее качество но сильно использует видеокарту):",
">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音": "Если больше 3: применить медианную фильтрацию к вытащенным тональностям. Значение контролирует радиус фильтра и может уменьшить излишнее дыхание.",
"特征检索库文件路径,为空则使用下拉的选择结果": "Путь к файлу индекса черт. Оставьте пустым, чтобы использовать выбранный результат из списка:",
"自动检测index路径,下拉式选择(dropdown)": "Автоматически найти путь к индексу и выбрать его из списка:",
"特征文件路径": "Путь к файлу черт:",
"检索特征占比": "Соотношение поиска черт:",
"后处理重采样至最终采样率0为不进行重采样": "Изменить частоту дискретизации в выходном файле на финальную. Поставьте 0, чтобы ничего не изменялось:",
"输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络": "Использовать громкость входного файла для замены или перемешивания с громкостью выходного файла. Чем ближе соотношение к 1, тем больше используется звука из выходного файла:",
"保护清辅音和呼吸声防止电音撕裂等artifact拉满0.5不开启,调低加大保护力度但可能降低索引效果": "Защитить глухие согласные и звуки дыхания для предотвращения артефактов, например разрывание в электронной музыке. Поставьте на 0.5, чтобы выключить. Уменьшите значение для повышения защиты, но при этом может ухудшиться аккуратность индексирования:",
"F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调": "Файл дуги F0 (не обязательно). Одна тональность на каждую строчку. Заменяет обычный F0 и модуляцию тональности:",
"转换": "Конвертировать",
"输出信息": "Выходная информация",
"输出音频(右下角三个点,点了可以下载)": "Экспортировать аудиофайл (нажми на три точки в правом нижнем углу для загрузки)",
"批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ": "Конвертировать пачкой. Введите путь к папке, в которой находятся файлы для конвертирования или выложите несколько аудиофайлов. Сконвертированные файлы будут сохранены в указанной папке (по умолчанию 'opt').",
"指定输出文件夹": "Укажите выходную папку:",
"输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)": "Введите путь к папке с аудио для переработки:",
"也可批量输入音频文件, 二选一, 优先读文件夹": "Вы также можете выложить аудиофайлы пачкой. Выберите одно из двух. Приоритет отдаётся считыванию из папки.",
"导出文件格式": "Формат выходного файла",
"伴奏人声分离&去混响&去回声": "Отделение вокала/инструментала и убирание эхо",
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。": "Пакетная обработка для разделения вокального сопровождения с использованием модели UVR5.<br>Пример допустимого формата пути к папке: D:\\path\\to\\input\\folder<br> Модель разделена на три категории:<br>1. Сохранить вокал: выберите этот вариант для звука без гармоний. Он сохраняет вокал лучше, чем HP5. Он включает в себя две встроенные модели: HP2 и HP3. HP3 может немного пропускать инструментал, но сохраняет вокал немного лучше, чем HP2.<br>2. Сохранить только основной вокал: выберите этот вариант для звука с гармониями. Это может ослабить основной вокал. Он включает одну встроенную модель: HP5.<br>3. Модели удаления реверберации и задержки (от FoxJoy):<br>(1) MDX-Net: лучший выбор для удаления стереореверберации, но он не может удалить монореверберацию;<br>&emsp;(234) DeEcho: удаляет эффекты задержки. Агрессивный режим удаляет более тщательно, чем Нормальный режим. DeReverb дополнительно удаляет реверберацию и может удалять монореверберацию, но не очень эффективно для сильно реверберированного высокочастотного контента.<br>Примечания по удалению реверберации/задержки:<br>1. Время обработки для модели DeEcho-DeReverb примерно в два раза больше, чем для двух других моделей DeEcho.<br>2. Модель MDX-Net-Dereverb довольно медленная.<br>3. Рекомендуемая самая чистая конфигурация — сначала применить MDX-Net, а затем DeEcho-Aggressive.",
"输入待处理音频文件夹路径": "Введите путь к папке с аудиофайлами для переработки:",
"模型": "Модели",
"指定输出主人声文件夹": "Введите путь к папке для вокала:",
"指定输出非主人声文件夹": "Введите путь к папке для инструментала:",
"训练": "Тренировка",
"step1: 填写实验配置. 实验数据放在logs下, 每个实验一个文件夹, 需手工输入实验名路径, 内含实验配置, 日志, 训练得到的模型文件. ": "Шаг 1: Заполните настройки модели. Данные модели сохранены в папку 'logs' и для каждой модели создаётся отдельная папка. Введите вручную путь к настройкам для модели, в которой находятся логи и тренировочные файлы.",
"输入实验名": "Введите название модели:",
"目标采样率": "Частота дискретизации модели:",
"模型是否带音高指导(唱歌一定要, 语音可以不要)": "Наведение по тональности у модели (обязательно для пения, необязательно для речи):",
"版本": "Версия",
"提取音高和处理数据使用的CPU进程数": "Число процессов ЦП, используемое для вытаскивания тональностей и обрабротки данных:",
"step2a: 自动遍历训练文件夹下所有可解码成音频的文件并进行切片归一化, 在实验目录下生成2个wav文件夹; 暂时只支持单人训练. ": "Шаг 2а: Автоматически пройтись по всем аудиофайлам в папке тренировки и нормализировать куски. Создаст 2 папки wav в папке модели. В данных момент поддерживается тренировка только одного голоса.",
"输入训练文件夹路径": "Введите путь к папке тренировки:",
"请指定说话人id": "Введите айди голоса:",
"处理数据": "Переработать данные",
"step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)": "Шаг 2б: Вытащить тональности с помошью процессора (если в модели есть тональности), вытащить черты с помощью видеокарты (выберите какой):",
"以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2": "Введите, какие(-ую) видеокарты(-у) хотите использовать через '-', например 0-1-2, чтобы использовать видеокарту 0, 1 и 2:",
"显卡信息": "Информация о видеокартах",
"选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢": "Выберите алгоритм вытаскивания тональности ('pm': быстрое извлечение но качество речи хуже; 'harvest': бассы лучше но очень медленный; 'crepe': лучшее качество но сильно использует видеокарту):",
"特征提取": "Вытаскивание черт",
"step3: 填写训练设置, 开始训练模型和索引": "Шаг 3: Заполните остальные настройки тренировки и начните тренировать модель и индекс",
"保存频率save_every_epoch": "Частота сохранения (save_every_epoch):",
"总训练轮数total_epoch": "Полное количество эпох (total_epoch):",
"每张显卡的batch_size": "Размер пачки для видеокарты:",
"是否仅保存最新的ckpt文件以节省硬盘空间": "Сохранять только последний файл '.ckpt', чтобы сохранить место на диске:",
"否": "Нет",
"是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速": "Кэшировать все тренировочные сеты в видеопамять. Кэширование маленький датасетов (меньше 10 минут) может ускорить тренировку, но кэширование больших, наоборот, займёт много видеопамяти и не сильно ускорит тренировку:",
"是否在每次保存时间点将最终小模型保存至weights文件夹": "Сохранять маленькую финальную модель в папку 'weights' на каждой точке сохранения:",
"加载预训练底模G路径": "Путь к натренированой базовой модели G:",
"加载预训练底模D路径": "Путь к натренированой базовой модели D:",
"训练模型": "Тренировать модель",
"训练特征索引": "Тренировать индекс черт",
"一键训练": "Тренировка одним нажатием",
"ckpt处理": "Обработка ckpt",
"模型融合, 可用于测试音色融合": "Слияние моделей, может быть использовано для проверки слияния тембра",
"A模型路径": "Путь к модели А:",
"B模型路径": "Путь к модели Б:",
"A模型权重": "Вес (w) модели А::",
"模型是否带音高指导": "Есть ли у модели наведение по тональности (1: да, 0: нет):",
"要置入的模型信息": "Информация о модели:",
"保存的模型名不带后缀": "Название сохранённой модели (без расширения):",
"模型版本型号": "Версия архитектуры модели:",
"融合": "Слияние",
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Модифицировать информацию о модели (поддерживается только для маленких моделей, взятых из папки 'weights')",
"模型路径": "Путь к папке:",
"要改的模型信息": "Информация о модели, которую нужно модифицировать:",
"保存的文件名, 默认空为和源文件同名": "Название сохранённого файла (по умолчанию такое же, как и входного):",
"修改": "Модифицировать",
"查看模型信息(仅支持weights文件夹下提取的小模型文件)": "Просмотреть информацию о модели (поддерживается только для маленких моделей, взятых из папки 'weights')",
"查看": "Просмотр",
"模型提取(输入logs文件夹下大文件模型路径),适用于训一半不想训了模型没有自动提取保存小文件模型,或者想测试中间模型的情况": "Вытаскивание модели (введите путь к большому файлу модели в папке 'logs'). Полезно, если Вам нужно заверщить тренировку и вручную достать и сохранить маленький файл модели, или если Вам нужно проверить незаконченную модель:",
"保存名": "Имя сохранённого файла:",
"模型是否带音高指导,1是0否": "Есть ли у модели наведение по тональности (1: да, 0: нет):",
"提取": "Вытащить",
"Onnx导出": "Экспортировать Onnx",
"RVC模型路径": "Путь к модели RVC:",
"Onnx输出路径": "Путь для экспотрированного Onnx:",
"导出Onnx模型": "Экспортировать Onnx модель",
"常见问题解答": "ЧаВО (Часто задаваемые вопросы)",
"招募音高曲线前端编辑器": "Использование фронтенд редакторов для тональных дуг",
"加开发群联系我xxxxx": "Присоединитесь к группе разработки и свяжитесь со мной по xxxxx",
"点击查看交流、问题反馈群号": "Нажмите, чтобы просмотреть номер группы коммуникации и отзывах о проблемах",
"xxxxx": "xxxxx",
"加载模型": "Загрузить модель",
"Hubert模型": "Модель Hubert",
"选择.pth文件": "Выбрать файл .pth",
"选择.index文件": "Выбрать файл .index",
"选择.npy文件": "Выбрать файл .npy",
"输入设备": "Входное устройство",
"输出设备": "Выходное устройство",
"音频设备(请使用同种类驱动)": "Аудио устройство (пожалуйста используйте такой=же тип драйвера)",
"响应阈值": "Порог ответа",
"音调设置": "Настройки тональности",
"Index Rate": "Темп индекса",
"常规设置": "Основные настройки",
"采样长度": "Длина сэмпла",
"淡入淡出长度": "Длина затухания",
"额外推理时长": "Доп. время переработки",
"输入降噪": "Уменьшения шума во входной информации",
"输出降噪": "Уменьшения шума во выходной информации",
"性能设置": "Настройки быстроты",
"开始音频转换": "Начать конвертацию аудио",
"停止音频转换": "Закончить конвертацию аудио",
"推理时间(ms):": "Время переработки (мс):",
"请选择pth文件": "请选择pth文件",
"请选择index文件": "请选择index文件",
"hubert模型路径不可包含中文": "hubert模型路径不可包含中文",
"pth文件路径不可包含中文": "pth文件路径不可包含中文",
"index文件路径不可包含中文": "index文件路径不可包含中文",
"音高算法": "音高算法",
"harvest进程数": "harvest进程数"
}

130
i18n/tr_TR.json Normal file
View File

@@ -0,0 +1,130 @@
{
"很遗憾您这没有能用的显卡来支持您训练": "Maalesef, eğitiminizi desteklemek için uyumlu bir GPU bulunmamaktadır.",
"是": "Evet",
"step1:正在处理数据": "Adım 1: Veri işleme",
"step2a:无需提取音高": "Adım 2a: Pitch çıkartma adımını atlama",
"step2b:正在提取特征": "Adım 2b: Özelliklerin çıkarılması",
"step3a:正在训练模型": "Adım 3a: Model eğitimi başladı",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "Eğitim tamamlandı. Eğitim günlüklerini konsolda veya deney klasörü altındaki train.log dosyasında kontrol edebilirsiniz.",
"全流程结束!": "Tüm işlemler tamamlandı!",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.": "Bu yazılım, MIT lisansı altında açık kaynaklıdır. Yazarın yazılım üzerinde herhangi bir kontrolü yoktur. Yazılımı kullanan ve yazılım tarafından dışa aktarılan sesleri dağıtan kullanıcılar sorumludur. <br>Eğer bu maddeyle aynı fikirde değilseniz, yazılım paketi içindeki herhangi bir kod veya dosyayı kullanamaz veya referans göremezsiniz. Detaylar için kök dizindeki <b>Agreement-LICENSE.txt</b> dosyasına bakınız.",
"模型推理": "Model çıkartma (Inference)",
"推理音色": "Ses çıkartma (Inference):",
"刷新音色列表和索引路径": "Ses listesini ve indeks yolunu yenile",
"卸载音色省显存": "GPU bellek kullanımını azaltmak için sesi kaldır",
"请选择说话人id": "Konuşmacı/Şarkıcı No seçin:",
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ": "Erkekten kadına çevirmek için +12 tuş önerilir, kadından erkeğe çevirmek için ise -12 tuş önerilir. Eğer ses aralığı çok fazla genişler ve ses bozulursa, isteğe bağlı olarak uygun aralığa kendiniz de ayarlayabilirsiniz.",
"变调(整数, 半音数量, 升八度12降八度-12)": "Transpoze et (tamsayı, yarıton sayısıyla; bir oktav yükseltmek için: 12, bir oktav düşürmek için: -12):",
"输入待处理音频文件路径(默认是正确格式示例)": "İşlenecek ses dosyasının yolunu girin (varsayılan doğru format örneğidir):",
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU": "Pitch algoritmasını seçin ('pm': daha hızlı çıkarır ancak daha düşük kaliteli konuşma; 'harvest': daha iyi konuşma sesi ancak son derece yavaş; 'crepe': daha da iyi kalite ancak GPU yoğunluğu gerektirir):",
">=3则使用对harvest音高识别的结果使用中值滤波数值为滤波半径使用可以削弱哑音": "Eğer >=3 ise, elde edilen pitch sonuçlarına median filtreleme uygula. Bu değer, filtre yarıçapını temsil eder ve nefesliliği azaltabilir.",
"特征检索库文件路径,为空则使用下拉的选择结果": "Özellik indeksi dosyasının yolunu belirtin. Seçilen sonucu kullanmak için boş bırakın veya açılır menüden seçim yapın.",
"自动检测index路径,下拉式选择(dropdown)": "İndeks yolunu otomatik olarak tespit et ve açılır menüden seçim yap.",
"特征文件路径": "Özellik dosyasının yolu:",
"检索特征占比": "Arama özelliği oranı (vurgu gücünü kontrol eder, çok yüksek olması sanal etkilere neden olur)",
"后处理重采样至最终采样率0为不进行重采样": "Son işleme aşamasında çıktı sesini son örnekleme hızına yeniden örnekle. 0 değeri için yeniden örnekleme yapılmaz:",
"输入源音量包络替换输出音量包络融合比例越靠近1越使用输出包络": "Sesin hacim zarfını ayarlayın. 0'a yakın değerler, sesin orijinal vokallerin hacmine benzer olmasını sağlar. Düşük bir değerle ses gürültüsünü maskeleyebilir ve hacmi daha doğal bir şekilde duyulabilir hale getirebilirsiniz. 1'e yaklaştıkça sürekli bir yüksek ses seviyesi elde edilir:",
"保护清辅音和呼吸声防止电音撕裂等artifact拉满0.5不开启,调低加大保护力度但可能降低索引效果": "Sessiz ünsüzleri ve nefes seslerini koruyarak elektronik müzikte yırtılma gibi sanal hataların oluşmasını engeller. 0.5 olarak ayarlandığında devre dışı kalır. Değerin azaltılması korumayı artırabilir, ancak indeksleme doğruluğunu azaltabilir:",
"F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调": "F0 eğrisi dosyası (isteğe bağlı). Her satırda bir pitch değeri bulunur. Varsayılan F0 ve pitch modülasyonunu değiştirir:",
"转换": "Dönüştür",
"输出信息": ıkış bilgisi",
"输出音频(右下角三个点,点了可以下载)": "Ses dosyasını dışa aktar (indirmek için sağ alt köşedeki üç noktaya tıklayın)",
"批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ": "Toplu dönüştür. Dönüştürülecek ses dosyalarının bulunduğu klasörü girin veya birden çok ses dosyasını yükleyin. Dönüştürülen ses dosyaları belirtilen klasöre ('opt' varsayılan olarak) dönüştürülecektir",
"指定输出文件夹": ıkış klasörünü belirt:",
"输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)": "İşlenecek ses klasörünün yolunu girin (dosya yöneticisinin adres çubuğundan kopyalayın):",
"也可批量输入音频文件, 二选一, 优先读文件夹": "Toplu olarak ses dosyalarını da girebilirsiniz. İki seçenekten birini seçin. Öncelik klasörden okumaya verilir.",
"导出文件格式": "Dışa aktarma dosya formatı",
"伴奏人声分离&去混响&去回声": "Vokal/Müzik Ayrıştırma ve Yankı Giderme",
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。": "Batch işleme kullanarak vokal eşlik ayrımı için UVR5 modeli kullanılır.<br>Geçerli bir klasör yol formatı örneği: D:\\path\\to\\input\\folder (dosya yöneticisi adres çubuğundan kopyalanır).<br>Model üç kategoriye ayrılır:<br>1. Vokalleri koru: Bu seçeneği, harmoni içermeyen sesler için kullanın. HP5'ten daha iyi bir şekilde vokalleri korur. İki dahili model içerir: HP2 ve HP3. HP3, eşlik sesini hafifçe sızdırabilir, ancak vokalleri HP2'den biraz daha iyi korur.<br>2. Sadece ana vokalleri koru: Bu seçeneği, harmoni içeren sesler için kullanın. Ana vokalleri zayıflatabilir. Bir dahili model içerir: HP5.<br>3. Reverb ve gecikme modelleri (FoxJoy tarafından):<br>(1) MDX-Net: Stereo reverb'i kaldırmak için en iyi seçenek, ancak mono reverb'i kaldıramaz;<br>(234) DeEcho: Gecikme efektlerini kaldırır. Agresif mod, Normal moda göre daha kapsamlı bir şekilde kaldırma yapar. DeReverb ayrıca reverb'i kaldırır ve mono reverb'i kaldırabilir, ancak yoğun yankılı yüksek frekanslı içerikler için çok etkili değildir.<br>Reverb/gecikme notları:<br>1. DeEcho-DeReverb modelinin işleme süresi diğer iki DeEcho modeline göre yaklaşık olarak iki kat daha uzundur.<br>2. MDX-Net-Dereverb modeli oldukça yavaştır.<br>3. Tavsiye edilen en temiz yapılandırma önce MDX-Net'i uygulamak ve ardından DeEcho-Aggressive uygulamaktır.",
"输入待处理音频文件夹路径": "İşlenecek ses klasörünün yolunu girin:",
"模型": "Model",
"指定输出主人声文件夹": "Vokal için çıkış klasörünü belirtin:",
"指定输出非主人声文件夹": "Müzik ve diğer sesler için çıkış klasörünü belirtin:",
"训练": "Eğitim",
"step1: 填写实验配置. 实验数据放在logs下, 每个实验一个文件夹, 需手工输入实验名路径, 内含实验配置, 日志, 训练得到的模型文件. ": "Adım 1: Deneysel yapılandırmayı doldurun. Deneysel veriler 'logs' klasöründe saklanır ve her bir deney için ayrı bir klasör vardır. Deneysel adı yolu manuel olarak girin; bu yol, deneysel yapılandırmayı, günlükleri ve eğitilmiş model dosyalarını içerir.",
"输入实验名": "Deneysel adı girin:",
"目标采样率": "Hedef örnekleme oranı:",
"模型是否带音高指导(唱歌一定要, 语音可以不要)": "Modelin ses yüksekliği (Pitch) rehberliği içerip içermediği (şarkı söyleme için şarttır, konuşma için isteğe bağlıdır):",
"版本": "Sürüm",
"提取音高和处理数据使用的CPU进程数": "Ses yüksekliği çıkartmak (Pitch) ve verileri işlemek için kullanılacak CPU işlemci sayısı:",
"step2a: 自动遍历训练文件夹下所有可解码成音频的文件并进行切片归一化, 在实验目录下生成2个wav文件夹; 暂时只支持单人训练. ": "Adım 2a: Eğitim klasöründe ses dosyalarını otomatik olarak gezinerek dilimleme normalizasyonu yapın. Deney dizini içinde 2 wav klasörü oluşturur. Şu anda sadece tek kişilik eğitim desteklenmektedir.",
"输入训练文件夹路径": "Eğitim klasörünün yolunu girin:",
"请指定说话人id": "Lütfen konuşmacı/sanatçı no belirtin:",
"处理数据": "Verileri işle",
"step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)": "Adım 2b: Ses yüksekliği (Pitch) çıkartmak için CPU kullanın (eğer model ses yüksekliği içeriyorsa), özellikleri çıkartmak için GPU kullanın (GPU indeksini seçin):",
"以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2": "GPU indekslerini '-' ile ayırarak girin, örneğin 0-1-2, GPU 0, 1 ve 2'yi kullanmak için:",
"显卡信息": "GPU Bilgisi",
"选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢": "Ses yüksekliği (Pitch) çıkartma algoritmasını seçin ('pm': daha hızlı çıkartma, ancak düşük kaliteli konuşma; 'dio': geliştirilmiş konuşma kalitesi, ancak daha yavaş çıkartma; 'harvest': daha iyi kalite, ancak daha da yavaş çıkartma):",
"特征提取": "Özellik çıkartma",
"step3: 填写训练设置, 开始训练模型和索引": "Adım 3: Eğitim ayarlarını doldurun ve modeli ve dizini eğitmeye başlayın",
"保存频率save_every_epoch": "Kaydetme sıklığı (save_every_epoch):",
"总训练轮数total_epoch": "Toplam eğitim turu (total_epoch):",
"每张显卡的batch_size": "Her GPU için yığın boyutu (batch_size):",
"是否仅保存最新的ckpt文件以节省硬盘空间": "Sadece en son '.ckpt' dosyasını kaydet:",
"否": "Hayır",
"是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速": "Tüm eğitim verilerini GPU belleğine önbelleğe alıp almayacağınızı belirtin. Küçük veri setlerini (10 dakikadan az) önbelleğe almak eğitimi hızlandırabilir, ancak büyük veri setlerini önbelleğe almak çok fazla GPU belleği tüketir ve çok fazla hız artışı sağlamaz:",
"是否在每次保存时间点将最终小模型保存至weights文件夹": "Her kaydetme noktasında son küçük bir modeli 'weights' klasörüne kaydetmek için:",
"加载预训练底模G路径": "Önceden eğitilmiş temel G modelini yükleme yolu:",
"加载预训练底模D路径": "Önceden eğitilmiş temel D modelini yükleme yolu:",
"训练模型": "Modeli Eğit",
"训练特征索引": "Özellik Dizinini Eğit",
"一键训练": "Tek Tuşla Eğit",
"ckpt处理": "ckpt İşleme",
"模型融合, 可用于测试音色融合": "Model birleştirme, ses rengi birleştirmesi için kullanılabilir",
"A模型路径": "A Modeli Yolu:",
"B模型路径": "B Modeli Yolu:",
"A模型权重": "A Modeli Ağırlığı:",
"模型是否带音高指导": "Modelin ses yüksekliği rehberi içerip içermediği:",
"要置入的模型信息": "Eklemek için model bilgileri:",
"保存的模型名不带后缀": "Kaydedilecek model adı (uzantı olmadan):",
"模型版本型号": "Model mimari versiyonu:",
"融合": "Birleştir",
"修改模型信息(仅支持weights文件夹下提取的小模型文件)": "Model bilgilerini düzenle (sadece 'weights' klasöründen çıkarılan küçük model dosyaları desteklenir)",
"模型路径": "Model Yolu:",
"要改的模型信息": "Düzenlenecek model bilgileri:",
"保存的文件名, 默认空为和源文件同名": "Kaydedilecek dosya adı (varsayılan: kaynak dosya ile aynı):",
"修改": "Düzenle",
"查看模型信息(仅支持weights文件夹下提取的小模型文件)": "Model bilgilerini görüntüle (sadece 'weights' klasöründen çıkarılan küçük model dosyaları desteklenir)",
"查看": "Görüntüle",
"模型提取(输入logs文件夹下大文件模型路径),适用于训一半不想训了模型没有自动提取保存小文件模型,或者想测试中间模型的情况": "Model çıkartma (büyük dosya modeli yolunu 'logs' klasöründe girin). Bu, eğitimi yarıda bırakmak istediğinizde ve manuel olarak küçük bir model dosyası çıkartmak ve kaydetmek istediğinizde veya bir ara modeli test etmek istediğinizde kullanışlıdır:",
"保存名": "Kaydetme Adı:",
"模型是否带音高指导,1是0否": "Modelin ses yüksekliği rehberi içerip içermediği (1: evet, 0: hayır):",
"提取": ıkart",
"Onnx导出": "Onnx Dışa Aktar",
"RVC模型路径": "RVC Model Yolu:",
"Onnx输出路径": "Onnx Dışa Aktarım Yolu:",
"导出Onnx模型": "Onnx Modeli Dışa Aktar",
"常见问题解答": "Sıkça Sorulan Sorular (SSS)",
"招募音高曲线前端编辑器": "Ses yükseklik eğrisi ön uç düzenleyicisi için işe alım",
"加开发群联系我xxxxx": "Geliştirme grubuna katılın ve benimle iletişime geçin: xxxxx",
"点击查看交流、问题反馈群号": "İletişim ve sorun geri bildirim grup numarasını görüntülemek için tıklayın",
"xxxxx": "xxxxx",
"加载模型": "Model yükle",
"Hubert模型": "Hubert Modeli",
"选择.pth文件": ".pth dosyası seç",
"选择.index文件": ".index dosyası seç",
"选择.npy文件": ".npy dosyası seç",
"输入设备": "Giriş cihazı",
"输出设备": ıkış cihazı",
"音频设备(请使用同种类驱动)": "Ses cihazı (aynı tür sürücüyü kullanın)",
"响应阈值": "Tepki eşiği",
"音调设置": "Pitch ayarları",
"Index Rate": "Index Oranı",
"常规设置": "Genel ayarlar",
"采样长度": "Örnekleme uzunluğu",
"淡入淡出长度": "Geçiş (Fade) uzunluğu",
"额外推理时长": "Ekstra çıkartma süresi",
"输入降噪": "Giriş gürültü azaltma",
"输出降噪": ıkış gürültü azaltma",
"性能设置": "Performans ayarları",
"开始音频转换": "Ses dönüştürmeyi başlat",
"停止音频转换": "Ses dönüştürmeyi durdur",
"推理时间(ms):": ıkarsama süresi (ms):",
"请选择pth文件": "Lütfen .pth dosyası seçin",
"请选择index文件": "Lütfen .index dosyası seçin",
"hubert模型路径不可包含中文": "hubert modeli yolu Çince karakter içeremez",
"pth文件路径不可包含中文": ".pth dosya yolu Çince karakter içeremez",
"index文件路径不可包含中文": ".index dosya yolu Çince karakter içeremez",
"音高算法": "音高算法",
"harvest进程数": "harvest进程数"
}

View File

@@ -7,7 +7,7 @@
"step3a:正在训练模型": "step3a:正在训练模型",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log",
"全流程结束!": "全流程结束!",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>使用需遵守的协议-LICENSE.txt</b>.": "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>使用需遵守的协议-LICENSE.txt</b>.",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.": "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.",
"模型推理": "模型推理",
"推理音色": "推理音色",
"刷新音色列表和索引路径": "刷新音色列表和索引路径",
@@ -37,6 +37,7 @@
"也可批量输入音频文件, 二选一, 优先读文件夹": "也可批量输入音频文件, 二选一, 优先读文件夹",
"导出文件格式": "导出文件格式",
"伴奏人声分离&去混响&去回声": "伴奏人声分离&去混响&去回声",
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。": "人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。",
"输入待处理音频文件夹路径": "输入待处理音频文件夹路径",
"模型": "模型",
"指定输出主人声文件夹": "指定输出主人声文件夹",
@@ -94,7 +95,6 @@
"Onnx导出": "Onnx导出",
"RVC模型路径": "RVC模型路径",
"Onnx输出路径": "Onnx输出路径",
"MoeVS模型": "MoeVS模型",
"导出Onnx模型": "导出Onnx模型",
"常见问题解答": "常见问题解答",
"招募音高曲线前端编辑器": "招募音高曲线前端编辑器",
@@ -121,5 +121,12 @@
"性能设置": "性能设置",
"开始音频转换": "开始音频转换",
"停止音频转换": "停止音频转换",
"推理时间(ms):": "推理时间(ms):"
"推理时间(ms):": "推理时间(ms):",
"请选择pth文件": "请选择pth文件",
"请选择index文件": "请选择index文件",
"hubert模型路径不可包含中文": "hubert模型路径不可包含中文",
"pth文件路径不可包含中文": "pth文件路径不可包含中文",
"index文件路径不可包含中文": "index文件路径不可包含中文",
"音高算法": "音高算法",
"harvest进程数": "harvest进程数"
}

View File

@@ -7,7 +7,7 @@
"step3a:正在训练模型": "step3a:正在训练模型",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log",
"全流程结束!": "全流程结束!",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>使用需遵守的协议-LICENSE.txt</b>.": "本軟體以MIT協議開源作者不對軟體具備任何控制力使用軟體者、傳播軟體導出的聲音者自負全責。<br>如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄<b>使用需遵守的協議-LICENSE.txt</b>。",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.": "本軟體以MIT協議開源作者不對軟體具備任何控制力使用軟體者、傳播軟體導出的聲音者自負全責。<br>如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄<b>使用需遵守的協議-LICENSE.txt</b>。",
"模型推理": "模型推理",
"推理音色": "推理音色",
"刷新音色列表和索引路径": "刷新音色列表和索引路徑",
@@ -37,6 +37,7 @@
"也可批量输入音频文件, 二选一, 优先读文件夹": "也可批量輸入音頻檔案,二選一,優先讀資料夾",
"导出文件格式": "導出檔格式",
"伴奏人声分离&去混响&去回声": "伴奏人聲分離&去混響&去回聲",
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。": "使用UVR5模型進行人聲伴奏分離的批次處理。<br>有效資料夾路徑格式的例子D:\\path\\to\\input\\folder從檔案管理員地址欄複製。<br>模型分為三類:<br>1. 保留人聲選擇這個選項適用於沒有和聲的音訊。它比HP5更好地保留了人聲。它包括兩個內建模型HP2和HP3。HP3可能輕微漏出伴奏但比HP2更好地保留了人聲<br>2. 僅保留主人聲選擇這個選項適用於有和聲的音訊。它可能會削弱主人聲。它包括一個內建模型HP5。<br>3. 消除混響和延遲模型由FoxJoy提供<br>(1) MDX-Net對於立體聲混響的移除是最好的選擇但不能移除單聲道混響<br>&emsp;(234) DeEcho移除延遲效果。Aggressive模式比Normal模式移除得更徹底。DeReverb另外移除混響可以移除單聲道混響但對於高頻重的板式混響移除不乾淨。<br>消除混響/延遲注意事項:<br>1. DeEcho-DeReverb模型的處理時間是其他兩個DeEcho模型的近兩倍<br>2. MDX-Net-Dereverb模型相當慢<br>3. 個人推薦的最乾淨配置是先使用MDX-Net然後使用DeEcho-Aggressive。",
"输入待处理音频文件夹路径": "輸入待處理音頻資料夾路徑",
"模型": "模型",
"指定输出主人声文件夹": "指定输出主人声文件夹",
@@ -94,7 +95,6 @@
"Onnx导出": "Onnx导出",
"RVC模型路径": "RVC模型路径",
"Onnx输出路径": "Onnx输出路径",
"MoeVS模型": "MoeSS模型",
"导出Onnx模型": "导出Onnx模型",
"常见问题解答": "常見問題解答",
"招募音高曲线前端编辑器": "招募音高曲線前端編輯器",
@@ -122,5 +122,11 @@
"开始音频转换": "開始音訊轉換",
"停止音频转换": "停止音訊轉換",
"推理时间(ms):": "推理時間(ms):",
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。":"使用UVR5模型進行人聲伴奏分離的批次處理。<br>有效資料夾路徑格式的例子D:\\path\\to\\input\\folder從檔案管理員地址欄複製。<br>模型分為三類:<br>1. 保留人聲選擇這個選項適用於沒有和聲的音訊。它比HP5更好地保留了人聲。它包括兩個內建模型HP2和HP3。HP3可能輕微漏出伴奏但比HP2更好地保留了人聲<br>2. 僅保留主人聲選擇這個選項適用於有和聲的音訊。它可能會削弱主人聲。它包括一個內建模型HP5。<br>3. 消除混響和延遲模型由FoxJoy提供<br>(1) MDX-Net對於立體聲混響的移除是最好的選擇但不能移除單聲道混響<br>&emsp;(234) DeEcho移除延遲效果。Aggressive模式比Normal模式移除得更徹底。DeReverb另外移除混響可以移除單聲道混響但對於高頻重的板式混響移除不乾淨。<br>消除混響/延遲注意事項:<br>1. DeEcho-DeReverb模型的處理時間是其他兩個DeEcho模型的近兩倍<br>2. MDX-Net-Dereverb模型相當慢<br>3. 個人推薦的最乾淨配置是先使用MDX-Net然後使用DeEcho-Aggressive。"
"请选择pth文件": "请选择pth文件",
"请选择index文件": "请选择index文件",
"hubert模型路径不可包含中文": "hubert模型路径不可包含中文",
"pth文件路径不可包含中文": "pth文件路径不可包含中文",
"index文件路径不可包含中文": "index文件路径不可包含中文",
"音高算法": "音高算法",
"harvest进程数": "harvest进程数"
}

View File

@@ -7,7 +7,7 @@
"step3a:正在训练模型": "step3a:正在训练模型",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log",
"全流程结束!": "全流程结束!",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>使用需遵守的协议-LICENSE.txt</b>.": "本軟體以MIT協議開源作者不對軟體具備任何控制力使用軟體者、傳播軟體導出的聲音者自負全責。<br>如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄<b>使用需遵守的協議-LICENSE.txt</b>。",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.": "本軟體以MIT協議開源作者不對軟體具備任何控制力使用軟體者、傳播軟體導出的聲音者自負全責。<br>如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄<b>使用需遵守的協議-LICENSE.txt</b>。",
"模型推理": "模型推理",
"推理音色": "推理音色",
"刷新音色列表和索引路径": "刷新音色列表和索引路徑",
@@ -37,6 +37,7 @@
"也可批量输入音频文件, 二选一, 优先读文件夹": "也可批量輸入音頻檔案,二選一,優先讀資料夾",
"导出文件格式": "導出檔格式",
"伴奏人声分离&去混响&去回声": "伴奏人聲分離&去混響&去回聲",
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。": "使用UVR5模型進行人聲伴奏分離的批次處理。<br>有效資料夾路徑格式的例子D:\\path\\to\\input\\folder從檔案管理員地址欄複製。<br>模型分為三類:<br>1. 保留人聲選擇這個選項適用於沒有和聲的音訊。它比HP5更好地保留了人聲。它包括兩個內建模型HP2和HP3。HP3可能輕微漏出伴奏但比HP2更好地保留了人聲<br>2. 僅保留主人聲選擇這個選項適用於有和聲的音訊。它可能會削弱主人聲。它包括一個內建模型HP5。<br>3. 消除混響和延遲模型由FoxJoy提供<br>(1) MDX-Net對於立體聲混響的移除是最好的選擇但不能移除單聲道混響<br>&emsp;(234) DeEcho移除延遲效果。Aggressive模式比Normal模式移除得更徹底。DeReverb另外移除混響可以移除單聲道混響但對於高頻重的板式混響移除不乾淨。<br>消除混響/延遲注意事項:<br>1. DeEcho-DeReverb模型的處理時間是其他兩個DeEcho模型的近兩倍<br>2. MDX-Net-Dereverb模型相當慢<br>3. 個人推薦的最乾淨配置是先使用MDX-Net然後使用DeEcho-Aggressive。",
"输入待处理音频文件夹路径": "輸入待處理音頻資料夾路徑",
"模型": "模型",
"指定输出主人声文件夹": "指定输出主人声文件夹",
@@ -94,7 +95,6 @@
"Onnx导出": "Onnx导出",
"RVC模型路径": "RVC模型路径",
"Onnx输出路径": "Onnx输出路径",
"MoeVS模型": "MoeSS模型",
"导出Onnx模型": "导出Onnx模型",
"常见问题解答": "常見問題解答",
"招募音高曲线前端编辑器": "招募音高曲線前端編輯器",
@@ -122,5 +122,11 @@
"开始音频转换": "開始音訊轉換",
"停止音频转换": "停止音訊轉換",
"推理时间(ms):": "推理時間(ms):",
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。":"使用UVR5模型進行人聲伴奏分離的批次處理。<br>有效資料夾路徑格式的例子D:\\path\\to\\input\\folder從檔案管理員地址欄複製。<br>模型分為三類:<br>1. 保留人聲選擇這個選項適用於沒有和聲的音訊。它比HP5更好地保留了人聲。它包括兩個內建模型HP2和HP3。HP3可能輕微漏出伴奏但比HP2更好地保留了人聲<br>2. 僅保留主人聲選擇這個選項適用於有和聲的音訊。它可能會削弱主人聲。它包括一個內建模型HP5。<br>3. 消除混響和延遲模型由FoxJoy提供<br>(1) MDX-Net對於立體聲混響的移除是最好的選擇但不能移除單聲道混響<br>&emsp;(234) DeEcho移除延遲效果。Aggressive模式比Normal模式移除得更徹底。DeReverb另外移除混響可以移除單聲道混響但對於高頻重的板式混響移除不乾淨。<br>消除混響/延遲注意事項:<br>1. DeEcho-DeReverb模型的處理時間是其他兩個DeEcho模型的近兩倍<br>2. MDX-Net-Dereverb模型相當慢<br>3. 個人推薦的最乾淨配置是先使用MDX-Net然後使用DeEcho-Aggressive。"
"请选择pth文件": "请选择pth文件",
"请选择index文件": "请选择index文件",
"hubert模型路径不可包含中文": "hubert模型路径不可包含中文",
"pth文件路径不可包含中文": "pth文件路径不可包含中文",
"index文件路径不可包含中文": "index文件路径不可包含中文",
"音高算法": "音高算法",
"harvest进程数": "harvest进程数"
}

View File

@@ -7,7 +7,7 @@
"step3a:正在训练模型": "step3a:正在训练模型",
"训练结束, 您可查看控制台训练日志或实验文件夹下的train.log": "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log",
"全流程结束!": "全流程结束!",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>使用需遵守的协议-LICENSE.txt</b>.": "本軟體以MIT協議開源作者不對軟體具備任何控制力使用軟體者、傳播軟體導出的聲音者自負全責。<br>如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄<b>使用需遵守的協議-LICENSE.txt</b>。",
"本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. <br>如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录<b>LICENSE</b>.": "本軟體以MIT協議開源作者不對軟體具備任何控制力使用軟體者、傳播軟體導出的聲音者自負全責。<br>如不認可該條款,則不能使用或引用軟體包內任何程式碼和檔案。詳見根目錄<b>使用需遵守的協議-LICENSE.txt</b>。",
"模型推理": "模型推理",
"推理音色": "推理音色",
"刷新音色列表和索引路径": "刷新音色列表和索引路徑",
@@ -37,6 +37,7 @@
"也可批量输入音频文件, 二选一, 优先读文件夹": "也可批量輸入音頻檔案,二選一,優先讀資料夾",
"导出文件格式": "導出檔格式",
"伴奏人声分离&去混响&去回声": "伴奏人聲分離&去混響&去回聲",
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。": "使用UVR5模型進行人聲伴奏分離的批次處理。<br>有效資料夾路徑格式的例子D:\\path\\to\\input\\folder從檔案管理員地址欄複製。<br>模型分為三類:<br>1. 保留人聲選擇這個選項適用於沒有和聲的音訊。它比HP5更好地保留了人聲。它包括兩個內建模型HP2和HP3。HP3可能輕微漏出伴奏但比HP2更好地保留了人聲<br>2. 僅保留主人聲選擇這個選項適用於有和聲的音訊。它可能會削弱主人聲。它包括一個內建模型HP5。<br>3. 消除混響和延遲模型由FoxJoy提供<br>(1) MDX-Net對於立體聲混響的移除是最好的選擇但不能移除單聲道混響<br>&emsp;(234) DeEcho移除延遲效果。Aggressive模式比Normal模式移除得更徹底。DeReverb另外移除混響可以移除單聲道混響但對於高頻重的板式混響移除不乾淨。<br>消除混響/延遲注意事項:<br>1. DeEcho-DeReverb模型的處理時間是其他兩個DeEcho模型的近兩倍<br>2. MDX-Net-Dereverb模型相當慢<br>3. 個人推薦的最乾淨配置是先使用MDX-Net然後使用DeEcho-Aggressive。",
"输入待处理音频文件夹路径": "輸入待處理音頻資料夾路徑",
"模型": "模型",
"指定输出主人声文件夹": "指定输出主人声文件夹",
@@ -94,7 +95,6 @@
"Onnx导出": "Onnx导出",
"RVC模型路径": "RVC模型路径",
"Onnx输出路径": "Onnx输出路径",
"MoeVS模型": "MoeSS模型",
"导出Onnx模型": "导出Onnx模型",
"常见问题解答": "常見問題解答",
"招募音高曲线前端编辑器": "招募音高曲線前端編輯器",
@@ -122,5 +122,11 @@
"开始音频转换": "開始音訊轉換",
"停止音频转换": "停止音訊轉換",
"推理时间(ms):": "推理時間(ms):",
"人声伴奏分离批量处理, 使用UVR5模型。 <br>合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 <br>模型分为三类: <br>1、保留人声不带和声的音频选这个对主人声保留比HP5更好。内置HP2和HP3两个模型HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点 <br>2、仅保留主人声带和声的音频选这个对主人声可能有削弱。内置HP5一个模型 <br> 3、去混响、去延迟模型by FoxJoy<br>(1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;<br>&emsp;(234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底DeReverb额外去除混响可去除单声道混响但是对高频重的板式混响去不干净。<br>去混响/去延迟,附:<br>1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍<br>2、MDX-Net-Dereverb模型挺慢的<br>3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。":"使用UVR5模型進行人聲伴奏分離的批次處理。<br>有效資料夾路徑格式的例子D:\\path\\to\\input\\folder從檔案管理員地址欄複製。<br>模型分為三類:<br>1. 保留人聲選擇這個選項適用於沒有和聲的音訊。它比HP5更好地保留了人聲。它包括兩個內建模型HP2和HP3。HP3可能輕微漏出伴奏但比HP2更好地保留了人聲<br>2. 僅保留主人聲選擇這個選項適用於有和聲的音訊。它可能會削弱主人聲。它包括一個內建模型HP5。<br>3. 消除混響和延遲模型由FoxJoy提供<br>(1) MDX-Net對於立體聲混響的移除是最好的選擇但不能移除單聲道混響<br>&emsp;(234) DeEcho移除延遲效果。Aggressive模式比Normal模式移除得更徹底。DeReverb另外移除混響可以移除單聲道混響但對於高頻重的板式混響移除不乾淨。<br>消除混響/延遲注意事項:<br>1. DeEcho-DeReverb模型的處理時間是其他兩個DeEcho模型的近兩倍<br>2. MDX-Net-Dereverb模型相當慢<br>3. 個人推薦的最乾淨配置是先使用MDX-Net然後使用DeEcho-Aggressive。"
"请选择pth文件": "请选择pth文件",
"请选择index文件": "请选择index文件",
"hubert模型路径不可包含中文": "hubert模型路径不可包含中文",
"pth文件路径不可包含中文": "pth文件路径不可包含中文",
"index文件路径不可包含中文": "index文件路径不可包含中文",
"音高算法": "音高算法",
"harvest进程数": "harvest进程数"
}

View File

@@ -8,8 +8,6 @@ import os, sys, pdb, torch
now_dir = os.getcwd()
sys.path.append(now_dir)
import argparse
import glob
import sys
import torch
import tqdm as tq
@@ -102,7 +100,7 @@ opt_path = sys.argv[5]
model_path = sys.argv[6]
index_rate = float(sys.argv[7])
device = sys.argv[8]
is_half = bool(sys.argv[9])
is_half = sys.argv[9].lower() != "false"
filter_radius = int(sys.argv[10])
resample_sr = int(sys.argv[11])
rms_mix_rate = float(sys.argv[12])
@@ -112,7 +110,7 @@ config = Config(device, is_half)
now_dir = os.getcwd()
sys.path.append(now_dir)
from vc_infer_pipeline import VC
from infer_pack.models import (
from lib.infer_pack.models import (
SynthesizerTrnMs256NSFsid,
SynthesizerTrnMs256NSFsid_nono,
SynthesizerTrnMs768NSFsid,

View File

@@ -10,12 +10,12 @@ import importlib
import numpy as np
import hashlib, math
from tqdm import tqdm
from uvr5_pack.lib_v5 import spec_utils
from uvr5_pack.utils import _get_name_params, inference
from uvr5_pack.lib_v5.model_param_init import ModelParameters
from lib.uvr5_pack.lib_v5 import spec_utils
from lib.uvr5_pack.utils import _get_name_params, inference
from lib.uvr5_pack.lib_v5.model_param_init import ModelParameters
import soundfile as sf
from uvr5_pack.lib_v5.nets_new import CascadedNet
from uvr5_pack.lib_v5 import nets_61968KB as nets
from lib.uvr5_pack.lib_v5.nets_new import CascadedNet
from lib.uvr5_pack.lib_v5 import nets_61968KB as nets
class _audio_pre_:
@@ -31,7 +31,7 @@ class _audio_pre_:
"agg": agg,
"high_end_process": "mirroring",
}
mp = ModelParameters("uvr5_pack/lib_v5/modelparams/4band_v2.json")
mp = ModelParameters("lib/uvr5_pack/lib_v5/modelparams/4band_v2.json")
model = nets.CascadedASPPNet(mp.param["bins"] * 2)
cpk = torch.load(model_path, map_location="cpu")
model.load_state_dict(cpk)
@@ -195,7 +195,7 @@ class _audio_pre_new:
"agg": agg,
"high_end_process": "mirroring",
}
mp = ModelParameters("uvr5_pack/lib_v5/modelparams/4band_v3.json")
mp = ModelParameters("lib/uvr5_pack/lib_v5/modelparams/4band_v3.json")
nout = 64 if "DeReverb" in model_path else 48
model = CascadedNet(mp.param["bins"] * 2, nout)
cpk = torch.load(model_path, map_location="cpu")

View File

@@ -5,9 +5,9 @@ import torch
from torch import nn
from torch.nn import functional as F
from infer_pack import commons
from infer_pack import modules
from infer_pack.modules import LayerNorm
from lib.infer_pack import commons
from lib.infer_pack import modules
from lib.infer_pack.modules import LayerNorm
class Encoder(nn.Module):

View File

@@ -3,15 +3,15 @@ from time import time as ttime
import torch
from torch import nn
from torch.nn import functional as F
from infer_pack import modules
from infer_pack import attentions
from infer_pack import commons
from infer_pack.commons import init_weights, get_padding
from lib.infer_pack import modules
from lib.infer_pack import attentions
from lib.infer_pack import commons
from lib.infer_pack.commons import init_weights, get_padding
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
from infer_pack.commons import init_weights
from lib.infer_pack.commons import init_weights
import numpy as np
from infer_pack import commons
from lib.infer_pack import commons
class TextEncoder256(nn.Module):
@@ -631,12 +631,17 @@ class SynthesizerTrnMs256NSFsid(nn.Module):
o = self.dec(z_slice, pitchf, g=g)
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
def infer(self, phone, phone_lengths, pitch, nsff0, sid, max_len=None):
def infer(self, phone, phone_lengths, pitch, nsff0, sid, rate=None):
g = self.emb_g(sid).unsqueeze(-1)
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
if rate:
head = int(z_p.shape[2] * rate)
z_p = z_p[:, :, -head:]
x_mask = x_mask[:, :, -head:]
nsff0 = nsff0[:, -head:]
z = self.flow(z_p, x_mask, g=g, reverse=True)
o = self.dec((z * x_mask)[:, :, :max_len], nsff0, g=g)
o = self.dec(z * x_mask, nsff0, g=g)
return o, x_mask, (z, z_p, m_p, logs_p)
@@ -742,12 +747,17 @@ class SynthesizerTrnMs768NSFsid(nn.Module):
o = self.dec(z_slice, pitchf, g=g)
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
def infer(self, phone, phone_lengths, pitch, nsff0, sid, max_len=None):
def infer(self, phone, phone_lengths, pitch, nsff0, sid, rate=None):
g = self.emb_g(sid).unsqueeze(-1)
m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
if rate:
head = int(z_p.shape[2] * rate)
z_p = z_p[:, :, -head:]
x_mask = x_mask[:, :, -head:]
nsff0 = nsff0[:, -head:]
z = self.flow(z_p, x_mask, g=g, reverse=True)
o = self.dec((z * x_mask)[:, :, :max_len], nsff0, g=g)
o = self.dec(z * x_mask, nsff0, g=g)
return o, x_mask, (z, z_p, m_p, logs_p)
@@ -844,12 +854,16 @@ class SynthesizerTrnMs256NSFsid_nono(nn.Module):
o = self.dec(z_slice, g=g)
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
def infer(self, phone, phone_lengths, sid, max_len=None):
def infer(self, phone, phone_lengths, sid, rate=None):
g = self.emb_g(sid).unsqueeze(-1)
m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
if rate:
head = int(z_p.shape[2] * rate)
z_p = z_p[:, :, -head:]
x_mask = x_mask[:, :, -head:]
z = self.flow(z_p, x_mask, g=g, reverse=True)
o = self.dec((z * x_mask)[:, :, :max_len], g=g)
o = self.dec(z * x_mask, g=g)
return o, x_mask, (z, z_p, m_p, logs_p)
@@ -946,12 +960,16 @@ class SynthesizerTrnMs768NSFsid_nono(nn.Module):
o = self.dec(z_slice, g=g)
return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
def infer(self, phone, phone_lengths, sid, max_len=None):
def infer(self, phone, phone_lengths, sid, rate=None):
g = self.emb_g(sid).unsqueeze(-1)
m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)
z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
if rate:
head = int(z_p.shape[2] * rate)
z_p = z_p[:, :, -head:]
x_mask = x_mask[:, :, -head:]
z = self.flow(z_p, x_mask, g=g, reverse=True)
o = self.dec((z * x_mask)[:, :, :max_len], g=g)
o = self.dec(z * x_mask, g=g)
return o, x_mask, (z, z_p, m_p, logs_p)

1124
lib/infer_pack/models_dml.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -3,15 +3,15 @@ from time import time as ttime
import torch
from torch import nn
from torch.nn import functional as F
from infer_pack import modules
from infer_pack import attentions
from infer_pack import commons
from infer_pack.commons import init_weights, get_padding
from lib.infer_pack import modules
from lib.infer_pack import attentions
from lib.infer_pack import commons
from lib.infer_pack.commons import init_weights, get_padding
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
from infer_pack.commons import init_weights
from lib.infer_pack.commons import init_weights
import numpy as np
from infer_pack import commons
from lib.infer_pack import commons
class TextEncoder256(nn.Module):

View File

@@ -9,9 +9,9 @@ from torch.nn import functional as F
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
from torch.nn.utils import weight_norm, remove_weight_norm
from infer_pack import commons
from infer_pack.commons import init_weights, get_padding
from infer_pack.transforms import piecewise_rational_quadratic_transform
from lib.infer_pack import commons
from lib.infer_pack.commons import init_weights, get_padding
from lib.infer_pack.transforms import piecewise_rational_quadratic_transform
LRELU_SLOPE = 0.1

View File

@@ -1,4 +1,4 @@
from infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
from lib.infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
import pyworld
import numpy as np

View File

@@ -1,4 +1,4 @@
from infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
from lib.infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
import pyworld
import numpy as np

View File

@@ -1,4 +1,4 @@
from infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
from lib.infer_pack.modules.F0Predictor.F0Predictor import F0Predictor
import parselmouth
import numpy as np

View File

@@ -33,19 +33,21 @@ class ContentVec:
def get_f0_predictor(f0_predictor, hop_length, sampling_rate, **kargs):
if f0_predictor == "pm":
from infer_pack.modules.F0Predictor.PMF0Predictor import PMF0Predictor
from lib.infer_pack.modules.F0Predictor.PMF0Predictor import PMF0Predictor
f0_predictor_object = PMF0Predictor(
hop_length=hop_length, sampling_rate=sampling_rate
)
elif f0_predictor == "harvest":
from infer_pack.modules.F0Predictor.HarvestF0Predictor import HarvestF0Predictor
from lib.infer_pack.modules.F0Predictor.HarvestF0Predictor import (
HarvestF0Predictor,
)
f0_predictor_object = HarvestF0Predictor(
hop_length=hop_length, sampling_rate=sampling_rate
)
elif f0_predictor == "dio":
from infer_pack.modules.F0Predictor.DioF0Predictor import DioF0Predictor
from lib.infer_pack.modules.F0Predictor.DioF0Predictor import DioF0Predictor
f0_predictor_object = DioF0Predictor(
hop_length=hop_length, sampling_rate=sampling_rate

View File

@@ -6,7 +6,7 @@ import torch
import torch.utils.data
from tqdm import tqdm
from uvr5_pack.lib_v5 import spec_utils
from . import spec_utils
class VocalRemoverValidationSet(torch.utils.data.Dataset):

View File

@@ -2,7 +2,7 @@ import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import spec_utils
from . import spec_utils
class Conv2DBNActiv(nn.Module):

View File

@@ -2,7 +2,7 @@ import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import spec_utils
from . import spec_utils
class Conv2DBNActiv(nn.Module):

View File

@@ -2,7 +2,7 @@ import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import spec_utils
from . import spec_utils
class Conv2DBNActiv(nn.Module):

View File

@@ -2,7 +2,7 @@ import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import spec_utils
from . import spec_utils
class Conv2DBNActiv(nn.Module):

View File

@@ -2,7 +2,7 @@ import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import spec_utils
from . import spec_utils
class Conv2DBNActiv(nn.Module):

View File

@@ -2,7 +2,7 @@ import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import spec_utils
from . import spec_utils
class Conv2DBNActiv(nn.Module):

View File

@@ -2,7 +2,7 @@ import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import spec_utils
from . import spec_utils
class Conv2DBNActiv(nn.Module):

View File

@@ -2,8 +2,8 @@ import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import layers
from uvr5_pack.lib_v5 import spec_utils
import layers
from . import spec_utils
class BaseASPPNet(nn.Module):

View File

@@ -2,7 +2,7 @@ import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import layers_123821KB as layers
from . import layers_123821KB as layers
class BaseASPPNet(nn.Module):

View File

@@ -2,7 +2,7 @@ import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import layers_123821KB as layers
from . import layers_123821KB as layers
class BaseASPPNet(nn.Module):

View File

@@ -2,7 +2,7 @@ import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import layers_33966KB as layers
from . import layers_33966KB as layers
class BaseASPPNet(nn.Module):

View File

@@ -3,7 +3,7 @@ import numpy as np
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import layers_537238KB as layers
from . import layers_537238KB as layers
class BaseASPPNet(nn.Module):

View File

@@ -3,7 +3,7 @@ import numpy as np
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import layers_537238KB as layers
from . import layers_537238KB as layers
class BaseASPPNet(nn.Module):

View File

@@ -2,7 +2,7 @@ import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import layers_123821KB as layers
from . import layers_123821KB as layers
class BaseASPPNet(nn.Module):

View File

@@ -1,7 +1,7 @@
import torch
from torch import nn
import torch.nn.functional as F
from uvr5_pack.lib_v5 import layers_new as layers
from . import layers_new
class BaseNet(nn.Module):
@@ -9,19 +9,19 @@ class BaseNet(nn.Module):
self, nin, nout, nin_lstm, nout_lstm, dilations=((4, 2), (8, 4), (12, 6))
):
super(BaseNet, self).__init__()
self.enc1 = layers.Conv2DBNActiv(nin, nout, 3, 1, 1)
self.enc2 = layers.Encoder(nout, nout * 2, 3, 2, 1)
self.enc3 = layers.Encoder(nout * 2, nout * 4, 3, 2, 1)
self.enc4 = layers.Encoder(nout * 4, nout * 6, 3, 2, 1)
self.enc5 = layers.Encoder(nout * 6, nout * 8, 3, 2, 1)
self.enc1 = layers_new.Conv2DBNActiv(nin, nout, 3, 1, 1)
self.enc2 = layers_new.Encoder(nout, nout * 2, 3, 2, 1)
self.enc3 = layers_new.Encoder(nout * 2, nout * 4, 3, 2, 1)
self.enc4 = layers_new.Encoder(nout * 4, nout * 6, 3, 2, 1)
self.enc5 = layers_new.Encoder(nout * 6, nout * 8, 3, 2, 1)
self.aspp = layers.ASPPModule(nout * 8, nout * 8, dilations, dropout=True)
self.aspp = layers_new.ASPPModule(nout * 8, nout * 8, dilations, dropout=True)
self.dec4 = layers.Decoder(nout * (6 + 8), nout * 6, 3, 1, 1)
self.dec3 = layers.Decoder(nout * (4 + 6), nout * 4, 3, 1, 1)
self.dec2 = layers.Decoder(nout * (2 + 4), nout * 2, 3, 1, 1)
self.lstm_dec2 = layers.LSTMModule(nout * 2, nin_lstm, nout_lstm)
self.dec1 = layers.Decoder(nout * (1 + 2) + 1, nout * 1, 3, 1, 1)
self.dec4 = layers_new.Decoder(nout * (6 + 8), nout * 6, 3, 1, 1)
self.dec3 = layers_new.Decoder(nout * (4 + 6), nout * 4, 3, 1, 1)
self.dec2 = layers_new.Decoder(nout * (2 + 4), nout * 2, 3, 1, 1)
self.lstm_dec2 = layers_new.LSTMModule(nout * 2, nin_lstm, nout_lstm)
self.dec1 = layers_new.Decoder(nout * (1 + 2) + 1, nout * 1, 3, 1, 1)
def __call__(self, x):
e1 = self.enc1(x)
@@ -52,7 +52,7 @@ class CascadedNet(nn.Module):
self.stg1_low_band_net = nn.Sequential(
BaseNet(2, nout // 2, self.nin_lstm // 2, nout_lstm),
layers.Conv2DBNActiv(nout // 2, nout // 4, 1, 1, 0),
layers_new.Conv2DBNActiv(nout // 2, nout // 4, 1, 1, 0),
)
self.stg1_high_band_net = BaseNet(
@@ -61,7 +61,7 @@ class CascadedNet(nn.Module):
self.stg2_low_band_net = nn.Sequential(
BaseNet(nout // 4 + 2, nout, self.nin_lstm // 2, nout_lstm),
layers.Conv2DBNActiv(nout, nout // 2, 1, 1, 0),
layers_new.Conv2DBNActiv(nout, nout // 2, 1, 1, 0),
)
self.stg2_high_band_net = BaseNet(
nout // 4 + 2, nout // 2, self.nin_lstm // 2, nout_lstm // 2

View File

@@ -4,92 +4,92 @@
"model_hash_name" : [
{
"hash_name": "47939caf0cfe52a0e81442b85b971dfd",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100.json",
"param_name": "4band_44100"
},
{
"hash_name": "4e4ecb9764c50a8c414fee6e10395bbe",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_v2.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_v2.json",
"param_name": "4band_v2"
},
{
"hash_name": "ca106edd563e034bde0bdec4bb7a4b36",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_v2.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_v2.json",
"param_name": "4band_v2"
},
{
"hash_name": "e60a1e84803ce4efc0a6551206cc4b71",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100.json",
"param_name": "4band_44100"
},
{
"hash_name": "a82f14e75892e55e994376edbf0c8435",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100.json",
"param_name": "4band_44100"
},
{
"hash_name": "6dd9eaa6f0420af9f1d403aaafa4cc06",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_v2_sn.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_v2_sn.json",
"param_name": "4band_v2_sn"
},
{
"hash_name": "08611fb99bd59eaa79ad27c58d137727",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_v2_sn.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_v2_sn.json",
"param_name": "4band_v2_sn"
},
{
"hash_name": "5c7bbca45a187e81abbbd351606164e5",
"model_params": "uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json",
"param_name": "3band_44100_msb2"
},
{
"hash_name": "d6b2cb685a058a091e5e7098192d3233",
"model_params": "uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json",
"param_name": "3band_44100_msb2"
},
{
"hash_name": "c1b9f38170a7c90e96f027992eb7c62b",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100.json",
"param_name": "4band_44100"
},
{
"hash_name": "c3448ec923fa0edf3d03a19e633faa53",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100.json",
"param_name": "4band_44100"
},
{
"hash_name": "68aa2c8093d0080704b200d140f59e54",
"model_params": "uvr5_pack/lib_v5/modelparams/3band_44100.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/3band_44100.json",
"param_name": "3band_44100"
},
{
"hash_name": "fdc83be5b798e4bd29fe00fe6600e147",
"model_params": "uvr5_pack/lib_v5/modelparams/3band_44100_mid.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/3band_44100_mid.json",
"param_name": "3band_44100_mid.json"
},
{
"hash_name": "2ce34bc92fd57f55db16b7a4def3d745",
"model_params": "uvr5_pack/lib_v5/modelparams/3band_44100_mid.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/3band_44100_mid.json",
"param_name": "3band_44100_mid.json"
},
{
"hash_name": "52fdca89576f06cf4340b74a4730ee5f",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100.json",
"param_name": "4band_44100.json"
},
{
"hash_name": "41191165b05d38fc77f072fa9e8e8a30",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100.json",
"param_name": "4band_44100.json"
},
{
"hash_name": "89e83b511ad474592689e562d5b1f80e",
"model_params": "uvr5_pack/lib_v5/modelparams/2band_32000.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/2band_32000.json",
"param_name": "2band_32000.json"
},
{
"hash_name": "0b954da81d453b716b114d6d7c95177f",
"model_params": "uvr5_pack/lib_v5/modelparams/2band_32000.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/2band_32000.json",
"param_name": "2band_32000.json"
}
@@ -97,47 +97,47 @@
"v4 Models": [
{
"hash_name": "6a00461c51c2920fd68937d4609ed6c8",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json",
"param_name": "1band_sr16000_hl512"
},
{
"hash_name": "0ab504864d20f1bd378fe9c81ef37140",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json",
"param_name": "1band_sr32000_hl512"
},
{
"hash_name": "7dd21065bf91c10f7fccb57d7d83b07f",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json",
"param_name": "1band_sr32000_hl512"
},
{
"hash_name": "80ab74d65e515caa3622728d2de07d23",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json",
"param_name": "1band_sr32000_hl512"
},
{
"hash_name": "edc115e7fc523245062200c00caa847f",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json",
"param_name": "1band_sr33075_hl384"
},
{
"hash_name": "28063e9f6ab5b341c5f6d3c67f2045b7",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json",
"param_name": "1band_sr33075_hl384"
},
{
"hash_name": "b58090534c52cbc3e9b5104bad666ef2",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json",
"param_name": "1band_sr44100_hl512"
},
{
"hash_name": "0cdab9947f1b0928705f518f3c78ea8f",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json",
"param_name": "1band_sr44100_hl512"
},
{
"hash_name": "ae702fed0238afb5346db8356fe25f13",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json",
"param_name": "1band_sr44100_hl1024"
}
]
@@ -148,113 +148,113 @@
"1 Band": [
{
"hash_name": "1band_sr16000_hl512",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json",
"param_name": "1band_sr16000_hl512"
},
{
"hash_name": "1band_sr32000_hl512",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json",
"param_name": "1band_sr16000_hl512"
},
{
"hash_name": "1band_sr33075_hl384",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json",
"param_name": "1band_sr33075_hl384"
},
{
"hash_name": "1band_sr44100_hl256",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr44100_hl256.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl256.json",
"param_name": "1band_sr44100_hl256"
},
{
"hash_name": "1band_sr44100_hl512",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json",
"param_name": "1band_sr44100_hl512"
},
{
"hash_name": "1band_sr44100_hl1024",
"model_params": "uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json",
"param_name": "1band_sr44100_hl1024"
}
],
"2 Band": [
{
"hash_name": "2band_44100_lofi",
"model_params": "uvr5_pack/lib_v5/modelparams/2band_44100_lofi.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/2band_44100_lofi.json",
"param_name": "2band_44100_lofi"
},
{
"hash_name": "2band_32000",
"model_params": "uvr5_pack/lib_v5/modelparams/2band_32000.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/2band_32000.json",
"param_name": "2band_32000"
},
{
"hash_name": "2band_48000",
"model_params": "uvr5_pack/lib_v5/modelparams/2band_48000.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/2band_48000.json",
"param_name": "2band_48000"
}
],
"3 Band": [
{
"hash_name": "3band_44100",
"model_params": "uvr5_pack/lib_v5/modelparams/3band_44100.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/3band_44100.json",
"param_name": "3band_44100"
},
{
"hash_name": "3band_44100_mid",
"model_params": "uvr5_pack/lib_v5/modelparams/3band_44100_mid.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/3band_44100_mid.json",
"param_name": "3band_44100_mid"
},
{
"hash_name": "3band_44100_msb2",
"model_params": "uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json",
"param_name": "3band_44100_msb2"
}
],
"4 Band": [
{
"hash_name": "4band_44100",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100.json",
"param_name": "4band_44100"
},
{
"hash_name": "4band_44100_mid",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100_mid.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100_mid.json",
"param_name": "4band_44100_mid"
},
{
"hash_name": "4band_44100_msb",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100_msb.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100_msb.json",
"param_name": "4band_44100_msb"
},
{
"hash_name": "4band_44100_msb2",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100_msb2.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100_msb2.json",
"param_name": "4band_44100_msb2"
},
{
"hash_name": "4band_44100_reverse",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100_reverse.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100_reverse.json",
"param_name": "4band_44100_reverse"
},
{
"hash_name": "4band_44100_sw",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_44100_sw.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_44100_sw.json",
"param_name": "4band_44100_sw"
},
{
"hash_name": "4band_v2",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_v2.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_v2.json",
"param_name": "4band_v2"
},
{
"hash_name": "4band_v2_sn",
"model_params": "uvr5_pack/lib_v5/modelparams/4band_v2_sn.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/4band_v2_sn.json",
"param_name": "4band_v2_sn"
},
{
"hash_name": "tmodelparam",
"model_params": "uvr5_pack/lib_v5/modelparams/tmodelparam.json",
"model_params": "lib/uvr5_pack/lib_v5/modelparams/tmodelparam.json",
"param_name": "User Model Param Set"
}
]

View File

@@ -4,7 +4,7 @@ from tqdm import tqdm
import json
def load_data(file_name: str = "./uvr5_pack/name_params.json") -> dict:
def load_data(file_name: str = "./lib/uvr5_pack/name_params.json") -> dict:
with open(file_name, "r") as f:
data = json.load(f)

213
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -10,7 +10,7 @@ python = "^3.8"
torch = "^2.0.0"
torchaudio = "^2.0.1"
Cython = "^0.29.34"
gradio = "^3.24.1"
gradio = "^3.34.0"
future = "^0.18.3"
pydub = "^0.25.1"
soundfile = "^0.12.1"
@@ -53,6 +53,7 @@ absl-py = "^1.4.0"
audioread = "^3.0.0"
uvicorn = "^0.21.1"
colorama = "^0.4.6"
torchcrepe = "0.0.20"
[tool.poetry.dev-dependencies]

View File

@@ -21,7 +21,6 @@ praat-parselmouth>=0.4.2
Pillow>=9.1.1
resampy>=0.4.2
scikit-learn
starlette>=0.25.0
tensorboard
tensorboard-data-server
tensorboard-plugin-wit
@@ -43,5 +42,6 @@ uvicorn>=0.21.1
colorama>=0.4.5
pyworld>=0.3.2
httpx==0.23.0
onnxruntime-gpu
#onnxruntime-gpu
torchcrepe==0.0.20
fastapi==0.88

432
rmvpe.py Normal file
View File

@@ -0,0 +1,432 @@
import sys, torch, numpy as np, traceback, pdb
import torch.nn as nn
from time import time as ttime
import torch.nn.functional as F
class BiGRU(nn.Module):
def __init__(self, input_features, hidden_features, num_layers):
super(BiGRU, self).__init__()
self.gru = nn.GRU(
input_features,
hidden_features,
num_layers=num_layers,
batch_first=True,
bidirectional=True,
)
def forward(self, x):
return self.gru(x)[0]
class ConvBlockRes(nn.Module):
def __init__(self, in_channels, out_channels, momentum=0.01):
super(ConvBlockRes, self).__init__()
self.conv = nn.Sequential(
nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=(3, 3),
stride=(1, 1),
padding=(1, 1),
bias=False,
),
nn.BatchNorm2d(out_channels, momentum=momentum),
nn.ReLU(),
nn.Conv2d(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=(3, 3),
stride=(1, 1),
padding=(1, 1),
bias=False,
),
nn.BatchNorm2d(out_channels, momentum=momentum),
nn.ReLU(),
)
if in_channels != out_channels:
self.shortcut = nn.Conv2d(in_channels, out_channels, (1, 1))
self.is_shortcut = True
else:
self.is_shortcut = False
def forward(self, x):
if self.is_shortcut:
return self.conv(x) + self.shortcut(x)
else:
return self.conv(x) + x
class Encoder(nn.Module):
def __init__(
self,
in_channels,
in_size,
n_encoders,
kernel_size,
n_blocks,
out_channels=16,
momentum=0.01,
):
super(Encoder, self).__init__()
self.n_encoders = n_encoders
self.bn = nn.BatchNorm2d(in_channels, momentum=momentum)
self.layers = nn.ModuleList()
self.latent_channels = []
for i in range(self.n_encoders):
self.layers.append(
ResEncoderBlock(
in_channels, out_channels, kernel_size, n_blocks, momentum=momentum
)
)
self.latent_channels.append([out_channels, in_size])
in_channels = out_channels
out_channels *= 2
in_size //= 2
self.out_size = in_size
self.out_channel = out_channels
def forward(self, x):
concat_tensors = []
x = self.bn(x)
for i in range(self.n_encoders):
_, x = self.layers[i](x)
concat_tensors.append(_)
return x, concat_tensors
class ResEncoderBlock(nn.Module):
def __init__(
self, in_channels, out_channels, kernel_size, n_blocks=1, momentum=0.01
):
super(ResEncoderBlock, self).__init__()
self.n_blocks = n_blocks
self.conv = nn.ModuleList()
self.conv.append(ConvBlockRes(in_channels, out_channels, momentum))
for i in range(n_blocks - 1):
self.conv.append(ConvBlockRes(out_channels, out_channels, momentum))
self.kernel_size = kernel_size
if self.kernel_size is not None:
self.pool = nn.AvgPool2d(kernel_size=kernel_size)
def forward(self, x):
for i in range(self.n_blocks):
x = self.conv[i](x)
if self.kernel_size is not None:
return x, self.pool(x)
else:
return x
class Intermediate(nn.Module): #
def __init__(self, in_channels, out_channels, n_inters, n_blocks, momentum=0.01):
super(Intermediate, self).__init__()
self.n_inters = n_inters
self.layers = nn.ModuleList()
self.layers.append(
ResEncoderBlock(in_channels, out_channels, None, n_blocks, momentum)
)
for i in range(self.n_inters - 1):
self.layers.append(
ResEncoderBlock(out_channels, out_channels, None, n_blocks, momentum)
)
def forward(self, x):
for i in range(self.n_inters):
x = self.layers[i](x)
return x
class ResDecoderBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride, n_blocks=1, momentum=0.01):
super(ResDecoderBlock, self).__init__()
out_padding = (0, 1) if stride == (1, 2) else (1, 1)
self.n_blocks = n_blocks
self.conv1 = nn.Sequential(
nn.ConvTranspose2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=(3, 3),
stride=stride,
padding=(1, 1),
output_padding=out_padding,
bias=False,
),
nn.BatchNorm2d(out_channels, momentum=momentum),
nn.ReLU(),
)
self.conv2 = nn.ModuleList()
self.conv2.append(ConvBlockRes(out_channels * 2, out_channels, momentum))
for i in range(n_blocks - 1):
self.conv2.append(ConvBlockRes(out_channels, out_channels, momentum))
def forward(self, x, concat_tensor):
x = self.conv1(x)
x = torch.cat((x, concat_tensor), dim=1)
for i in range(self.n_blocks):
x = self.conv2[i](x)
return x
class Decoder(nn.Module):
def __init__(self, in_channels, n_decoders, stride, n_blocks, momentum=0.01):
super(Decoder, self).__init__()
self.layers = nn.ModuleList()
self.n_decoders = n_decoders
for i in range(self.n_decoders):
out_channels = in_channels // 2
self.layers.append(
ResDecoderBlock(in_channels, out_channels, stride, n_blocks, momentum)
)
in_channels = out_channels
def forward(self, x, concat_tensors):
for i in range(self.n_decoders):
x = self.layers[i](x, concat_tensors[-1 - i])
return x
class DeepUnet(nn.Module):
def __init__(
self,
kernel_size,
n_blocks,
en_de_layers=5,
inter_layers=4,
in_channels=1,
en_out_channels=16,
):
super(DeepUnet, self).__init__()
self.encoder = Encoder(
in_channels, 128, en_de_layers, kernel_size, n_blocks, en_out_channels
)
self.intermediate = Intermediate(
self.encoder.out_channel // 2,
self.encoder.out_channel,
inter_layers,
n_blocks,
)
self.decoder = Decoder(
self.encoder.out_channel, en_de_layers, kernel_size, n_blocks
)
def forward(self, x):
x, concat_tensors = self.encoder(x)
x = self.intermediate(x)
x = self.decoder(x, concat_tensors)
return x
class E2E(nn.Module):
def __init__(
self,
n_blocks,
n_gru,
kernel_size,
en_de_layers=5,
inter_layers=4,
in_channels=1,
en_out_channels=16,
):
super(E2E, self).__init__()
self.unet = DeepUnet(
kernel_size,
n_blocks,
en_de_layers,
inter_layers,
in_channels,
en_out_channels,
)
self.cnn = nn.Conv2d(en_out_channels, 3, (3, 3), padding=(1, 1))
if n_gru:
self.fc = nn.Sequential(
BiGRU(3 * 128, 256, n_gru),
nn.Linear(512, 360),
nn.Dropout(0.25),
nn.Sigmoid(),
)
else:
self.fc = nn.Sequential(
nn.Linear(3 * N_MELS, N_CLASS), nn.Dropout(0.25), nn.Sigmoid()
)
def forward(self, mel):
mel = mel.transpose(-1, -2).unsqueeze(1)
x = self.cnn(self.unet(mel)).transpose(1, 2).flatten(-2)
x = self.fc(x)
return x
from librosa.filters import mel
class MelSpectrogram(torch.nn.Module):
def __init__(
self,
is_half,
n_mel_channels,
sampling_rate,
win_length,
hop_length,
n_fft=None,
mel_fmin=0,
mel_fmax=None,
clamp=1e-5,
):
super().__init__()
n_fft = win_length if n_fft is None else n_fft
self.hann_window = {}
mel_basis = mel(
sr=sampling_rate,
n_fft=n_fft,
n_mels=n_mel_channels,
fmin=mel_fmin,
fmax=mel_fmax,
htk=True,
)
mel_basis = torch.from_numpy(mel_basis).float()
self.register_buffer("mel_basis", mel_basis)
self.n_fft = win_length if n_fft is None else n_fft
self.hop_length = hop_length
self.win_length = win_length
self.sampling_rate = sampling_rate
self.n_mel_channels = n_mel_channels
self.clamp = clamp
self.is_half = is_half
def forward(self, audio, keyshift=0, speed=1, center=True):
factor = 2 ** (keyshift / 12)
n_fft_new = int(np.round(self.n_fft * factor))
win_length_new = int(np.round(self.win_length * factor))
hop_length_new = int(np.round(self.hop_length * speed))
keyshift_key = str(keyshift) + "_" + str(audio.device)
if keyshift_key not in self.hann_window:
self.hann_window[keyshift_key] = torch.hann_window(win_length_new).to(
audio.device
)
fft = torch.stft(
audio,
n_fft=n_fft_new,
hop_length=hop_length_new,
win_length=win_length_new,
window=self.hann_window[keyshift_key],
center=center,
return_complex=True,
)
magnitude = torch.sqrt(fft.real.pow(2) + fft.imag.pow(2))
if keyshift != 0:
size = self.n_fft // 2 + 1
resize = magnitude.size(1)
if resize < size:
magnitude = F.pad(magnitude, (0, 0, 0, size - resize))
magnitude = magnitude[:, :size, :] * self.win_length / win_length_new
mel_output = torch.matmul(self.mel_basis, magnitude)
if self.is_half == True:
mel_output = mel_output.half()
log_mel_spec = torch.log(torch.clamp(mel_output, min=self.clamp))
return log_mel_spec
class RMVPE:
def __init__(self, model_path, is_half, device=None):
self.resample_kernel = {}
model = E2E(4, 1, (2, 2))
ckpt = torch.load(model_path, map_location="cpu")
model.load_state_dict(ckpt)
model.eval()
if is_half == True:
model = model.half()
self.model = model
self.resample_kernel = {}
self.is_half = is_half
if device is None:
device = "cuda" if torch.cuda.is_available() else "cpu"
self.device = device
self.mel_extractor = MelSpectrogram(
is_half, 128, 16000, 1024, 160, None, 30, 8000
).to(device)
self.model = self.model.to(device)
cents_mapping = 20 * np.arange(360) + 1997.3794084376191
self.cents_mapping = np.pad(cents_mapping, (4, 4)) # 368
def mel2hidden(self, mel):
with torch.no_grad():
n_frames = mel.shape[-1]
mel = F.pad(
mel, (0, 32 * ((n_frames - 1) // 32 + 1) - n_frames), mode="reflect"
)
hidden = self.model(mel)
return hidden[:, :n_frames]
def decode(self, hidden, thred=0.03):
cents_pred = self.to_local_average_cents(hidden, thred=thred)
f0 = 10 * (2 ** (cents_pred / 1200))
f0[f0 == 10] = 0
# f0 = np.array([10 * (2 ** (cent_pred / 1200)) if cent_pred else 0 for cent_pred in cents_pred])
return f0
def infer_from_audio(self, audio, thred=0.03):
audio = torch.from_numpy(audio).float().to(self.device).unsqueeze(0)
# torch.cuda.synchronize()
# t0=ttime()
mel = self.mel_extractor(audio, center=True)
# torch.cuda.synchronize()
# t1=ttime()
hidden = self.mel2hidden(mel)
# torch.cuda.synchronize()
# t2=ttime()
hidden = hidden.squeeze(0).cpu().numpy()
if self.is_half == True:
hidden = hidden.astype("float32")
f0 = self.decode(hidden, thred=thred)
# torch.cuda.synchronize()
# t3=ttime()
# print("hmvpe:%s\t%s\t%s\t%s"%(t1-t0,t2-t1,t3-t2,t3-t0))
return f0
def to_local_average_cents(self, salience, thred=0.05):
# t0 = ttime()
center = np.argmax(salience, axis=1) # 帧长#index
salience = np.pad(salience, ((0, 0), (4, 4))) # 帧长,368
# t1 = ttime()
center += 4
todo_salience = []
todo_cents_mapping = []
starts = center - 4
ends = center + 5
for idx in range(salience.shape[0]):
todo_salience.append(salience[:, starts[idx] : ends[idx]][idx])
todo_cents_mapping.append(self.cents_mapping[starts[idx] : ends[idx]])
# t2 = ttime()
todo_salience = np.array(todo_salience) # 帧长9
todo_cents_mapping = np.array(todo_cents_mapping) # 帧长9
product_sum = np.sum(todo_salience * todo_cents_mapping, 1)
weight_sum = np.sum(todo_salience, 1) # 帧长
devided = product_sum / weight_sum # 帧长
# t3 = ttime()
maxx = np.max(salience, axis=1) # 帧长
devided[maxx <= thred] = 0
# t4 = ttime()
# print("decode:%s\t%s\t%s\t%s" % (t1 - t0, t2 - t1, t3 - t2, t4 - t3))
return devided
# if __name__ == '__main__':
# audio, sampling_rate = sf.read("卢本伟语录~1.wav")
# if len(audio.shape) > 1:
# audio = librosa.to_mono(audio.transpose(1, 0))
# audio_bak = audio.copy()
# if sampling_rate != 16000:
# audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
# model_path = "/bili-coeus/jupyter/jupyterhub-liujing04/vits_ch/test-RMVPE/weights/rmvpe_llc_half.pt"
# thred = 0.03 # 0.01
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# rmvpe = RMVPE(model_path,is_half=False, device=device)
# t0=ttime()
# f0 = rmvpe.infer_from_audio(audio, thred=thred)
# f0 = rmvpe.infer_from_audio(audio, thred=thred)
# f0 = rmvpe.infer_from_audio(audio, thred=thred)
# f0 = rmvpe.infer_from_audio(audio, thred=thred)
# f0 = rmvpe.infer_from_audio(audio, thred=thred)
# t1=ttime()
# print(f0.shape,t1-t0)

45
run.sh Normal file
View File

@@ -0,0 +1,45 @@
#!/bin/bash
if [[ "$(uname)" == "Darwin" ]]; then
# macOS specific env:
export PYTORCH_ENABLE_MPS_FALLBACK=1
export PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
elif [[ "$(uname)" != "Linux" ]]; then
echo "Unsupported operating system."
exit 1
fi
requirements_file="requirements.txt"
# Check if Python 3.8 is installed
if ! command -v python3.8 &> /dev/null; then
echo "Python 3.8 not found. Attempting to install..."
if [[ "$(uname)" == "Darwin" ]] && command -v brew &> /dev/null; then
brew install python@3.8
elif [[ "$(uname)" == "Linux" ]] && command -v apt-get &> /dev/null; then
sudo apt-get update
sudo apt-get install python3.8
else
echo "Please install Python 3.8 manually."
exit 1
fi
fi
# Check if required packages are installed and install them if not
if [ -f "${requirements_file}" ]; then
installed_packages=$(python3.8 -m pip freeze)
while IFS= read -r package; do
[[ "${package}" =~ ^#.* ]] && continue
package_name=$(echo "${package}" | sed 's/[<>=!].*//')
if ! echo "${installed_packages}" | grep -q "${package_name}"; then
echo "${package_name} not found. Attempting to install..."
python3.8 -m pip install --upgrade "${package}"
fi
done < "${requirements_file}"
else
echo "${requirements_file} not found. Please ensure the requirements file with required packages exists."
exit 1
fi
# Run the main script
python3.8 infer-web.py --pycmd python3.8

297
rvc_for_realtime.py Normal file
View File

@@ -0,0 +1,297 @@
import faiss, torch, traceback, parselmouth, numpy as np, torchcrepe, torch.nn as nn, pyworld
from fairseq import checkpoint_utils
from lib.infer_pack.models import (
SynthesizerTrnMs256NSFsid,
SynthesizerTrnMs256NSFsid_nono,
SynthesizerTrnMs768NSFsid,
SynthesizerTrnMs768NSFsid_nono,
)
import os, sys
from time import time as ttime
import torch.nn.functional as F
import scipy.signal as signal
now_dir = os.getcwd()
sys.path.append(now_dir)
from config import Config
from multiprocessing import Manager as M
mm = M()
config = Config()
class RVC:
def __init__(
self, key, pth_path, index_path, index_rate, n_cpu, inp_q, opt_q, device
) -> None:
"""
初始化
"""
try:
global config
self.inp_q = inp_q
self.opt_q = opt_q
self.device = device
self.f0_up_key = key
self.time_step = 160 / 16000 * 1000
self.f0_min = 50
self.f0_max = 1100
self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
self.sr = 16000
self.window = 160
self.n_cpu = n_cpu
if index_rate != 0:
self.index = faiss.read_index(index_path)
self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
print("index search enabled")
self.index_rate = index_rate
models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
["hubert_base.pt"],
suffix="",
)
hubert_model = models[0]
hubert_model = hubert_model.to(config.device)
if config.is_half:
hubert_model = hubert_model.half()
else:
hubert_model = hubert_model.float()
hubert_model.eval()
self.model = hubert_model
cpt = torch.load(pth_path, map_location="cpu")
self.tgt_sr = cpt["config"][-1]
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
self.if_f0 = cpt.get("f0", 1)
self.version = cpt.get("version", "v1")
if self.version == "v1":
if self.if_f0 == 1:
self.net_g = SynthesizerTrnMs256NSFsid(
*cpt["config"], is_half=config.is_half
)
else:
self.net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
elif self.version == "v2":
if self.if_f0 == 1:
self.net_g = SynthesizerTrnMs768NSFsid(
*cpt["config"], is_half=config.is_half
)
else:
self.net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
del self.net_g.enc_q
print(self.net_g.load_state_dict(cpt["weight"], strict=False))
self.net_g.eval().to(device)
if config.is_half:
self.net_g = self.net_g.half()
else:
self.net_g = self.net_g.float()
self.is_half = config.is_half
except:
print(traceback.format_exc())
def get_f0_post(self, f0):
f0_min = self.f0_min
f0_max = self.f0_max
f0_mel_min = 1127 * np.log(1 + f0_min / 700)
f0_mel_max = 1127 * np.log(1 + f0_max / 700)
f0bak = f0.copy()
f0_mel = 1127 * np.log(1 + f0 / 700)
f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
f0_mel_max - f0_mel_min
) + 1
f0_mel[f0_mel <= 1] = 1
f0_mel[f0_mel > 255] = 255
f0_coarse = np.rint(f0_mel).astype(np.int)
return f0_coarse, f0bak
def get_f0(self, x, f0_up_key, n_cpu, method="harvest"):
n_cpu = int(n_cpu)
if method == "crepe":
return self.get_f0_crepe(x, f0_up_key)
if method == "rmvpe":
return self.get_f0_rmvpe(x, f0_up_key)
if method == "pm":
p_len = x.shape[0] // 160
f0 = (
parselmouth.Sound(x, 16000)
.to_pitch_ac(
time_step=0.01,
voicing_threshold=0.6,
pitch_floor=50,
pitch_ceiling=1100,
)
.selected_array["frequency"]
)
pad_size = (p_len - len(f0) + 1) // 2
if pad_size > 0 or p_len - len(f0) - pad_size > 0:
print(pad_size, p_len - len(f0) - pad_size)
f0 = np.pad(
f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
)
f0 *= pow(2, f0_up_key / 12)
return self.get_f0_post(f0)
if n_cpu == 1:
f0, t = pyworld.harvest(
x.astype(np.double),
fs=16000,
f0_ceil=1100,
f0_floor=50,
frame_period=10,
)
f0 = signal.medfilt(f0, 3)
f0 *= pow(2, f0_up_key / 12)
return self.get_f0_post(f0)
f0bak = np.zeros(x.shape[0] // 160, dtype=np.float64)
length = len(x)
part_length = int(length / n_cpu / 160) * 160
ts = ttime()
res_f0 = mm.dict()
for idx in range(n_cpu):
tail = part_length * (idx + 1) + 320
if idx == 0:
self.inp_q.put((idx, x[:tail], res_f0, n_cpu, ts))
else:
self.inp_q.put(
(idx, x[part_length * idx - 320 : tail], res_f0, n_cpu, ts)
)
while 1:
res_ts = self.opt_q.get()
if res_ts == ts:
break
f0s = [i[1] for i in sorted(res_f0.items(), key=lambda x: x[0])]
for idx, f0 in enumerate(f0s):
if idx == 0:
f0 = f0[:-3]
elif idx != n_cpu - 1:
f0 = f0[2:-3]
else:
f0 = f0[2:-1]
f0bak[
part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]
] = f0
f0bak = signal.medfilt(f0bak, 3)
f0bak *= pow(2, f0_up_key / 12)
return self.get_f0_post(f0bak)
def get_f0_crepe(self, x, f0_up_key):
audio = torch.tensor(np.copy(x))[None].float()
f0, pd = torchcrepe.predict(
audio,
self.sr,
160,
self.f0_min,
self.f0_max,
"full",
batch_size=512,
device=self.device,
return_periodicity=True,
)
pd = torchcrepe.filter.median(pd, 3)
f0 = torchcrepe.filter.mean(f0, 3)
f0[pd < 0.1] = 0
f0 = f0[0].cpu().numpy()
f0 *= pow(2, f0_up_key / 12)
return self.get_f0_post(f0)
def get_f0_rmvpe(self, x, f0_up_key):
if hasattr(self, "model_rmvpe") == False:
from rmvpe import RMVPE
print("loading rmvpe model")
self.model_rmvpe = RMVPE(
"rmvpe.pt", is_half=self.is_half, device=self.device
)
# self.model_rmvpe = RMVPE("aug2_58000_half.pt", is_half=self.is_half, device=self.device)
f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
f0 *= pow(2, f0_up_key / 12)
return self.get_f0_post(f0)
def infer(
self,
feats: torch.Tensor,
indata: np.ndarray,
rate1,
rate2,
cache_pitch,
cache_pitchf,
f0method,
) -> np.ndarray:
feats = feats.view(1, -1)
if config.is_half:
feats = feats.half()
else:
feats = feats.float()
feats = feats.to(self.device)
t1 = ttime()
with torch.no_grad():
padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
inputs = {
"source": feats,
"padding_mask": padding_mask,
"output_layer": 9 if self.version == "v1" else 12,
}
logits = self.model.extract_features(**inputs)
feats = (
self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
)
t2 = ttime()
try:
if hasattr(self, "index") and self.index_rate != 0:
leng_replace_head = int(rate1 * feats[0].shape[0])
npy = feats[0][-leng_replace_head:].cpu().numpy().astype("float32")
score, ix = self.index.search(npy, k=8)
weight = np.square(1 / score)
weight /= weight.sum(axis=1, keepdims=True)
npy = np.sum(self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)
if config.is_half:
npy = npy.astype("float16")
feats[0][-leng_replace_head:] = (
torch.from_numpy(npy).unsqueeze(0).to(self.device) * self.index_rate
+ (1 - self.index_rate) * feats[0][-leng_replace_head:]
)
else:
print("index search FAIL or disabled")
except:
traceback.print_exc()
print("index search FAIL")
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
t3 = ttime()
if self.if_f0 == 1:
pitch, pitchf = self.get_f0(indata, self.f0_up_key, self.n_cpu, f0method)
cache_pitch[:] = np.append(cache_pitch[pitch[:-1].shape[0] :], pitch[:-1])
cache_pitchf[:] = np.append(
cache_pitchf[pitchf[:-1].shape[0] :], pitchf[:-1]
)
p_len = min(feats.shape[1], 13000, cache_pitch.shape[0])
else:
cache_pitch, cache_pitchf = None, None
p_len = min(feats.shape[1], 13000)
t4 = ttime()
feats = feats[:, :p_len, :]
if self.if_f0 == 1:
cache_pitch = cache_pitch[:p_len]
cache_pitchf = cache_pitchf[:p_len]
cache_pitch = torch.LongTensor(cache_pitch).unsqueeze(0).to(self.device)
cache_pitchf = torch.FloatTensor(cache_pitchf).unsqueeze(0).to(self.device)
p_len = torch.LongTensor([p_len]).to(self.device)
ii = 0 # sid
sid = torch.LongTensor([ii]).to(self.device)
with torch.no_grad():
if self.if_f0 == 1:
infered_audio = (
self.net_g.infer(
feats, p_len, cache_pitch, cache_pitchf, sid, rate2
)[0][0, 0]
.data.cpu()
.float()
)
else:
infered_audio = (
self.net_g.infer(feats, p_len, sid, rate2)[0][0, 0]
.data.cpu()
.float()
)
t5 = ttime()
print("time->fea-index-f0-model:", t2 - t1, t3 - t2, t4 - t3, t5 - t4)
return infered_audio

546
tools/dlmodels.sh Executable file
View File

@@ -0,0 +1,546 @@
#!/bin/bash
echo working dir is $(pwd)
echo downloading requirement aria2 check.
if command -v aria2c &> /dev/null
then
echo "aria2c command found"
else
echo failed. please install aria2
sleep 5
exit 1
fi
d32="f0D32k.pth"
d40="f0D40k.pth"
d48="f0D48k.pth"
g32="f0G32k.pth"
g40="f0G40k.pth"
g48="f0G48k.pth"
d40v2="f0D40k.pth"
g40v2="f0G40k.pth"
dld32="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth"
dld40="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth"
dld48="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth"
dlg32="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth"
dlg40="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth"
dlg48="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth"
dld40v2="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth"
dlg40v2="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth"
hp2_all="HP2_all_vocals.pth"
hp3_all="HP3_all_vocals.pth"
hp5_only="HP5_only_main_vocal.pth"
VR_DeEchoAggressive="VR-DeEchoAggressive.pth"
VR_DeEchoDeReverb="VR-DeEchoDeReverb.pth"
VR_DeEchoNormal="VR-DeEchoNormal.pth"
onnx_dereverb="vocals.onnx"
dlhp2_all="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2_all_vocals.pth"
dlhp3_all="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP3_all_vocals.pth"
dlhp5_only="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5_only_main_vocal.pth"
dlVR_DeEchoAggressive="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoAggressive.pth"
dlVR_DeEchoDeReverb="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoDeReverb.pth"
dlVR_DeEchoNormal="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoNormal.pth"
dlonnx_dereverb="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx"
hb="hubert_base.pt"
dlhb="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt"
echo dir check start.
if [ -d "./pretrained" ]; then
echo dir ./pretrained checked.
else
echo failed. generating dir ./pretrained.
mkdir pretrained
fi
if [ -d "./pretrained_v2" ]; then
echo dir ./pretrained_v2 checked.
else
echo failed. generating dir ./pretrained_v2.
mkdir pretrained_v2
fi
if [ -d "./uvr5_weights" ]; then
echo dir ./uvr5_weights checked.
else
echo failed. generating dir ./uvr5_weights.
mkdir uvr5_weights
fi
if [ -d "./uvr5_weights/onnx_dereverb_By_FoxJoy" ]; then
echo dir ./uvr5_weights/onnx_dereverb_By_FoxJoy checked.
else
echo failed. generating dir ./uvr5_weights/onnx_dereverb_By_FoxJoy.
mkdir uvr5_weights/onnx_dereverb_By_FoxJoy
fi
echo dir check finished.
echo required files check start.
echo checking D32k.pth
if [ -f "./pretrained/D32k.pth" ]; then
echo D32k.pth in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d ./pretrained -o D32k.pth
if [ -f "./pretrained/D32k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking D40k.pth
if [ -f "./pretrained/D40k.pth" ]; then
echo D40k.pth in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d ./pretrained -o D40k.pth
if [ -f "./pretrained/D40k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking D40k.pth
if [ -f "./pretrained_v2/D40k.pth" ]; then
echo D40k.pth in ./pretrained_v2 checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d ./pretrained_v2 -o D40k.pth
if [ -f "./pretrained_v2/D40k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking D48k.pth
if [ -f "./pretrained/D48k.pth" ]; then
echo D48k.pth in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d ./pretrained -o D48k.pth
if [ -f "./pretrained/D48k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking G32k.pth
if [ -f "./pretrained/G32k.pth" ]; then
echo G32k.pth in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d ./pretrained -o G32k.pth
if [ -f "./pretrained/G32k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking G40k.pth
if [ -f "./pretrained/G40k.pth" ]; then
echo G40k.pth in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d ./pretrained -o G40k.pth
if [ -f "./pretrained/G40k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking G40k.pth
if [ -f "./pretrained_v2/G40k.pth" ]; then
echo G40k.pth in ./pretrained_v2 checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d ./pretrained_v2 -o G40k.pth
if [ -f "./pretrained_v2/G40k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking G48k.pth
if [ -f "./pretrained/G48k.pth" ]; then
echo G48k.pth in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d ./pretrained -o G48k.pth
if [ -f "./pretrained/G48k.pth" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $d32
if [ -f "./pretrained/$d32" ]; then
echo $d32 in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dld32 -d ./pretrained -o $d32
if [ -f "./pretrained/$d32" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $d40
if [ -f "./pretrained/$d40" ]; then
echo $d40 in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dld40 -d ./pretrained -o $d40
if [ -f "./pretrained/$d40" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $d40v2
if [ -f "./pretrained_v2/$d40v2" ]; then
echo $d40v2 in ./pretrained_v2 checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dld40v2 -d ./pretrained_v2 -o $d40v2
if [ -f "./pretrained_v2/$d40v2" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $d48
if [ -f "./pretrained/$d48" ]; then
echo $d48 in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dld48 -d ./pretrained -o $d48
if [ -f "./pretrained/$d48" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $g32
if [ -f "./pretrained/$g32" ]; then
echo $g32 in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlg32 -d ./pretrained -o $g32
if [ -f "./pretrained/$g32" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $g40
if [ -f "./pretrained/$g40" ]; then
echo $g40 in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlg40 -d ./pretrained -o $g40
if [ -f "./pretrained/$g40" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $g40v2
if [ -f "./pretrained_v2/$g40v2" ]; then
echo $g40v2 in ./pretrained_v2 checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlg40v2 -d ./pretrained_v2 -o $g40v2
if [ -f "./pretrained_v2/$g40v2" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $g48
if [ -f "./pretrained/$g48" ]; then
echo $g48 in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlg48 -d ./pretrained -o $g48
if [ -f "./pretrained/$g48" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $hp2_all
if [ -f "./uvr5_weights/$hp2_all" ]; then
echo $hp2_all in ./uvr5_weights checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlhp2_all -d ./uvr5_weights -o $hp2_all
if [ -f "./uvr5_weights/$hp2_all" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $hp3_all
if [ -f "./uvr5_weights/$hp3_all" ]; then
echo $hp3_all in ./uvr5_weights checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlhp3_all -d ./uvr5_weights -o $hp3_all
if [ -f "./uvr5_weights/$hp3_all" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $hp5_only
if [ -f "./uvr5_weights/$hp5_only" ]; then
echo $hp5_only in ./uvr5_weights checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlhp5_only -d ./uvr5_weights -o $hp5_only
if [ -f "./uvr5_weights/$hp5_only" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $VR_DeEchoAggressive
if [ -f "./uvr5_weights/$VR_DeEchoAggressive" ]; then
echo $VR_DeEchoAggressive in ./uvr5_weights checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlVR_DeEchoAggressive -d ./uvr5_weights -o $VR_DeEchoAggressive
if [ -f "./uvr5_weights/$VR_DeEchoAggressive" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $VR_DeEchoDeReverb
if [ -f "./uvr5_weights/$VR_DeEchoDeReverb" ]; then
echo $VR_DeEchoDeReverb in ./uvr5_weights checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlVR_DeEchoDeReverb -d ./uvr5_weights -o $VR_DeEchoDeReverb
if [ -f "./uvr5_weights/$VR_DeEchoDeReverb" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $VR_DeEchoNormal
if [ -f "./uvr5_weights/$VR_DeEchoNormal" ]; then
echo $VR_DeEchoNormal in ./uvr5_weights checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlVR_DeEchoNormal -d ./uvr5_weights -o $VR_DeEchoNormal
if [ -f "./uvr5_weights/$VR_DeEchoNormal" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $onnx_dereverb
if [ -f "./uvr5_weights/onnx_dereverb_By_FoxJoy/$onnx_dereverb" ]; then
echo $onnx_dereverb in ./uvr5_weights/onnx_dereverb_By_FoxJoy checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlonnx_dereverb -d ./uvr5_weights/onnx_dereverb_By_FoxJoy -o $onnx_dereverb
if [ -f "./uvr5_weights/onnx_dereverb_By_FoxJoy/$onnx_dereverb" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo checking $hb
if [ -f "./pretrained/$hb" ]; then
echo $hb in ./pretrained checked.
else
echo failed. starting download from huggingface.
if command -v aria2c &> /dev/null; then
aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlhb -d ./ -o $hb
if [ -f "./$hb" ]; then
echo download successful.
else
echo please try again!
exit 1
fi
else
echo aria2c command not found. Please install aria2c and try again.
exit 1
fi
fi
echo required files check finished.
read -p "Press any key to continue..." -n1 -s

View File

@@ -1,4 +1,4 @@
from infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
from lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
import torch
if __name__ == "__main__":

Some files were not shown because too many files have changed in this diff Show More