diff --git a/README.md b/README.md
index 95b1410..b64d43d 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Features:
+# 7/22 Changelog:
- Experimental Formant Shift using StftPitchShift(tried using praat with praatio but to no avail)
- Added `Stop Training` button when training, no need to restart RVC every time you want to stop the training of a model!
- Auto-detect Index path for models selected + Auto-detect paths, no more default values like this: `E:\codes\py39\vits_vc_gpu_train\logs\mi-test-1key\total_fea.npy`, We're getting Root Dir and subfolders using
@@ -12,87 +12,6 @@ os.path.abspath(os.getcwd())
- Auto-open TensorBoard localhost URL when `tensor-launch.py` is executed
- RMVPE implemented in both inferencing and training (the one in `Training` tab doesn't work properly though, requires some additional work to do)
-## Installation:
-
-1. Simply either extract directly or use git clone
-
-2. Run `installstft.bat`. It'll automatically:
- - Upgrade/Downgrade Gradio if its version isn't 3.34.0;
- - Install `rmvpe.pt` if it hasn't been already installed;
- - Install `StftPitchShift` if it hasn't been already installed;
-
-
-
-3. Done! You're good to go and use the RVC-WebUI Tweaked by me for you to use :)
-
-## Change Gradio Theme:
-
-- [OPTIONAL] Change Gradio's theme:
- 1. Open `infer-web.py` in any code/text editing software (e.g. `notepad++`, `notepad`, `vscode`, etc)
-
- 2a. Press Ctrl+F and search for `with gr.Blocks(`, select the one that's not fully commented
-
- 2b. Go to line `1842`, you'll see the `with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:`
-
- 3. Go to [Gradio Theme Gallery](https://huggingface.co/spaces/gradio/theme-gallery):
-
- 3.1 Select any theme you like (e.g. [this one](https://huggingface.co/spaces/freddyaboulton/dracula_revamped))
-
- 3.2 Look at the top of the page
-
- 
-
- 3.3 Copy theme variable(in this case, it's `theme='freddyaboulton/dracula_revamped'`)
-
- 4. Replace `theme='HaleyCH/HaleyCH_Theme'` in `infer-web.py` with any value of a theme from [Gradio Theme Gallery](https://huggingface.co/spaces/gradio/theme-gallery)
-
-### Current Todo-list:
-
-- [x] Fix `Unload voice to save GPU memory` button Traceback
-- [ ] Add Accordions so people with Firefox browser get a much more compact GUI rather than [This](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/assets/79400603/67e0cc08-82a2-4dc3-86cf-e23d1dcad9f8).
-- [ ] Fix weird way Median Filtering value inputted in a slider is utilized
-- [ ] Replace regular refresh buttons with these tiny ones from [AUTOMATIC'S1111 Stable DIffusion](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
-
-- [ ] Add a way to change the Gradio's theme from WebUI itself, like in [AUTOMATIC'S1111 Stable DIffusion](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
-
-- [ ] Implement Praat in the GUI for f0 curve file manipulation and easier usage
-
-
-# Screenshots:
-
-- ## Inference Tab:
-
-
-
-- ## UVR Tab:
-
-
-
-- ## Training Tab:
-
-
-
-- ## Ckpt-Processing Tab:
-
-
-
-The rest of the tabs are left untouched code-wise.
-
-
-
-# Formant Shift:
-
-
-
-- ### Click `Apply` button every time you change the values for inferencing.
-
-- ### As the name implies, you can only use `wav` files so far, also it is very slow, so be patient.
-
-- ### If you added a new `preset.txt` in the `\formantshiftcfg\` folder, click button with refresh emoji
-
-- ### If the preset you selected somehow got edited, by pressing refresh emoji button you'll update values, by grabbing them from the file
-
-
Mangio-RVC-Fork with v2 Support! 💻
@@ -343,6 +262,41 @@ make tensorboard
```
Then click the tensorboard link it provides and refresh the data.
+## Change Gradio Theme:
+
+- [OPTIONAL] Change Gradio's theme:
+ 1. Open `infer-web.py` in any code/text editing software (e.g. `notepad++`, `notepad`, `vscode`, etc)
+
+ 2a. Press Ctrl+F and search for `with gr.Blocks(`, select the one that's not fully commented
+
+ 2b. Go to line `1842`, you'll see the `with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:`
+
+ 3. Go to [Gradio Theme Gallery](https://huggingface.co/spaces/gradio/theme-gallery):
+
+ 3.1 Select any theme you like (e.g. [this one](https://huggingface.co/spaces/freddyaboulton/dracula_revamped))
+
+ 3.2 Look at the top of the page
+
+ 
+
+    3.3 Copy the theme variable (in this case, it's `theme='freddyaboulton/dracula_revamped'`)
+
+ 4. Replace `theme='HaleyCH/HaleyCH_Theme'` in `infer-web.py` with any value of a theme from [Gradio Theme Gallery](https://huggingface.co/spaces/gradio/theme-gallery)
+
+
+# Formant Shift Explanation
+
+
+
+- ### Click `Apply` button every time you change the values for inferencing.
+
+- ### As the name implies, you can only use `wav` files so far; it is also very slow, so be patient.
+
+- ### If you added a new `preset.txt` in the `\formantshiftcfg\` folder, click the button with the refresh emoji
+
+- ### If the preset you selected was edited, press the refresh emoji button to reload its values from the file
+
+
# Other
If you are using Windows, you can download and extract `RVC-beta.7z` to use RVC directly and use `go-web.bat` to start Webui.
diff --git a/audios/.gitignore b/audios/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/config.py b/config.py
index 2f64e13..5b72235 100644
--- a/config.py
+++ b/config.py
@@ -1,19 +1,73 @@
import argparse
import sys
import torch
+import json
from multiprocessing import cpu_count
+global usefp16
+usefp16 = False
+
def use_fp32_config():
- for config_file in ["32k.json", "40k.json", "48k.json"]:
- with open(f"configs/{config_file}", "r") as f:
- strr = f.read().replace("true", "false")
- with open(f"configs/{config_file}", "w") as f:
- f.write(strr)
- with open("trainset_preprocess_pipeline_print.py", "r") as f:
- strr = f.read().replace("3.7", "3.0")
- with open("trainset_preprocess_pipeline_print.py", "w") as f:
- f.write(strr)
+ usefp16 = False
+ device_capability = 0
+ if torch.cuda.is_available():
+ device = torch.device("cuda:0") # Assuming you have only one GPU (index 0).
+ device_capability = torch.cuda.get_device_capability(device)[0]
+ if device_capability >= 7:
+ usefp16 = True
+ for config_file in ["32k.json", "40k.json", "48k.json"]:
+ with open(f"configs/{config_file}", "r") as d:
+ data = json.load(d)
+
+ if "train" in data and "fp16_run" in data["train"]:
+ data["train"]["fp16_run"] = True
+
+ with open(f"configs/{config_file}", "w") as d:
+ json.dump(data, d, indent=4)
+
+ print(f"Set fp16_run to true in {config_file}")
+
+ with open(
+ "trainset_preprocess_pipeline_print.py", "r", encoding="utf-8"
+ ) as f:
+ strr = f.read()
+
+ strr = strr.replace("3.0", "3.7")
+
+ with open(
+ "trainset_preprocess_pipeline_print.py", "w", encoding="utf-8"
+ ) as f:
+ f.write(strr)
+ else:
+ for config_file in ["32k.json", "40k.json", "48k.json"]:
+ with open(f"configs/{config_file}", "r") as f:
+ data = json.load(f)
+
+ if "train" in data and "fp16_run" in data["train"]:
+ data["train"]["fp16_run"] = False
+
+ with open(f"configs/{config_file}", "w") as d:
+ json.dump(data, d, indent=4)
+
+ print(f"Set fp16_run to false in {config_file}")
+
+ with open(
+ "trainset_preprocess_pipeline_print.py", "r", encoding="utf-8"
+ ) as f:
+ strr = f.read()
+
+ strr = strr.replace("3.7", "3.0")
+
+ with open(
+ "trainset_preprocess_pipeline_print.py", "w", encoding="utf-8"
+ ) as f:
+ f.write(strr)
+ else:
+ print(
+ "CUDA is not available. Make sure you have an NVIDIA GPU and CUDA installed."
+ )
+ return (usefp16, device_capability)
class Config:
@@ -32,7 +86,7 @@ class Config:
self.paperspace,
self.is_cli,
) = self.arg_parse()
-
+
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
@staticmethod
@@ -50,11 +104,15 @@ class Config:
action="store_true",
help="Do not open in browser automatically",
)
- parser.add_argument( # Fork Feature. Paperspace integration for web UI
- "--paperspace", action="store_true", help="Note that this argument just shares a gradio link for the web UI. Thus can be used on other non-local CLI systems."
+ parser.add_argument( # Fork Feature. Paperspace integration for web UI
+ "--paperspace",
+ action="store_true",
+ help="Note that this argument just shares a gradio link for the web UI. Thus can be used on other non-local CLI systems.",
)
- parser.add_argument( # Fork Feature. Embed a CLI into the infer-web.py
- "--is_cli", action="store_true", help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!"
+ parser.add_argument( # Fork Feature. Embed a CLI into the infer-web.py
+ "--is_cli",
+ action="store_true",
+ help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!",
)
cmd_opts = parser.parse_args()
@@ -95,9 +153,9 @@ class Config:
):
print("Found GPU", self.gpu_name, ", force to fp32")
self.is_half = False
- use_fp32_config()
else:
print("Found GPU", self.gpu_name)
+ use_fp32_config()
self.gpu_mem = int(
torch.cuda.get_device_properties(i_device).total_memory
/ 1024
diff --git a/extract_f0_print.py b/extract_f0_print.py
index 3290125..4be7f5e 100644
--- a/extract_f0_print.py
+++ b/extract_f0_print.py
@@ -5,10 +5,10 @@ sys.path.append(now_dir)
from my_utils import load_audio
import pyworld
import numpy as np, logging
-import torchcrepe # Fork Feature. Crepe algo for training and preprocess
+import torchcrepe # Fork Feature. Crepe algo for training and preprocess
import torch
-from torch import Tensor # Fork Feature. Used for pitch prediction for torch crepe.
-import scipy.signal as signal # Fork Feature hybrid inference
+from torch import Tensor # Fork Feature. Used for pitch prediction for torch crepe.
+import scipy.signal as signal # Fork Feature hybrid inference
import tqdm
logging.getLogger("numba").setLevel(logging.WARNING)
@@ -19,9 +19,9 @@ f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
DoFormant = False
-with open('formanting.txt', 'r') as fvf:
- content = fvf.readlines()
- Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
+with open("formanting.txt", "r") as fvf:
+ content = fvf.readlines()
+ Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
def printt(strr):
@@ -32,7 +32,7 @@ def printt(strr):
n_p = int(sys.argv[2])
f0method = sys.argv[3]
-extraction_crepe_hop_length = 0
+extraction_crepe_hop_length = 0
try:
extraction_crepe_hop_length = int(sys.argv[4])
except:
@@ -53,11 +53,11 @@ class FeatureInput(object):
self.f0_min = 50.0
self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
-
+
# EXPERIMENTAL. PROBABLY BUGGY
def get_f0_hybrid_computation(
- self,
- methods_str,
+ self,
+ methods_str,
x,
f0_min,
f0_max,
@@ -67,9 +67,9 @@ class FeatureInput(object):
):
# Get various f0 methods from input to use in the computation stack
s = methods_str
- s = s.split('hybrid')[1]
- s = s.replace('[', '').replace(']', '')
- methods = s.split('+')
+ s = s.split("hybrid")[1]
+ s = s.replace("[", "").replace("]", "")
+ methods = s.split("+")
f0_computation_stack = []
print("Calculating f0 pitch estimations for methods: %s" % str(methods))
@@ -99,7 +99,9 @@ class FeatureInput(object):
torch_device_index = 0
torch_device = None
if torch.cuda.is_available():
- torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
+ torch_device = torch.device(
+ f"cuda:{torch_device_index % torch.cuda.device_count()}"
+ )
elif torch.backends.mps.is_available():
torch_device = torch.device("mps")
else:
@@ -123,7 +125,7 @@ class FeatureInput(object):
f0 = torchcrepe.filter.mean(f0, 3)
f0[pd < 0.1] = 0
f0 = f0[0].cpu().numpy()
- f0 = f0[1:] # Get rid of extra first frame
+ f0 = f0[1:] # Get rid of extra first frame
elif method == "mangio-crepe":
# print("Performing crepe pitch extraction. (EXPERIMENTAL)")
# print("CREPE PITCH EXTRACTION HOP LENGTH: " + str(crepe_hop_length))
@@ -132,7 +134,9 @@ class FeatureInput(object):
torch_device_index = 0
torch_device = None
if torch.cuda.is_available():
- torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
+ torch_device = torch.device(
+ f"cuda:{torch_device_index % torch.cuda.device_count()}"
+ )
elif torch.backends.mps.is_available():
torch_device = torch.device("mps")
else:
@@ -156,7 +160,7 @@ class FeatureInput(object):
"full",
batch_size=crepe_hop_length * 2,
device=torch_device,
- pad=True
+ pad=True,
)
p_len = p_len or x.shape[0] // crepe_hop_length
# Resize the pitch
@@ -165,7 +169,7 @@ class FeatureInput(object):
target = np.interp(
np.arange(0, len(source) * p_len, len(source)) / p_len,
np.arange(0, len(source)),
- source
+ source,
)
f0 = np.nan_to_num(target)
elif method == "harvest":
@@ -191,12 +195,12 @@ class FeatureInput(object):
f0 = signal.medfilt(f0, 3)
f0 = f0[1:]
f0_computation_stack.append(f0)
-
+
for fc in f0_computation_stack:
print(len(fc))
# print("Calculating hybrid median f0 from the stack of: %s" % str(methods))
-
+
f0_median_hybrid = None
if len(f0_computation_stack) == 1:
f0_median_hybrid = f0_computation_stack[0]
@@ -236,10 +240,9 @@ class FeatureInput(object):
elif f0_method == "rmvpe":
if hasattr(self, "model_rmvpe") == False:
from rmvpe import RMVPE
+
print("loading rmvpe model")
- self.model_rmvpe = RMVPE(
- "rmvpe.pt", is_half=False, device="cuda:0"
- )
+ self.model_rmvpe = RMVPE("rmvpe.pt", is_half=False, device="cuda:0")
f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
elif f0_method == "dio":
f0, t = pyworld.dio(
@@ -250,12 +253,16 @@ class FeatureInput(object):
frame_period=1000 * self.hop / self.fs,
)
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs)
- elif f0_method == "crepe": # Fork Feature: Added crepe f0 for f0 feature extraction
+ elif (
+ f0_method == "crepe"
+ ): # Fork Feature: Added crepe f0 for f0 feature extraction
# Pick a batch size that doesn't cause memory errors on your gpu
torch_device_index = 0
torch_device = None
if torch.cuda.is_available():
- torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
+ torch_device = torch.device(
+ f"cuda:{torch_device_index % torch.cuda.device_count()}"
+ )
elif torch.backends.mps.is_available():
torch_device = torch.device("mps")
else:
@@ -287,7 +294,9 @@ class FeatureInput(object):
torch_device_index = 0
torch_device = None
if torch.cuda.is_available():
- torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
+ torch_device = torch.device(
+ f"cuda:{torch_device_index % torch.cuda.device_count()}"
+ )
elif torch.backends.mps.is_available():
torch_device = torch.device("mps")
else:
@@ -311,7 +320,7 @@ class FeatureInput(object):
"full",
batch_size=crepe_hop_length * 2,
device=torch_device,
- pad=True
+ pad=True,
)
p_len = p_len or x.shape[0] // crepe_hop_length
# Resize the pitch
@@ -320,20 +329,20 @@ class FeatureInput(object):
target = np.interp(
np.arange(0, len(source) * p_len, len(source)) / p_len,
np.arange(0, len(source)),
- source
+ source,
)
f0 = np.nan_to_num(target)
- elif "hybrid" in f0_method: # EXPERIMENTAL
+ elif "hybrid" in f0_method: # EXPERIMENTAL
# Perform hybrid median pitch estimation
time_step = 160 / 16000 * 1000
f0 = self.get_f0_hybrid_computation(
- f0_method,
+ f0_method,
x,
self.f0_min,
self.f0_max,
p_len,
crepe_hop_length,
- time_step
+ time_step,
)
# Mangio-RVC-Fork Feature: Add hybrid f0 inference to feature extraction. EXPERIMENTAL...
@@ -362,14 +371,19 @@ class FeatureInput(object):
with tqdm.tqdm(total=len(paths), leave=True, position=thread_n) as pbar:
for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths):
try:
- pbar.set_description("thread:%s, f0ing, Hop-Length:%s" % (thread_n, crepe_hop_length))
+ pbar.set_description(
+ "thread:%s, f0ing, Hop-Length:%s"
+ % (thread_n, crepe_hop_length)
+ )
pbar.update(1)
if (
os.path.exists(opt_path1 + ".npy") == True
and os.path.exists(opt_path2 + ".npy") == True
):
continue
- featur_pit = self.compute_f0(inp_path, f0_method, crepe_hop_length)
+ featur_pit = self.compute_f0(
+ inp_path, f0_method, crepe_hop_length
+ )
np.save(
opt_path2,
featur_pit,
@@ -382,7 +396,9 @@ class FeatureInput(object):
allow_pickle=False,
) # ori
except:
- printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc()))
+ printt(
+ "f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())
+ )
if __name__ == "__main__":
@@ -411,12 +427,7 @@ if __name__ == "__main__":
for i in range(n_p):
p = Process(
target=featureInput.go,
- args=(
- paths[i::n_p],
- f0method,
- extraction_crepe_hop_length,
- i
- ),
+ args=(paths[i::n_p], f0method, extraction_crepe_hop_length, i),
)
ps.append(p)
p.start()
diff --git a/gui_v0.py b/gui_v0.py
index 2bd2e75..0c31844 100644
--- a/gui_v0.py
+++ b/gui_v0.py
@@ -51,8 +51,10 @@ class RVC:
self.window = 160
# Get Torch Device
- if(torch.cuda.is_available()):
- self.torch_device = torch.device(f"cuda:{0 % torch.cuda.device_count()}")
+ if torch.cuda.is_available():
+ self.torch_device = torch.device(
+ f"cuda:{0 % torch.cuda.device_count()}"
+ )
elif torch.backends.mps.is_available():
self.torch_device = torch.device("mps")
else:
@@ -141,7 +143,7 @@ class RVC:
def get_f0(self, x, f0_up_key, inp_f0=None):
# Calculate Padding and f0 details here
- p_len = x.shape[0] // 512 # For Now This probs doesn't work
+ p_len = x.shape[0] // 512 # For Now This probs doesn't work
x_pad = 1
f0_min = 50
f0_max = 1100
@@ -150,11 +152,11 @@ class RVC:
f0 = 0
# Here, check f0_methods and get their computations
- if(self.f0_method == 'harvest'):
+ if self.f0_method == "harvest":
f0 = self.get_harvest_computation(x, f0_min, f0_max)
- elif(self.f0_method == 'reg-crepe'):
+ elif self.f0_method == "reg-crepe":
f0 = self.get_regular_crepe_computation(x, f0_min, f0_max)
- elif(self.f0_method == 'reg-crepe-tiny'):
+ elif self.f0_method == "reg-crepe-tiny":
f0 = self.get_regular_crepe_computation(x, f0_min, f0_max, "tiny")
# Calculate f0_course and f0_bak here
@@ -300,7 +302,7 @@ class GUI:
with open("values1.json", "r") as j:
data = json.load(j)
except:
- # Injecting f0_method into the json data
+ # Injecting f0_method into the json data
with open("values1.json", "w") as j:
data = {
"pth_path": "",
@@ -328,11 +330,7 @@ class GUI:
[
sg.Frame(
title="Proudly forked by Mangio621",
- layout=[
- [
- sg.Image('./mangio_utils/lol.png')
- ]
- ]
+ layout=[[sg.Image("./mangio_utils/lol.png")]],
),
sg.Frame(
title=i18n("加载模型"),
@@ -384,14 +382,16 @@ class GUI:
),
],
],
- )
+ ),
],
[
# Mangio f0 Selection frame Here
sg.Frame(
layout=[
[
- sg.Radio("Harvest", "f0_method", key="harvest", default=True),
+ sg.Radio(
+ "Harvest", "f0_method", key="harvest", default=True
+ ),
sg.Radio("Crepe", "f0_method", key="reg-crepe"),
sg.Radio("Crepe Tiny", "f0_method", key="reg-crepe-tiny"),
]
@@ -536,20 +536,21 @@ class GUI:
if event == "stop_vc" and self.flag_vc == True:
self.flag_vc = False
- # Function that returns the used f0 method in string format "harvest"
+ # Function that returns the used f0 method in string format "harvest"
def get_f0_method_from_radios(self, values):
f0_array = [
- {"name": "harvest", "val": values['harvest']},
- {"name": "reg-crepe", "val": values['reg-crepe']},
- {"name": "reg-crepe-tiny", "val": values['reg-crepe-tiny']},
+ {"name": "harvest", "val": values["harvest"]},
+ {"name": "reg-crepe", "val": values["reg-crepe"]},
+ {"name": "reg-crepe-tiny", "val": values["reg-crepe-tiny"]},
]
# Filter through to find a true value
used_f0 = ""
for f0 in f0_array:
- if(f0['val'] == True):
- used_f0 = f0['name']
+ if f0["val"] == True:
+ used_f0 = f0["name"]
break
- if(used_f0 == ""): used_f0 = "harvest" # Default Harvest if used_f0 is empty somehow
+ if used_f0 == "":
+ used_f0 = "harvest" # Default Harvest if used_f0 is empty somehow
return used_f0
def set_values(self, values):
diff --git a/infer-web.py b/infer-web.py
index 68a2af3..ff88efa 100644
--- a/infer-web.py
+++ b/infer-web.py
@@ -1,7 +1,7 @@
import os
import shutil
import sys
-import json # Mangio fork using json for preset saving
+import json # Mangio fork using json for preset saving
import signal
@@ -13,6 +13,7 @@ import warnings
import numpy as np
import torch
import re
+
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
import logging
@@ -60,11 +61,11 @@ DoFormant = False
Quefrency = 8.0
Timbre = 1.2
-with open('formanting.txt', 'w+') as fsf:
+with open("formanting.txt", "w+") as fsf:
fsf.truncate(0)
- fsf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n'])
-
+ fsf.writelines([str(DoFormant) + "\n", str(Quefrency) + "\n", str(Timbre) + "\n"])
+
config = Config()
i18n = I18nAuto()
@@ -158,7 +159,7 @@ index_root = "./logs/"
global audio_root
audio_root = "audios"
global input_audio_path0
-global input_audio_path1
+global input_audio_path1
names = []
for name in os.listdir(weight_root):
if name.endswith(".pth"):
@@ -166,77 +167,83 @@ for name in os.listdir(weight_root):
index_paths = []
global indexes_list
-indexes_list=[]
+indexes_list = []
audio_paths = []
for root, dirs, files in os.walk(index_root, topdown=False):
for name in files:
if name.endswith(".index") and "trained" not in name:
index_paths.append("%s\\%s" % (root, name))
-
+
for root, dirs, files in os.walk(audio_root, topdown=False):
for name in files:
-
audio_paths.append("%s/%s" % (root, name))
-
+
uvr5_names = []
for name in os.listdir(weight_uvr5_root):
if name.endswith(".pth") or "onnx" in name:
uvr5_names.append(name.replace(".pth", ""))
+
def check_for_name():
if len(names) > 0:
return sorted(names)[0]
else:
- return ''
+ return ""
+
def get_index():
- if check_for_name() != '':
- chosen_model=sorted(names)[0].split(".")[0]
- logs_path="./logs/"+chosen_model
+ if check_for_name() != "":
+ chosen_model = sorted(names)[0].split(".")[0]
+ logs_path = "./logs/" + chosen_model
if os.path.exists(logs_path):
for file in os.listdir(logs_path):
if file.endswith(".index"):
- return os.path.join(logs_path, file).replace('\\','/')
- return ''
+ return os.path.join(logs_path, file).replace("\\", "/")
+ return ""
else:
- return ''
+ return ""
+
def get_indexes():
for dirpath, dirnames, filenames in os.walk("./logs/"):
for filename in filenames:
if filename.endswith(".index") and "trained" not in filename:
- indexes_list.append(os.path.join(dirpath,filename).replace('\\','/'))
+ indexes_list.append(os.path.join(dirpath, filename).replace("\\", "/"))
if len(indexes_list) > 0:
return indexes_list
else:
- return ''
+ return ""
+
fshift_presets_list = []
+
def get_fshift_presets():
fshift_presets_list = []
for dirpath, dirnames, filenames in os.walk("./formantshiftcfg/"):
for filename in filenames:
if filename.endswith(".txt"):
- fshift_presets_list.append(os.path.join(dirpath,filename).replace('\\','/'))
-
+ fshift_presets_list.append(
+ os.path.join(dirpath, filename).replace("\\", "/")
+ )
+
if len(fshift_presets_list) > 0:
return fshift_presets_list
else:
- return ''
+ return ""
def get_audios():
- if check_for_name() != '':
- audios_path= '"' + os.path.abspath(os.getcwd()) + '/audios/'
+ if check_for_name() != "":
+ audios_path = '"' + os.path.abspath(os.getcwd()) + "/audios/"
if os.path.exists(audios_path):
for file in os.listdir(audios_path):
print(audios_path.join(file) + '"')
return os.path.join(audios_path, file + '"')
- return ''
+ return ""
else:
- return ''
+ return ""
def vc_single(
@@ -261,12 +268,12 @@ def vc_single(
return "You need to upload an audio", None
f0_up_key = int(f0_up_key)
try:
- if input_audio_path0 == '':
+ if input_audio_path0 == "":
audio = load_audio(input_audio_path1, 16000, DoFormant, Quefrency, Timbre)
-
+
else:
audio = load_audio(input_audio_path0, 16000, DoFormant, Quefrency, Timbre)
-
+
audio_max = np.abs(audio).max() / 0.95
if audio_max > 1:
audio /= audio_max
@@ -378,7 +385,7 @@ def vc_multi(
resample_sr,
rms_mix_rate,
protect,
- crepe_hop_length
+ crepe_hop_length,
)
if "Success" in info:
try:
@@ -522,7 +529,11 @@ def get_vc(sid, to_return_protect0, to_return_protect1):
if torch.cuda.is_available():
torch.cuda.empty_cache()
cpt = None
- return ({"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"})
+ return (
+ {"visible": False, "__type__": "update"},
+ {"visible": False, "__type__": "update"},
+ {"visible": False, "__type__": "update"},
+ )
person = "%s/%s" % (weight_root, sid)
print("loading %s" % person)
cpt = torch.load(person, map_location="cpu")
@@ -580,19 +591,23 @@ def change_choices():
names.append(name)
index_paths = []
audio_paths = []
- audios_path=os.path.abspath(os.getcwd()) + "/audios/"
+ audios_path = os.path.abspath(os.getcwd()) + "/audios/"
for root, dirs, files in os.walk(index_root, topdown=False):
for name in files:
if name.endswith(".index") and "trained" not in name:
index_paths.append("%s/%s" % (root, name))
for file in os.listdir(audios_path):
- audio_paths.append("%s/%s" % (audio_root, file))
- return {"choices": sorted(names), "__type__": "update"}, {"choices": sorted(index_paths), "__type__": "update"}, {"choices": sorted(audio_paths), "__type__": "update"}
+ audio_paths.append("%s/%s" % (audio_root, file))
+ return (
+ {"choices": sorted(names), "__type__": "update"},
+ {"choices": sorted(index_paths), "__type__": "update"},
+ {"choices": sorted(audio_paths), "__type__": "update"},
+ )
def clean():
- return ({"value": "", "__type__": "update"})
-
+ return {"value": "", "__type__": "update"}
+
sr_dict = {
"32k": 32000,
@@ -624,17 +639,20 @@ def if_done_multi(done, ps):
break
done[0] = True
-def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button):
-
- if (cbox):
+def formant_enabled(
+ cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button
+):
+ if cbox:
DoFormant = True
- with open('formanting.txt', 'w') as fxxf:
+ with open("formanting.txt", "w") as fxxf:
fxxf.truncate(0)
- fxxf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n'])
- #print(f"is checked? - {cbox}\ngot {DoFormant}")
-
+ fxxf.writelines(
+ [str(DoFormant) + "\n", str(Quefrency) + "\n", str(Timbre) + "\n"]
+ )
+ # print(f"is checked? - {cbox}\ngot {DoFormant}")
+
return (
{"value": True, "__type__": "update"},
{"visible": True, "__type__": "update"},
@@ -643,16 +661,16 @@ def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_ref
{"visible": True, "__type__": "update"},
{"visible": True, "__type__": "update"},
)
-
-
+
else:
-
DoFormant = False
- with open('formanting.txt', 'w') as fxf:
+ with open("formanting.txt", "w") as fxf:
fxf.truncate(0)
- fxf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n'])
- #print(f"is checked? - {cbox}\ngot {DoFormant}")
+ fxf.writelines(
+ [str(DoFormant) + "\n", str(Quefrency) + "\n", str(Timbre) + "\n"]
+ )
+ # print(f"is checked? - {cbox}\ngot {DoFormant}")
return (
{"value": False, "__type__": "update"},
{"visible": False, "__type__": "update"},
@@ -662,28 +680,33 @@ def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_ref
{"visible": False, "__type__": "update"},
{"visible": False, "__type__": "update"},
)
-
+
def formant_apply(qfrency, tmbre):
Quefrency = qfrency
Timbre = tmbre
DoFormant = True
-
- with open('formanting.txt', 'w') as fxxxf:
+
+ with open("formanting.txt", "w") as fxxxf:
fxxxf.truncate(0)
- fxxxf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n'])
- return ({"value": Quefrency, "__type__": "update"}, {"value": Timbre, "__type__": "update"})
+ fxxxf.writelines(
+ [str(DoFormant) + "\n", str(Quefrency) + "\n", str(Timbre) + "\n"]
+ )
+ return (
+ {"value": Quefrency, "__type__": "update"},
+ {"value": Timbre, "__type__": "update"},
+ )
+
def update_fshift_presets(preset, qfrency, tmbre):
-
qfrency, tmbre = preset_apply(preset, qfrency, tmbre)
-
- if (str(preset) != ''):
- with open(str(preset), 'r') as p:
+
+ if str(preset) != "":
+ with open(str(preset), "r") as p:
content = p.readlines()
- qfrency, tmbre = content[0].split('\n')[0], content[1]
-
+ qfrency, tmbre = content[0].split("\n")[0], content[1]
+
formant_apply(qfrency, tmbre)
else:
pass
@@ -1161,7 +1184,7 @@ def train1key(
if_cache_gpu17,
if_save_every_weights18,
version19,
- echl
+ echl,
):
infos = []
@@ -1202,7 +1225,7 @@ def train1key(
model_log_dir,
np7,
f0method8,
- echl
+ echl,
)
yield get_info_str(cmd)
p = Popen(cmd, shell=True, cwd=now_dir)
@@ -1434,7 +1457,6 @@ def export_onnx(ModelPath, ExportedPath):
device = "cpu" # 导出时设备(不影响使用模型)
-
net_g = SynthesizerTrnMsNSFsidM(
*cpt["config"], is_half=False, version=cpt.get("version", "v1")
) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
@@ -1470,20 +1492,24 @@ def export_onnx(ModelPath, ExportedPath):
return "Finished"
-#region Mangio-RVC-Fork CLI App
+# region Mangio-RVC-Fork CLI App
import re as regex
import scipy.io.wavfile as wavfile
cli_current_page = "HOME"
+
def cli_split_command(com):
exp = r'(?:(?<=\s)|^)"(.*?)"(?=\s|$)|(\S+)'
split_array = regex.findall(exp, com)
split_array = [group[0] if group[0] else group[1] for group in split_array]
return split_array
+
def execute_generator_function(genObject):
- for _ in genObject: pass
+ for _ in genObject:
+ pass
+
def cli_infer(com):
# get VC first
@@ -1492,7 +1518,7 @@ def cli_infer(com):
source_audio_path = com[1]
output_file_name = com[2]
feature_index_path = com[3]
- f0_file = None # Not Implemented Yet
+ f0_file = None # Not Implemented Yet
# Get parameters for inference
speaker_id = int(com[4])
@@ -1505,7 +1531,7 @@ def cli_infer(com):
feature_ratio = float(com[11])
protection_amnt = float(com[12])
#####
-
+
print("Mangio-RVC-Fork Infer-CLI: Starting the inference...")
vc_data = get_vc(model_name)
print(vc_data)
@@ -1524,16 +1550,27 @@ def cli_infer(com):
resample,
mix,
protection_amnt,
- crepe_hop_length,
+ crepe_hop_length,
)
if "Success." in conversion_data[0]:
- print("Mangio-RVC-Fork Infer-CLI: Inference succeeded. Writing to %s/%s..." % ('audio-outputs', output_file_name))
- wavfile.write('%s/%s' % ('audio-outputs', output_file_name), conversion_data[1][0], conversion_data[1][1])
- print("Mangio-RVC-Fork Infer-CLI: Finished! Saved output to %s/%s" % ('audio-outputs', output_file_name))
+ print(
+ "Mangio-RVC-Fork Infer-CLI: Inference succeeded. Writing to %s/%s..."
+ % ("audio-outputs", output_file_name)
+ )
+ wavfile.write(
+ "%s/%s" % ("audio-outputs", output_file_name),
+ conversion_data[1][0],
+ conversion_data[1][1],
+ )
+ print(
+ "Mangio-RVC-Fork Infer-CLI: Finished! Saved output to %s/%s"
+ % ("audio-outputs", output_file_name)
+ )
else:
print("Mangio-RVC-Fork Infer-CLI: Inference failed. Here's the traceback: ")
print(conversion_data[0])
+
def cli_pre_process(com):
com = cli_split_command(com)
model_name = com[0]
@@ -1543,14 +1580,12 @@ def cli_pre_process(com):
print("Mangio-RVC-Fork Pre-process: Starting...")
generator = preprocess_dataset(
- trainset_directory,
- model_name,
- sample_rate,
- num_processes
+ trainset_directory, model_name, sample_rate, num_processes
)
execute_generator_function(generator)
print("Mangio-RVC-Fork Pre-process: Finished")
+
def cli_extract_feature(com):
com = cli_split_command(com)
model_name = com[0]
@@ -1559,23 +1594,24 @@ def cli_extract_feature(com):
has_pitch_guidance = True if (int(com[3]) == 1) else False
f0_method = com[4]
crepe_hop_length = int(com[5])
- version = com[6] # v1 or v2
-
+ version = com[6] # v1 or v2
+
print("Mangio-RVC-CLI: Extract Feature Has Pitch: " + str(has_pitch_guidance))
print("Mangio-RVC-CLI: Extract Feature Version: " + str(version))
print("Mangio-RVC-Fork Feature Extraction: Starting...")
generator = extract_f0_feature(
- gpus,
- num_processes,
- f0_method,
- has_pitch_guidance,
- model_name,
- version,
- crepe_hop_length
+ gpus,
+ num_processes,
+ f0_method,
+ has_pitch_guidance,
+ model_name,
+ version,
+ crepe_hop_length,
)
execute_generator_function(generator)
print("Mangio-RVC-Fork Feature Extraction: Finished")
+
def cli_train(com):
com = cli_split_command(com)
model_name = com[0]
@@ -1583,7 +1619,7 @@ def cli_train(com):
has_pitch_guidance = True if (int(com[2]) == 1) else False
speaker_id = int(com[3])
save_epoch_iteration = int(com[4])
- total_epoch = int(com[5]) # 10000
+ total_epoch = int(com[5]) # 10000
batch_size = int(com[6])
gpu_card_slot_numbers = com[7]
if_save_latest = i18n("是") if (int(com[8]) == 1) else i18n("否")
@@ -1591,8 +1627,8 @@ def cli_train(com):
if_save_every_weight = i18n("是") if (int(com[10]) == 1) else i18n("否")
version = com[11]
- pretrained_base = "pretrained/" if version == "v1" else "pretrained_v2/"
-
+ pretrained_base = "pretrained/" if version == "v1" else "pretrained_v2/"
+
g_pretrained_path = "%sf0G%s.pth" % (pretrained_base, sample_rate)
d_pretrained_path = "%sf0D%s.pth" % (pretrained_base, sample_rate)
@@ -1611,21 +1647,20 @@ def cli_train(com):
gpu_card_slot_numbers,
if_cache_gpu,
if_save_every_weight,
- version
+ version,
)
+
def cli_train_feature(com):
com = cli_split_command(com)
model_name = com[0]
version = com[1]
print("Mangio-RVC-Fork Train Feature Index-CLI: Training... Please wait")
- generator = train_index(
- model_name,
- version
- )
+ generator = train_index(model_name, version)
execute_generator_function(generator)
print("Mangio-RVC-Fork Train Feature Index-CLI: Done!")
+
def cli_extract_model(com):
com = cli_split_command(com)
model_path = com[0]
@@ -1635,55 +1670,74 @@ def cli_extract_model(com):
info = com[4]
version = com[5]
extract_small_model_process = extract_small_model(
- model_path,
- save_name,
- sample_rate,
- has_pitch_guidance,
- info,
- version
+ model_path, save_name, sample_rate, has_pitch_guidance, info, version
)
if extract_small_model_process == "Success.":
print("Mangio-RVC-Fork Extract Small Model: Success!")
else:
- print(str(extract_small_model_process))
+ print(str(extract_small_model_process))
print("Mangio-RVC-Fork Extract Small Model: Failed!")
def preset_apply(preset, qfer, tmbr):
- if str(preset) != '':
- with open(str(preset), 'r') as p:
+ if str(preset) != "":
+ with open(str(preset), "r") as p:
content = p.readlines()
- qfer, tmbr = content[0].split('\n')[0], content[1]
-
+ qfer, tmbr = content[0].split("\n")[0], content[1]
+
formant_apply(qfer, tmbr)
else:
pass
- return ({"value": qfer, "__type__": "update"}, {"value": tmbr, "__type__": "update"})
+ return (
+ {"value": qfer, "__type__": "update"},
+ {"value": tmbr, "__type__": "update"},
+ )
+
def print_page_details():
if cli_current_page == "HOME":
print(" go home : Takes you back to home with a navigation list.")
print(" go infer : Takes you to inference command execution.\n")
- print(" go pre-process : Takes you to training step.1) pre-process command execution.")
- print(" go extract-feature : Takes you to training step.2) extract-feature command execution.")
- print(" go train : Takes you to training step.3) being or continue training command execution.")
- print(" go train-feature : Takes you to the train feature index command execution.\n")
- print(" go extract-model : Takes you to the extract small model command execution.")
+ print(
+ " go pre-process : Takes you to training step.1) pre-process command execution."
+ )
+ print(
+ " go extract-feature : Takes you to training step.2) extract-feature command execution."
+ )
+ print(
+ " go train : Takes you to training step.3) being or continue training command execution."
+ )
+ print(
+ " go train-feature : Takes you to the train feature index command execution.\n"
+ )
+ print(
+ " go extract-model : Takes you to the extract small model command execution."
+ )
elif cli_current_page == "INFER":
print(" arg 1) model name with .pth in ./weights: mi-test.pth")
print(" arg 2) source audio path: myFolder\\MySource.wav")
- print(" arg 3) output file name to be placed in './audio-outputs': MyTest.wav")
- print(" arg 4) feature index file path: logs/mi-test/added_IVF3042_Flat_nprobe_1.index")
+ print(
+ " arg 3) output file name to be placed in './audio-outputs': MyTest.wav"
+ )
+ print(
+ " arg 4) feature index file path: logs/mi-test/added_IVF3042_Flat_nprobe_1.index"
+ )
print(" arg 5) speaker id: 0")
print(" arg 6) transposition: 0")
- print(" arg 7) f0 method: harvest (pm, harvest, crepe, crepe-tiny, hybrid[x,x,x,x], mangio-crepe, mangio-crepe-tiny)")
+ print(
+ " arg 7) f0 method: harvest (pm, harvest, crepe, crepe-tiny, hybrid[x,x,x,x], mangio-crepe, mangio-crepe-tiny)"
+ )
print(" arg 8) crepe hop length: 160")
print(" arg 9) harvest median filter radius: 3 (0-7)")
print(" arg 10) post resample rate: 0")
print(" arg 11) mix volume envelope: 1")
print(" arg 12) feature index ratio: 0.78 (0-1)")
- print(" arg 13) Voiceless Consonant Protection (Less Artifact): 0.33 (Smaller number = more protection. 0.50 means Dont Use.) \n")
- print("Example: mi-test.pth saudio/Sidney.wav myTest.wav logs/mi-test/added_index.index 0 -2 harvest 160 3 0 1 0.95 0.33")
+ print(
+ " arg 13) Voiceless Consonant Protection (Less Artifact): 0.33 (Smaller number = more protection. 0.50 means Dont Use.) \n"
+ )
+ print(
+ "Example: mi-test.pth saudio/Sidney.wav myTest.wav logs/mi-test/added_index.index 0 -2 harvest 160 3 0 1 0.95 0.33"
+ )
elif cli_current_page == "PRE-PROCESS":
print(" arg 1) Model folder name in ./logs: mi-test")
print(" arg 2) Trainset directory: mydataset (or) E:\\my-data-set")
@@ -1709,8 +1763,12 @@ def print_page_details():
print(" arg 7) Batch size: 8")
print(" arg 8) Gpu card slot: 0 (0-1-2 if using 3 GPUs)")
print(" arg 9) Save only the latest checkpoint: 0 (0 for no, 1 for yes)")
- print(" arg 10) Whether to cache training set to vram: 0 (0 for no, 1 for yes)")
- print(" arg 11) Save extracted small model every generation?: 0 (0 for no, 1 for yes)")
+ print(
+ " arg 10) Whether to cache training set to vram: 0 (0 for no, 1 for yes)"
+ )
+ print(
+ " arg 11) Save extracted small model every generation?: 0 (0 for no, 1 for yes)"
+ )
print(" arg 12) Model architecture version: v2 (use either v1 or v2)\n")
print("Example: mi-test 40k 1 0 50 10000 8 0 0 0 0 v2")
elif cli_current_page == "TRAIN-FEATURE":
@@ -1724,14 +1782,18 @@ def print_page_details():
print(" arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)")
print(' arg 5) Model information: "My Model"')
print(" arg 6) Model architecture version: v2 (use either v1 or v2)\n")
- print('Example: logs/mi-test/G_168000.pth MyModel 40k 1 "Created by Cole Mangio" v2')
+ print(
+ 'Example: logs/mi-test/G_168000.pth MyModel 40k 1 "Created by Cole Mangio" v2'
+ )
print("")
+
def change_page(page):
global cli_current_page
cli_current_page = page
return 0
+
def execute_command(com):
if com == "go home":
return change_page("HOME")
@@ -1751,7 +1813,7 @@ def execute_command(com):
if com[:3] == "go ":
print("page '%s' does not exist!" % com[3:])
return 0
-
+
if cli_current_page == "INFER":
cli_infer(com)
elif cli_current_page == "PRE-PROCESS":
@@ -1765,6 +1827,7 @@ def execute_command(com):
elif cli_current_page == "EXTRACT-MODEL":
cli_extract_model(com)
+
def cli_navigation_loop():
while True:
print("You are currently in '%s':" % cli_current_page)
@@ -1775,97 +1838,108 @@ def cli_navigation_loop():
except:
print(traceback.format_exc())
-if(config.is_cli):
+
+if config.is_cli:
print("\n\nMangio-RVC-Fork v2 CLI App!\n")
- print("Welcome to the CLI version of RVC. Please read the documentation on https://github.com/Mangio621/Mangio-RVC-Fork (README.MD) to understand how to use this app.\n")
+ print(
+ "Welcome to the CLI version of RVC. Please read the documentation on https://github.com/Mangio621/Mangio-RVC-Fork (README.MD) to understand how to use this app.\n"
+ )
cli_navigation_loop()
-#endregion
+# endregion
+
+# region RVC WebUI App
-#region RVC WebUI App
def get_presets():
data = None
- with open('../inference-presets.json', 'r') as file:
+ with open("../inference-presets.json", "r") as file:
data = json.load(file)
preset_names = []
- for preset in data['presets']:
- preset_names.append(preset['name'])
-
+ for preset in data["presets"]:
+ preset_names.append(preset["name"])
+
return preset_names
+
def match_index(sid0):
picked = False
- #folder = sid0.split('.')[0]
-
- #folder = re.split(r'. |_', sid0)[0]
- folder = sid0.split('.')[0].split('_')[0]
- #folder_test = sid0.split('.')[0].split('_')[0].split('-')[0]
+ # folder = sid0.split('.')[0]
+
+ # folder = re.split(r'. |_', sid0)[0]
+ folder = sid0.split(".")[0].split("_")[0]
+ # folder_test = sid0.split('.')[0].split('_')[0].split('-')[0]
parent_dir = "./logs/" + folder
- #print(parent_dir)
+ # print(parent_dir)
if os.path.exists(parent_dir):
- #print('path exists')
- for filename in os.listdir(parent_dir.replace('\\','/')):
+ # print('path exists')
+ for filename in os.listdir(parent_dir.replace("\\", "/")):
if filename.endswith(".index"):
for i in range(len(indexes_list)):
- if indexes_list[i] == (os.path.join(("./logs/" + folder), filename).replace('\\','/')):
- print('regular index found')
+ if indexes_list[i] == (
+ os.path.join(("./logs/" + folder), filename).replace("\\", "/")
+ ):
+ print("regular index found")
break
else:
- if indexes_list[i] == (os.path.join(("./logs/" + folder.lower()), filename).replace('\\','/')):
- print('lowered index found')
+ if indexes_list[i] == (
+ os.path.join(
+ ("./logs/" + folder.lower()), filename
+ ).replace("\\", "/")
+ ):
+ print("lowered index found")
parent_dir = "./logs/" + folder.lower()
break
- #elif (indexes_list[i]).casefold() == ((os.path.join(("./logs/" + folder), filename).replace('\\','/')).casefold()):
+ # elif (indexes_list[i]).casefold() == ((os.path.join(("./logs/" + folder), filename).replace('\\','/')).casefold()):
# print('8')
# parent_dir = "./logs/" + folder.casefold()
# break
- #elif (indexes_list[i]) == ((os.path.join(("./logs/" + folder_test), filename).replace('\\','/'))):
+ # elif (indexes_list[i]) == ((os.path.join(("./logs/" + folder_test), filename).replace('\\','/'))):
# parent_dir = "./logs/" + folder_test
# print(parent_dir)
# break
- #elif (indexes_list[i]) == (os.path.join(("./logs/" + folder_test.lower()), filename).replace('\\','/')):
+ # elif (indexes_list[i]) == (os.path.join(("./logs/" + folder_test.lower()), filename).replace('\\','/')):
# parent_dir = "./logs/" + folder_test
# print(parent_dir)
# break
- #else:
+ # else:
# #print('couldnt find index')
# continue
-
- #print('all done')
- index_path=os.path.join(parent_dir.replace('\\','/'), filename.replace('\\','/')).replace('\\','/')
- #print(index_path)
+
+ # print('all done')
+ index_path = os.path.join(
+ parent_dir.replace("\\", "/"), filename.replace("\\", "/")
+ ).replace("\\", "/")
+ # print(index_path)
return (index_path, index_path)
-
else:
- #print('nothing found')
- return ('', '')
+ # print('nothing found')
+ return ("", "")
+
def choveraudio():
- return ''
+ return ""
-def stoptraining(mim):
+def stoptraining(mim):
if int(mim) == 1:
-
with open("stop.txt", "w+") as tostops:
-
-
- tostops.writelines('stop')
- #p.terminate()
- #p.kill()
+ tostops.writelines("stop")
+ # p.terminate()
+ # p.kill()
os.kill(PID, signal.SIGTERM)
else:
pass
-
+
return (
- {"visible": False, "__type__": "update"},
+ {"visible": False, "__type__": "update"},
{"visible": True, "__type__": "update"},
)
-
-#Default-GUI
-with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
+
+
+# Default-GUI
+with gr.Blocks(theme="HaleyCH/HaleyCH_Theme") as app:
gr.HTML(" The Mangio-RVC-Fork 💻
")
gr.Markdown(
value=i18n(
@@ -1873,7 +1947,6 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
)
)
with gr.Tabs():
-
with gr.TabItem(i18n("模型推理")):
# Inference Preset Row
# with gr.Row():
@@ -1885,13 +1958,14 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
# Other RVC stuff
with gr.Row():
-
- #sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value=check_for_name())
- sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value='')
- #input_audio_path2
-
-
- refresh_button = gr.Button(i18n("Refresh voice list, index path and audio files"), variant="primary")
+ # sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value=check_for_name())
+ sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value="")
+ # input_audio_path2
+
+ refresh_button = gr.Button(
+ i18n("Refresh voice list, index path and audio files"),
+ variant="primary",
+ )
clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
spk_item = gr.Slider(
minimum=0,
@@ -1914,21 +1988,38 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
)
input_audio0 = gr.Textbox(
- label=i18n("Add audio's name to the path to the audio file to be processed (default is the correct format example) Remove the path to use an audio from the dropdown list:"),
- value=os.path.abspath(os.getcwd()).replace('\\', '/') + "/audios/" + "audio.wav",
+ label=i18n(
+ "Add audio's name to the path to the audio file to be processed (default is the correct format example) Remove the path to use an audio from the dropdown list:"
+ ),
+ value=os.path.abspath(os.getcwd()).replace("\\", "/")
+ + "/audios/"
+ + "audio.wav",
)
input_audio1 = gr.Dropdown(
- label=i18n("Auto detect audio path and select from the dropdown:"),
+ label=i18n(
+ "Auto detect audio path and select from the dropdown:"
+ ),
choices=sorted(audio_paths),
value=get_audios(),
interactive=True,
)
- input_audio1.change(fn=choveraudio,inputs=[],outputs=[input_audio0])
+ input_audio1.change(
+ fn=choveraudio, inputs=[], outputs=[input_audio0]
+ )
f0method0 = gr.Radio(
label=i18n(
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"
),
- choices=["pm", "harvest", "dio", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe"], # Fork Feature. Add Crepe-Tiny
+ choices=[
+ "pm",
+ "harvest",
+ "dio",
+ "crepe",
+ "crepe-tiny",
+ "mangio-crepe",
+ "mangio-crepe-tiny",
+ "rmvpe",
+ ], # Fork Feature. Add Crepe-Tiny
value="rmvpe",
interactive=True,
)
@@ -1938,7 +2029,7 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
step=1,
label=i18n("crepe_hop_length"),
value=120,
- interactive=True
+ interactive=True,
)
filter_radius0 = gr.Slider(
minimum=0,
@@ -1954,22 +2045,21 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
value="",
interactive=True,
)
-
+
file_index2 = gr.Dropdown(
label="3. Path to your added.index file (if it didn't automatically find it.)",
choices=get_indexes(),
value=get_index(),
interactive=True,
allow_custom_value=True,
- )
- #sid0.select(fn=match_index, inputs=sid0, outputs=file_index2)
-
-
+ )
+ # sid0.select(fn=match_index, inputs=sid0, outputs=file_index2)
-
refresh_button.click(
- fn=change_choices, inputs=[], outputs=[sid0, file_index2, input_audio1]
- )
+ fn=change_choices,
+ inputs=[],
+ outputs=[sid0, file_index2, input_audio1],
+ )
# file_big_npy1 = gr.Textbox(
# label=i18n("特征文件路径"),
# value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
@@ -2015,26 +2105,28 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
interactive=True,
visible=True,
)
-
+
formant_preset = gr.Dropdown(
- value='',
+ value="",
choices=get_fshift_presets(),
label="browse presets for formanting",
visible=False,
)
- formant_refresh_button = gr.Button(value='\U0001f504', visible=False,variant='primary')
- #formant_refresh_button = ToolButton( elem_id='1')
- #create_refresh_button(formant_preset, lambda: {"choices": formant_preset}, "refresh_list_shiftpresets")
-
+ formant_refresh_button = gr.Button(
+ value="\U0001f504", visible=False, variant="primary"
+ )
+ # formant_refresh_button = ToolButton( elem_id='1')
+ # create_refresh_button(formant_preset, lambda: {"choices": formant_preset}, "refresh_list_shiftpresets")
+
qfrency = gr.Slider(
- value=Quefrency,
- label="Quefrency for formant shifting",
- minimum=-16.0,
- maximum=16.0,
- step=0.1,
- visible=False,
- interactive=True,
- )
+ value=Quefrency,
+ label="Quefrency for formant shifting",
+ minimum=-16.0,
+ maximum=16.0,
+ step=0.1,
+ visible=False,
+ interactive=True,
+ )
tmbre = gr.Slider(
value=Timbre,
label="Timbre for formant shifting",
@@ -2044,12 +2136,42 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
visible=False,
interactive=True,
)
-
- formant_preset.change(fn=preset_apply, inputs=[formant_preset, qfrency, tmbre], outputs=[qfrency, tmbre])
+
+ formant_preset.change(
+ fn=preset_apply,
+ inputs=[formant_preset, qfrency, tmbre],
+ outputs=[qfrency, tmbre],
+ )
frmntbut = gr.Button("Apply", variant="primary", visible=False)
- formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button])
- frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
- formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre])
+ formanting.change(
+ fn=formant_enabled,
+ inputs=[
+ formanting,
+ qfrency,
+ tmbre,
+ frmntbut,
+ formant_preset,
+ formant_refresh_button,
+ ],
+ outputs=[
+ formanting,
+ qfrency,
+ tmbre,
+ frmntbut,
+ formant_preset,
+ formant_refresh_button,
+ ],
+ )
+ frmntbut.click(
+ fn=formant_apply,
+ inputs=[qfrency, tmbre],
+ outputs=[qfrency, tmbre],
+ )
+ formant_refresh_button.click(
+ fn=update_fshift_presets,
+ inputs=[formant_preset, qfrency, tmbre],
+ outputs=[formant_preset, qfrency, tmbre],
+ )
##formant_refresh_button.click(fn=preset_apply, inputs=[formant_preset, qfrency, tmbre], outputs=[formant_preset, qfrency, tmbre])
##formant_refresh_button.click(fn=update_fshift_presets, inputs=[formant_preset, qfrency, tmbre], outputs=[formant_preset, qfrency, tmbre])
f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"))
@@ -2074,7 +2196,7 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
resample_sr0,
rms_mix_rate0,
protect0,
- crepe_hop_length
+ crepe_hop_length,
],
[vc_output1, vc_output2],
)
@@ -2110,13 +2232,17 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
value="",
interactive=True,
)
- file_index4 = gr.Dropdown( #file index dropdown for batch
+ file_index4 = gr.Dropdown( # file index dropdown for batch
label=i18n("自动检测index路径,下拉式选择(dropdown)"),
choices=get_indexes(),
value=get_index(),
interactive=True,
)
- sid0.select(fn=match_index, inputs=[sid0], outputs=[file_index2, file_index4])
+ sid0.select(
+ fn=match_index,
+ inputs=[sid0],
+ outputs=[file_index2, file_index4],
+ )
refresh_button.click(
fn=lambda: change_choices()[1],
inputs=[],
@@ -2163,7 +2289,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
with gr.Column():
dir_input = gr.Textbox(
label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
- value=os.path.abspath(os.getcwd()).replace('\\', '/') + "/audios/",
+ value=os.path.abspath(os.getcwd()).replace("\\", "/")
+ + "/audios/",
)
inputs = gr.File(
file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
@@ -2226,11 +2353,11 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
with gr.Column():
dir_wav_input = gr.Textbox(
label=i18n("输入待处理音频文件夹路径"),
- value=((os.getcwd()).replace('\\', '/') + "/audios/")
+ value=((os.getcwd()).replace("\\", "/") + "/audios/"),
)
wav_inputs = gr.File(
file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
- ) #####
+ ) #####
with gr.Column():
model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
agg = gr.Slider(
@@ -2312,7 +2439,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
)
with gr.Row():
trainset_dir4 = gr.Textbox(
- label=i18n("输入训练文件夹路径"), value=os.path.abspath(os.getcwd()) + "\\datasets\\"
+ label=i18n("输入训练文件夹路径"),
+ value=os.path.abspath(os.getcwd()) + "\\datasets\\",
)
spk_id5 = gr.Slider(
minimum=0,
@@ -2342,7 +2470,14 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
label=i18n(
"选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢"
),
- choices=["pm", "harvest", "dio", "crepe", "mangio-crepe", "rmvpe"], # Fork feature: Crepe on f0 extraction for training.
+ choices=[
+ "pm",
+ "harvest",
+ "dio",
+ "crepe",
+ "mangio-crepe",
+ "rmvpe",
+ ], # Fork feature: Crepe on f0 extraction for training.
value="rmvpe",
interactive=True,
)
@@ -2352,13 +2487,21 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
step=1,
label=i18n("crepe_hop_length"),
value=64,
- interactive=True
+ interactive=True,
)
but2 = gr.Button(i18n("特征提取"), variant="primary")
info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
but2.click(
extract_f0_feature,
- [gpus6, np7, f0method8, if_f0_3, exp_dir1, version19, extraction_crepe_hop_length],
+ [
+ gpus6,
+ np7,
+ f0method8,
+ if_f0_3,
+ exp_dir1,
+ version19,
+ extraction_crepe_hop_length,
+ ],
[info2],
)
with gr.Group():
@@ -2442,19 +2585,26 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
interactive=True,
)
butstop = gr.Button(
- "Stop Training",
- variant='primary',
- visible=False,
+ "Stop Training",
+ variant="primary",
+ visible=False,
)
but3 = gr.Button(i18n("训练模型"), variant="primary", visible=True)
- but3.click(fn=stoptraining, inputs=[gr.Number(value=0, visible=False)], outputs=[but3, butstop])
- butstop.click(fn=stoptraining, inputs=[gr.Number(value=1, visible=False)], outputs=[butstop, but3])
-
-
+ but3.click(
+ fn=stoptraining,
+ inputs=[gr.Number(value=0, visible=False)],
+ outputs=[but3, butstop],
+ )
+ butstop.click(
+ fn=stoptraining,
+ inputs=[gr.Number(value=1, visible=False)],
+ outputs=[butstop, but3],
+ )
+
but4 = gr.Button(i18n("训练特征索引"), variant="primary")
- #but5 = gr.Button(i18n("一键训练"), variant="primary")
+ # but5 = gr.Button(i18n("一键训练"), variant="primary")
info3 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=10)
-
+
but3.click(
click_train,
[
@@ -2475,12 +2625,10 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
],
[info3],
)
-
+
but4.click(train_index, [exp_dir1, version19], info3)
-
-
-
- #but5.click(
+
+ # but5.click(
# train1key,
# [
# exp_dir1,
@@ -2503,8 +2651,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
# extraction_crepe_hop_length
# ],
# info3,
- #)
-
+ # )
+
with gr.TabItem(i18n("ckpt处理")):
with gr.Group():
gr.Markdown(value=i18n("模型融合, 可用于测试音色融合"))
@@ -2601,7 +2749,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
ckpt_path2 = gr.Textbox(
lines=3,
label=i18n("模型路径"),
- value=os.path.abspath(os.getcwd()).replace('\\', '/') + "/logs/[YOUR_MODEL]/G_23333.pth",
+ value=os.path.abspath(os.getcwd()).replace("\\", "/")
+ + "/logs/[YOUR_MODEL]/G_23333.pth",
interactive=True,
)
save_name = gr.Textbox(
@@ -2665,8 +2814,7 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
except:
gr.Markdown(traceback.format_exc())
-
- #region Mangio Preset Handler Region
+ # region Mangio Preset Handler Region
def save_preset(
preset_name,
sid0,
@@ -2682,45 +2830,44 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
resample_sr,
rms_mix_rate,
protect,
- f0_file
+ f0_file,
):
data = None
- with open('../inference-presets.json', 'r') as file:
+ with open("../inference-presets.json", "r") as file:
data = json.load(file)
preset_json = {
- 'name': preset_name,
- 'model': sid0,
- 'transpose': vc_transform,
- 'audio_file': input_audio0,
- 'auto_audio_file': input_audio1,
- 'f0_method': f0method,
- 'crepe_hop_length': crepe_hop_length,
- 'median_filtering': filter_radius,
- 'feature_path': file_index1,
- 'auto_feature_path': file_index2,
- 'search_feature_ratio': index_rate,
- 'resample': resample_sr,
- 'volume_envelope': rms_mix_rate,
- 'protect_voiceless': protect,
- 'f0_file_path': f0_file
+ "name": preset_name,
+ "model": sid0,
+ "transpose": vc_transform,
+ "audio_file": input_audio0,
+ "auto_audio_file": input_audio1,
+ "f0_method": f0method,
+ "crepe_hop_length": crepe_hop_length,
+ "median_filtering": filter_radius,
+ "feature_path": file_index1,
+ "auto_feature_path": file_index2,
+ "search_feature_ratio": index_rate,
+ "resample": resample_sr,
+ "volume_envelope": rms_mix_rate,
+ "protect_voiceless": protect,
+ "f0_file_path": f0_file,
}
- data['presets'].append(preset_json)
- with open('../inference-presets.json', 'w') as file:
+ data["presets"].append(preset_json)
+ with open("../inference-presets.json", "w") as file:
json.dump(data, file)
file.flush()
print("Saved Preset %s into inference-presets.json!" % preset_name)
-
def on_preset_changed(preset_name):
print("Changed Preset to %s!" % preset_name)
data = None
- with open('../inference-presets.json', 'r') as file:
+ with open("../inference-presets.json", "r") as file:
data = json.load(file)
print("Searching for " + preset_name)
returning_preset = None
- for preset in data['presets']:
- if(preset['name'] == preset_name):
+ for preset in data["presets"]:
+ if preset["name"] == preset_name:
print("Found a preset")
returning_preset = preset
# return all new input values
@@ -2740,11 +2887,11 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
# returning_preset['f0_file_path']
)
- # Preset State Changes
-
+ # Preset State Changes
+
# This click calls save_preset that saves the preset into inference-presets.json with the preset name
# mangio_preset_save_btn.click(
- # fn=save_preset,
+ # fn=save_preset,
# inputs=[
# mangio_preset_name_save,
# sid0,
@@ -2760,16 +2907,16 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
# rms_mix_rate0,
# protect0,
# f0_file
- # ],
+ # ],
# outputs=[]
# )
# mangio_preset.change(
- # on_preset_changed,
+ # on_preset_changed,
# inputs=[
# # Pass inputs here
# mangio_preset
- # ],
+ # ],
# outputs=[
# # Pass Outputs here. These refer to the gradio elements that we want to directly change
# # sid0,
@@ -2787,14 +2934,16 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
# # f0_file
# ]
# )
- #endregion
+ # endregion
- # with gr.TabItem(i18n("招募音高曲线前端编辑器")):
- # gr.Markdown(value=i18n("加开发群联系我xxxxx"))
- # with gr.TabItem(i18n("点击查看交流、问题反馈群号")):
- # gr.Markdown(value=i18n("xxxxx"))
+ # with gr.TabItem(i18n("招募音高曲线前端编辑器")):
+ # gr.Markdown(value=i18n("加开发群联系我xxxxx"))
+ # with gr.TabItem(i18n("点击查看交流、问题反馈群号")):
+ # gr.Markdown(value=i18n("xxxxx"))
- if config.iscolab or config.paperspace: # Share gradio link for colab and paperspace (FORK FEATURE)
+ if (
+ config.iscolab or config.paperspace
+ ): # Share gradio link for colab and paperspace (FORK FEATURE)
app.queue(concurrency_count=511, max_size=1022).launch(share=True)
else:
app.queue(concurrency_count=511, max_size=1022).launch(
@@ -2804,8 +2953,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
quiet=False,
)
-#endregion
-''' #End of Default-GUI
+# endregion
+""" #End of Default-GUI
with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
gr.HTML(" The Mangio-RVC-Fork 💻
")
@@ -3735,4 +3884,4 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
)
#endregion
-'''
\ No newline at end of file
+"""
diff --git a/my_utils.py b/my_utils.py
index 1990d82..90eeb52 100644
--- a/my_utils.py
+++ b/my_utils.py
@@ -1,11 +1,14 @@
import ffmpeg
import numpy as np
-#import praatio
-#import praatio.praat_scripts
-import os
-#from os.path import join
-#praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")
+# import praatio
+# import praatio.praat_scripts
+import os
+
+# from os.path import join
+
+# praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")
+
def load_audio(file, sr, DoFormant, Quefrency, Timbre):
try:
@@ -15,43 +18,47 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre):
file = (
file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
) # 防止小白拷路径头尾带了空格和"和回车
- file_formanted = (
- file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
- )
- with open('formanting.txt', 'r') as fvf:
+ file_formanted = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
+ with open("formanting.txt", "r") as fvf:
content = fvf.readlines()
- if 'True' in content[0].split('\n')[0]:
- #print("true")
+ if "True" in content[0].split("\n")[0]:
+ # print("true")
DoFormant = True
- Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
-
+ Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
+
else:
- #print("not true")
+ # print("not true")
DoFormant = False
-
+
if DoFormant:
- #os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}")
- #print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted))
+ # os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}")
+ # print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted))
print("formanting...")
-
- os.system('stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"' % (file, Quefrency, Timbre, file_formanted))
+
+ os.system(
+ 'stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"'
+ % (file, Quefrency, Timbre, file_formanted)
+ )
print("formanted!")
- #filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\')
- #file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\')
+ # filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\')
+ # file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\')
out, _ = (
- ffmpeg.input('%sFORMANTED%s' % (file_formanted, '.wav'), threads=0)
+ ffmpeg.input("%sFORMANTED%s" % (file_formanted, ".wav"), threads=0)
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
- .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
+ .run(
+ cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
+ )
)
-
- os.remove('%sFORMANTED%s' % (file_formanted, '.wav'))
+
+ os.remove("%sFORMANTED%s" % (file_formanted, ".wav"))
else:
-
out, _ = (
ffmpeg.input(file, threads=0)
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
- .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
+ .run(
+ cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
+ )
)
except Exception as e:
raise RuntimeError(f"Failed to load audio: {e}")
diff --git a/train_nsf_sim_cache_sid_load_pretrain.py b/train_nsf_sim_cache_sid_load_pretrain.py
index 5bb3526..4ae7308 100644
--- a/train_nsf_sim_cache_sid_load_pretrain.py
+++ b/train_nsf_sim_cache_sid_load_pretrain.py
@@ -568,10 +568,10 @@ def train_and_evaluate(
),
)
)
-
+
with open("stop.txt", "r+") as tostop:
content = tostop.read()
- if 'stop' in content:
+ if "stop" in content:
logger.info("Stop Button was pressed. The program is closed.")
if hasattr(net_g, "module"):
ckpt = net_g.module.state_dict()
@@ -581,15 +581,21 @@ def train_and_evaluate(
"saving final ckpt:%s"
% (
savee(
- ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch, hps.version, hps
+ ckpt,
+ hps.sample_rate,
+ hps.if_f0,
+ hps.name,
+ epoch,
+ hps.version,
+ hps,
)
)
)
-
+
tostop.truncate(0)
tostop.writelines("not")
os._exit(2333333)
-
+
if rank == 0:
logger.info("====> Epoch: {} {}".format(epoch, epoch_recorder.record()))
if epoch >= hps.total_epoch and rank == 0:
diff --git a/trainset_preprocess_pipeline_print.py b/trainset_preprocess_pipeline_print.py
index 185cf29..b0ef248 100644
--- a/trainset_preprocess_pipeline_print.py
+++ b/trainset_preprocess_pipeline_print.py
@@ -24,9 +24,10 @@ Timbre = 0.0
mutex = multiprocessing.Lock()
f = open("%s/preprocess.log" % exp_dir, "a+")
-with open('formanting.txt', 'r') as fvf:
- content = fvf.readlines()
- Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
+with open("formanting.txt", "r") as fvf:
+ content = fvf.readlines()
+ Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
+
def println(strr):
mutex.acquire()
@@ -104,12 +105,14 @@ class PreProcess:
idx1 += 1
break
self.norm_write(tmp_audio, idx0, idx1)
- #println("%s->Suc." % path)
+ # println("%s->Suc." % path)
except:
println("%s->%s" % (path, traceback.format_exc()))
def pipeline_mp(self, infos, thread_n):
- for path, idx0 in tqdm.tqdm(infos, position=thread_n, leave=True, desc="thread:%s" % thread_n):
+ for path, idx0 in tqdm.tqdm(
+ infos, position=thread_n, leave=True, desc="thread:%s" % thread_n
+ ):
self.pipeline(path, idx0)
def pipeline_mp_inp_dir(self, inp_root, n_p):
diff --git a/vc_infer_pipeline.py b/vc_infer_pipeline.py
index 05ce82c..43cd829 100644
--- a/vc_infer_pipeline.py
+++ b/vc_infer_pipeline.py
@@ -1,7 +1,7 @@
import numpy as np, parselmouth, torch, pdb, sys, os
from time import time as ttime
import torch.nn.functional as F
-import torchcrepe # Fork feature. Use the crepe f0 algorithm. New dependency (pip install torchcrepe)
+import torchcrepe # Fork feature. Use the crepe f0 algorithm. New dependency (pip install torchcrepe)
from torch import Tensor
import scipy.signal as signal
import pyworld, os, traceback, faiss, librosa, torchcrepe
@@ -15,6 +15,7 @@ bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
input_audio_path2wav = {}
+
@lru_cache
def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
audio = input_audio_path2wav[input_audio_path]
@@ -74,24 +75,28 @@ class VC(object):
def get_optimal_torch_device(self, index: int = 0) -> torch.device:
# Get cuda device
if torch.cuda.is_available():
- return torch.device(f"cuda:{index % torch.cuda.device_count()}") # Very fast
+ return torch.device(
+ f"cuda:{index % torch.cuda.device_count()}"
+ ) # Very fast
elif torch.backends.mps.is_available():
return torch.device("mps")
# Insert an else here to grab "xla" devices if available. TO DO later. Requires the torch_xla.core.xla_model library
- # Else wise return the "cpu" as a torch device,
+        # Otherwise, return the "cpu" as the torch device
return torch.device("cpu")
# Fork Feature: Compute f0 with the crepe method
def get_f0_crepe_computation(
- self,
- x,
- f0_min,
- f0_max,
- p_len,
- hop_length=160, # 512 before. Hop length changes the speed that the voice jumps to a different dramatic pitch. Lower hop lengths means more pitch accuracy but longer inference time.
- model="full", # Either use crepe-tiny "tiny" or crepe "full". Default is full
+ self,
+ x,
+ f0_min,
+ f0_max,
+ p_len,
+ hop_length=160, # 512 before. Hop length changes the speed that the voice jumps to a different dramatic pitch. Lower hop lengths means more pitch accuracy but longer inference time.
+ model="full", # Either use crepe-tiny "tiny" or crepe "full". Default is full
):
- x = x.astype(np.float32) # fixes the F.conv2D exception. We needed to convert double to float.
+ x = x.astype(
+ np.float32
+ ) # fixes the F.conv2D exception. We needed to convert double to float.
x /= np.quantile(np.abs(x), 0.999)
torch_device = self.get_optimal_torch_device()
audio = torch.from_numpy(x).to(torch_device, copy=True)
@@ -109,7 +114,7 @@ class VC(object):
model,
batch_size=hop_length * 2,
device=torch_device,
- pad=True
+ pad=True,
)
p_len = p_len or x.shape[0] // hop_length
# Resize the pitch for final f0
@@ -118,17 +123,17 @@ class VC(object):
target = np.interp(
np.arange(0, len(source) * p_len, len(source)) / p_len,
np.arange(0, len(source)),
- source
+ source,
)
f0 = np.nan_to_num(target)
- return f0 # Resized f0
-
+ return f0 # Resized f0
+
def get_f0_official_crepe_computation(
- self,
- x,
- f0_min,
- f0_max,
- model="full",
+ self,
+ x,
+ f0_min,
+ f0_max,
+ model="full",
):
# Pick a batch size that doesn't cause memory errors on your gpu
batch_size = 512
@@ -153,15 +158,15 @@ class VC(object):
# Fork Feature: Compute pYIN f0 method
def get_f0_pyin_computation(self, x, f0_min, f0_max):
- y, sr = librosa.load('saudio/Sidney.wav', self.sr, mono=True)
+ y, sr = librosa.load("saudio/Sidney.wav", self.sr, mono=True)
f0, _, _ = librosa.pyin(y, sr=self.sr, fmin=f0_min, fmax=f0_max)
- f0 = f0[1:] # Get rid of extra first frame
+ f0 = f0[1:] # Get rid of extra first frame
return f0
# Fork Feature: Acquire median hybrid f0 estimation calculation
def get_f0_hybrid_computation(
- self,
- methods_str,
+ self,
+ methods_str,
input_audio_path,
x,
f0_min,
@@ -173,9 +178,9 @@ class VC(object):
):
# Get various f0 methods from input to use in the computation stack
s = methods_str
- s = s.split('hybrid')[1]
- s = s.replace('[', '').replace(']', '')
- methods = s.split('+')
+ s = s.split("hybrid")[1]
+ s = s.replace("[", "").replace("]", "")
+ methods = s.split("+")
f0_computation_stack = []
print("Calculating f0 pitch estimations for methods: %s" % str(methods))
@@ -202,35 +207,39 @@ class VC(object):
)
elif method == "crepe":
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max)
- f0 = f0[1:] # Get rid of extra first frame
+ f0 = f0[1:] # Get rid of extra first frame
elif method == "crepe-tiny":
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny")
- f0 = f0[1:] # Get rid of extra first frame
+ f0 = f0[1:] # Get rid of extra first frame
elif method == "mangio-crepe":
- f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length)
+ f0 = self.get_f0_crepe_computation(
+ x, f0_min, f0_max, p_len, crepe_hop_length
+ )
elif method == "mangio-crepe-tiny":
- f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length, "tiny")
+ f0 = self.get_f0_crepe_computation(
+ x, f0_min, f0_max, p_len, crepe_hop_length, "tiny"
+ )
elif method == "harvest":
f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
if filter_radius > 2:
f0 = signal.medfilt(f0, 3)
- f0 = f0[1:] # Get rid of first frame.
- elif method == "dio": # Potentially buggy?
+ f0 = f0[1:] # Get rid of first frame.
+ elif method == "dio": # Potentially buggy?
f0, t = pyworld.dio(
x.astype(np.double),
fs=self.sr,
f0_ceil=f0_max,
f0_floor=f0_min,
- frame_period=10
+ frame_period=10,
)
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
f0 = signal.medfilt(f0, 3)
f0 = f0[1:]
- #elif method == "pyin": Not Working just yet
+ # elif method == "pyin": Not Working just yet
# f0 = self.get_f0_pyin_computation(x, f0_min, f0_max)
# Push method to the stack
f0_computation_stack.append(f0)
-
+
for fc in f0_computation_stack:
print(len(fc))
@@ -280,13 +289,13 @@ class VC(object):
f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
if filter_radius > 2:
f0 = signal.medfilt(f0, 3)
- elif f0_method == "dio": # Potentially Buggy?
+ elif f0_method == "dio": # Potentially Buggy?
f0, t = pyworld.dio(
x.astype(np.double),
fs=self.sr,
f0_ceil=f0_max,
f0_floor=f0_min,
- frame_period=10
+ frame_period=10,
)
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
f0 = signal.medfilt(f0, 3)
@@ -295,12 +304,17 @@ class VC(object):
elif f0_method == "crepe-tiny":
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny")
elif f0_method == "mangio-crepe":
- f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length)
+ f0 = self.get_f0_crepe_computation(
+ x, f0_min, f0_max, p_len, crepe_hop_length
+ )
elif f0_method == "mangio-crepe-tiny":
- f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length, "tiny")
+ f0 = self.get_f0_crepe_computation(
+ x, f0_min, f0_max, p_len, crepe_hop_length, "tiny"
+ )
elif f0_method == "rmvpe":
if hasattr(self, "model_rmvpe") == False:
from rmvpe import RMVPE
+
print("loading rmvpe model")
self.model_rmvpe = RMVPE(
"rmvpe.pt", is_half=self.is_half, device=self.device
@@ -311,7 +325,7 @@ class VC(object):
# Perform hybrid median pitch estimation
input_audio_path2wav[input_audio_path] = x.astype(np.double)
f0 = self.get_f0_hybrid_computation(
- f0_method,
+ f0_method,
input_audio_path,
x,
f0_min,
@@ -319,7 +333,7 @@ class VC(object):
p_len,
filter_radius,
crepe_hop_length,
- time_step
+ time_step,
)
f0 *= pow(2, f0_up_key / 12)