mirror of
https://github.com/Mangio621/Mangio-RVC-Fork.git
synced 2025-12-16 11:37:44 +01:00
Merge pull request #49 from Mangio621/TRUE-FIXED-FP16
Fix training times by fixing fp16 detection
This commit is contained in:
76
config.py
76
config.py
@@ -1,19 +1,73 @@
|
||||
import argparse
|
||||
import sys
|
||||
import torch
|
||||
import json
|
||||
from multiprocessing import cpu_count
|
||||
|
||||
global usefp16
|
||||
usefp16 = False
|
||||
|
||||
|
||||
def _set_fp16_run(enabled):
    """Set ``train.fp16_run`` to *enabled* in the 32k/40k/48k JSON configs.

    Each file under ``configs/`` is parsed as JSON, updated only if it already
    has a ``train.fp16_run`` entry, and written back with 4-space indentation.
    """
    for config_file in ["32k.json", "40k.json", "48k.json"]:
        config_path = f"configs/{config_file}"
        with open(config_path, "r") as src:
            data = json.load(src)

        # Only flip an existing key; never introduce it into a config.
        if "train" in data and "fp16_run" in data["train"]:
            data["train"]["fp16_run"] = enabled

        with open(config_path, "w") as dst:
            json.dump(data, dst, indent=4)

        print(f"Set fp16_run to {'true' if enabled else 'false'} in {config_file}")


def _replace_in_preprocess_script(old, new):
    """Replace every occurrence of *old* with *new* in the preprocess script."""
    script_path = "trainset_preprocess_pipeline_print.py"
    with open(script_path, "r", encoding="utf-8") as src:
        strr = src.read()

    strr = strr.replace(old, new)

    with open(script_path, "w", encoding="utf-8") as dst:
        dst.write(strr)


def use_fp32_config():
    """Detect fp16 support on GPU 0 and rewrite the training configs to match.

    Despite its name, this function enables fp16 when the first CUDA device
    reports a major compute capability of 7 or higher, and disables it
    otherwise. In both cases the JSON configs in ``configs/`` and the literal
    ``3.0``/``3.7`` in ``trainset_preprocess_pipeline_print.py`` are rewritten
    on disk. When CUDA is unavailable nothing is modified.

    NOTE(review): the ``3.0``/``3.7`` literal is presumably a preprocessing
    length setting; confirm against the preprocessing script.

    Returns:
        tuple: ``(usefp16, device_capability)`` where ``device_capability`` is
        the major compute capability, or ``0`` when CUDA is unavailable.
    """
    usefp16 = False
    device_capability = 0

    if not torch.cuda.is_available():
        print(
            "CUDA is not available. Make sure you have an NVIDIA GPU and CUDA installed."
        )
        return (usefp16, device_capability)

    device = torch.device("cuda:0")  # Assuming you have only one GPU (index 0).
    device_capability = torch.cuda.get_device_capability(device)[0]
    usefp16 = device_capability >= 7

    _set_fp16_run(usefp16)
    if usefp16:
        _replace_in_preprocess_script("3.0", "3.7")
    else:
        _replace_in_preprocess_script("3.7", "3.0")

    return (usefp16, device_capability)
|
||||
|
||||
|
||||
class Config:
|
||||
@@ -51,10 +105,14 @@ class Config:
|
||||
help="Do not open in browser automatically",
|
||||
)
|
||||
parser.add_argument( # Fork Feature. Paperspace integration for web UI
|
||||
"--paperspace", action="store_true", help="Note that this argument just shares a gradio link for the web UI. Thus can be used on other non-local CLI systems."
|
||||
"--paperspace",
|
||||
action="store_true",
|
||||
help="Note that this argument just shares a gradio link for the web UI. Thus can be used on other non-local CLI systems.",
|
||||
)
|
||||
parser.add_argument( # Fork Feature. Embed a CLI into the infer-web.py
|
||||
"--is_cli", action="store_true", help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!"
|
||||
"--is_cli",
|
||||
action="store_true",
|
||||
help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!",
|
||||
)
|
||||
cmd_opts = parser.parse_args()
|
||||
|
||||
@@ -95,9 +153,9 @@ class Config:
|
||||
):
|
||||
print("Found GPU", self.gpu_name, ", force to fp32")
|
||||
self.is_half = False
|
||||
use_fp32_config()
|
||||
else:
|
||||
print("Found GPU", self.gpu_name)
|
||||
use_fp32_config()
|
||||
self.gpu_mem = int(
|
||||
torch.cuda.get_device_properties(i_device).total_memory
|
||||
/ 1024
|
||||
|
||||
@@ -19,9 +19,9 @@ f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
|
||||
|
||||
DoFormant = False
|
||||
|
||||
with open('formanting.txt', 'r') as fvf:
|
||||
with open("formanting.txt", "r") as fvf:
|
||||
content = fvf.readlines()
|
||||
Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
|
||||
Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
|
||||
|
||||
|
||||
def printt(strr):
|
||||
@@ -67,9 +67,9 @@ class FeatureInput(object):
|
||||
):
|
||||
# Get various f0 methods from input to use in the computation stack
|
||||
s = methods_str
|
||||
s = s.split('hybrid')[1]
|
||||
s = s.replace('[', '').replace(']', '')
|
||||
methods = s.split('+')
|
||||
s = s.split("hybrid")[1]
|
||||
s = s.replace("[", "").replace("]", "")
|
||||
methods = s.split("+")
|
||||
f0_computation_stack = []
|
||||
|
||||
print("Calculating f0 pitch estimations for methods: %s" % str(methods))
|
||||
@@ -99,7 +99,9 @@ class FeatureInput(object):
|
||||
torch_device_index = 0
|
||||
torch_device = None
|
||||
if torch.cuda.is_available():
|
||||
torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
|
||||
torch_device = torch.device(
|
||||
f"cuda:{torch_device_index % torch.cuda.device_count()}"
|
||||
)
|
||||
elif torch.backends.mps.is_available():
|
||||
torch_device = torch.device("mps")
|
||||
else:
|
||||
@@ -132,7 +134,9 @@ class FeatureInput(object):
|
||||
torch_device_index = 0
|
||||
torch_device = None
|
||||
if torch.cuda.is_available():
|
||||
torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
|
||||
torch_device = torch.device(
|
||||
f"cuda:{torch_device_index % torch.cuda.device_count()}"
|
||||
)
|
||||
elif torch.backends.mps.is_available():
|
||||
torch_device = torch.device("mps")
|
||||
else:
|
||||
@@ -156,7 +160,7 @@ class FeatureInput(object):
|
||||
"full",
|
||||
batch_size=crepe_hop_length * 2,
|
||||
device=torch_device,
|
||||
pad=True
|
||||
pad=True,
|
||||
)
|
||||
p_len = p_len or x.shape[0] // crepe_hop_length
|
||||
# Resize the pitch
|
||||
@@ -165,7 +169,7 @@ class FeatureInput(object):
|
||||
target = np.interp(
|
||||
np.arange(0, len(source) * p_len, len(source)) / p_len,
|
||||
np.arange(0, len(source)),
|
||||
source
|
||||
source,
|
||||
)
|
||||
f0 = np.nan_to_num(target)
|
||||
elif method == "harvest":
|
||||
@@ -236,10 +240,9 @@ class FeatureInput(object):
|
||||
elif f0_method == "rmvpe":
|
||||
if hasattr(self, "model_rmvpe") == False:
|
||||
from rmvpe import RMVPE
|
||||
|
||||
print("loading rmvpe model")
|
||||
self.model_rmvpe = RMVPE(
|
||||
"rmvpe.pt", is_half=False, device="cuda:0"
|
||||
)
|
||||
self.model_rmvpe = RMVPE("rmvpe.pt", is_half=False, device="cuda:0")
|
||||
f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
|
||||
elif f0_method == "dio":
|
||||
f0, t = pyworld.dio(
|
||||
@@ -250,12 +253,16 @@ class FeatureInput(object):
|
||||
frame_period=1000 * self.hop / self.fs,
|
||||
)
|
||||
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs)
|
||||
elif f0_method == "crepe": # Fork Feature: Added crepe f0 for f0 feature extraction
|
||||
elif (
|
||||
f0_method == "crepe"
|
||||
): # Fork Feature: Added crepe f0 for f0 feature extraction
|
||||
# Pick a batch size that doesn't cause memory errors on your gpu
|
||||
torch_device_index = 0
|
||||
torch_device = None
|
||||
if torch.cuda.is_available():
|
||||
torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
|
||||
torch_device = torch.device(
|
||||
f"cuda:{torch_device_index % torch.cuda.device_count()}"
|
||||
)
|
||||
elif torch.backends.mps.is_available():
|
||||
torch_device = torch.device("mps")
|
||||
else:
|
||||
@@ -287,7 +294,9 @@ class FeatureInput(object):
|
||||
torch_device_index = 0
|
||||
torch_device = None
|
||||
if torch.cuda.is_available():
|
||||
torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
|
||||
torch_device = torch.device(
|
||||
f"cuda:{torch_device_index % torch.cuda.device_count()}"
|
||||
)
|
||||
elif torch.backends.mps.is_available():
|
||||
torch_device = torch.device("mps")
|
||||
else:
|
||||
@@ -311,7 +320,7 @@ class FeatureInput(object):
|
||||
"full",
|
||||
batch_size=crepe_hop_length * 2,
|
||||
device=torch_device,
|
||||
pad=True
|
||||
pad=True,
|
||||
)
|
||||
p_len = p_len or x.shape[0] // crepe_hop_length
|
||||
# Resize the pitch
|
||||
@@ -320,7 +329,7 @@ class FeatureInput(object):
|
||||
target = np.interp(
|
||||
np.arange(0, len(source) * p_len, len(source)) / p_len,
|
||||
np.arange(0, len(source)),
|
||||
source
|
||||
source,
|
||||
)
|
||||
f0 = np.nan_to_num(target)
|
||||
elif "hybrid" in f0_method: # EXPERIMENTAL
|
||||
@@ -333,7 +342,7 @@ class FeatureInput(object):
|
||||
self.f0_max,
|
||||
p_len,
|
||||
crepe_hop_length,
|
||||
time_step
|
||||
time_step,
|
||||
)
|
||||
# Mangio-RVC-Fork Feature: Add hybrid f0 inference to feature extraction. EXPERIMENTAL...
|
||||
|
||||
@@ -362,14 +371,19 @@ class FeatureInput(object):
|
||||
with tqdm.tqdm(total=len(paths), leave=True, position=thread_n) as pbar:
|
||||
for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths):
|
||||
try:
|
||||
pbar.set_description("thread:%s, f0ing, Hop-Length:%s" % (thread_n, crepe_hop_length))
|
||||
pbar.set_description(
|
||||
"thread:%s, f0ing, Hop-Length:%s"
|
||||
% (thread_n, crepe_hop_length)
|
||||
)
|
||||
pbar.update(1)
|
||||
if (
|
||||
os.path.exists(opt_path1 + ".npy") == True
|
||||
and os.path.exists(opt_path2 + ".npy") == True
|
||||
):
|
||||
continue
|
||||
featur_pit = self.compute_f0(inp_path, f0_method, crepe_hop_length)
|
||||
featur_pit = self.compute_f0(
|
||||
inp_path, f0_method, crepe_hop_length
|
||||
)
|
||||
np.save(
|
||||
opt_path2,
|
||||
featur_pit,
|
||||
@@ -382,7 +396,9 @@ class FeatureInput(object):
|
||||
allow_pickle=False,
|
||||
) # ori
|
||||
except:
|
||||
printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc()))
|
||||
printt(
|
||||
"f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -411,12 +427,7 @@ if __name__ == "__main__":
|
||||
for i in range(n_p):
|
||||
p = Process(
|
||||
target=featureInput.go,
|
||||
args=(
|
||||
paths[i::n_p],
|
||||
f0method,
|
||||
extraction_crepe_hop_length,
|
||||
i
|
||||
),
|
||||
args=(paths[i::n_p], f0method, extraction_crepe_hop_length, i),
|
||||
)
|
||||
ps.append(p)
|
||||
p.start()
|
||||
|
||||
37
gui_v0.py
37
gui_v0.py
@@ -51,8 +51,10 @@ class RVC:
|
||||
self.window = 160
|
||||
|
||||
# Get Torch Device
|
||||
if(torch.cuda.is_available()):
|
||||
self.torch_device = torch.device(f"cuda:{0 % torch.cuda.device_count()}")
|
||||
if torch.cuda.is_available():
|
||||
self.torch_device = torch.device(
|
||||
f"cuda:{0 % torch.cuda.device_count()}"
|
||||
)
|
||||
elif torch.backends.mps.is_available():
|
||||
self.torch_device = torch.device("mps")
|
||||
else:
|
||||
@@ -150,11 +152,11 @@ class RVC:
|
||||
|
||||
f0 = 0
|
||||
# Here, check f0_methods and get their computations
|
||||
if(self.f0_method == 'harvest'):
|
||||
if self.f0_method == "harvest":
|
||||
f0 = self.get_harvest_computation(x, f0_min, f0_max)
|
||||
elif(self.f0_method == 'reg-crepe'):
|
||||
elif self.f0_method == "reg-crepe":
|
||||
f0 = self.get_regular_crepe_computation(x, f0_min, f0_max)
|
||||
elif(self.f0_method == 'reg-crepe-tiny'):
|
||||
elif self.f0_method == "reg-crepe-tiny":
|
||||
f0 = self.get_regular_crepe_computation(x, f0_min, f0_max, "tiny")
|
||||
|
||||
# Calculate f0_course and f0_bak here
|
||||
@@ -328,11 +330,7 @@ class GUI:
|
||||
[
|
||||
sg.Frame(
|
||||
title="Proudly forked by Mangio621",
|
||||
layout=[
|
||||
[
|
||||
sg.Image('./mangio_utils/lol.png')
|
||||
]
|
||||
]
|
||||
layout=[[sg.Image("./mangio_utils/lol.png")]],
|
||||
),
|
||||
sg.Frame(
|
||||
title=i18n("加载模型"),
|
||||
@@ -384,14 +382,16 @@ class GUI:
|
||||
),
|
||||
],
|
||||
],
|
||||
)
|
||||
),
|
||||
],
|
||||
[
|
||||
# Mangio f0 Selection frame Here
|
||||
sg.Frame(
|
||||
layout=[
|
||||
[
|
||||
sg.Radio("Harvest", "f0_method", key="harvest", default=True),
|
||||
sg.Radio(
|
||||
"Harvest", "f0_method", key="harvest", default=True
|
||||
),
|
||||
sg.Radio("Crepe", "f0_method", key="reg-crepe"),
|
||||
sg.Radio("Crepe Tiny", "f0_method", key="reg-crepe-tiny"),
|
||||
]
|
||||
@@ -539,17 +539,18 @@ class GUI:
|
||||
# Function that returns the used f0 method in string format "harvest"
|
||||
def get_f0_method_from_radios(self, values):
    """Return the name of the f0 method whose radio button is selected.

    Args:
        values: mapping that must contain the keys ``"harvest"``,
            ``"reg-crepe"`` and ``"reg-crepe-tiny"``.

    Returns:
        The first key, in that order, whose value equals ``True``;
        ``"harvest"`` when none is selected.
    """
    # Read all three keys up front so a missing key raises KeyError
    # regardless of which radio is selected.
    f0_array = [
        (name, values[name])
        for name in ("harvest", "reg-crepe", "reg-crepe-tiny")
    ]
    for name, selected in f0_array:
        if selected == True:  # noqa: E712 - keep the original equality test
            return name
    return "harvest"  # Default Harvest if nothing is selected somehow
|
||||
|
||||
def set_values(self, values):
|
||||
|
||||
469
infer-web.py
469
infer-web.py
@@ -13,6 +13,7 @@ import warnings
|
||||
import numpy as np
|
||||
import torch
|
||||
import re
|
||||
|
||||
os.environ["OPENBLAS_NUM_THREADS"] = "1"
|
||||
os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
|
||||
import logging
|
||||
@@ -60,10 +61,10 @@ DoFormant = False
|
||||
Quefrency = 8.0
|
||||
Timbre = 1.2
|
||||
|
||||
with open('formanting.txt', 'w+') as fsf:
|
||||
with open("formanting.txt", "w+") as fsf:
|
||||
fsf.truncate(0)
|
||||
|
||||
fsf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n'])
|
||||
fsf.writelines([str(DoFormant) + "\n", str(Quefrency) + "\n", str(Timbre) + "\n"])
|
||||
|
||||
|
||||
config = Config()
|
||||
@@ -176,7 +177,6 @@ for root, dirs, files in os.walk(index_root, topdown=False):
|
||||
|
||||
for root, dirs, files in os.walk(audio_root, topdown=False):
|
||||
for name in files:
|
||||
|
||||
audio_paths.append("%s/%s" % (root, name))
|
||||
|
||||
uvr5_names = []
|
||||
@@ -184,59 +184,66 @@ for name in os.listdir(weight_uvr5_root):
|
||||
if name.endswith(".pth") or "onnx" in name:
|
||||
uvr5_names.append(name.replace(".pth", ""))
|
||||
|
||||
|
||||
def check_for_name():
    """Return the first entry of the module-level ``names`` in sorted order.

    Returns an empty string when ``names`` has no entries.
    """
    if len(names) == 0:
        return ""
    ordered = sorted(names)
    return ordered[0]
|
||||
|
||||
|
||||
def get_index():
    """Return the path of an ``.index`` file for the first model name.

    The model is the value returned by ``check_for_name()``; its log folder is
    ``./logs/<name before the first dot>``. The first ``.index`` file that
    ``os.listdir`` yields in that folder is returned with forward slashes.

    Returns:
        The index path, or ``""`` when there is no model, the folder does not
        exist, or it holds no ``.index`` file.
    """
    first_name = check_for_name()
    if first_name == "":
        return ""

    chosen_model = first_name.split(".")[0]
    logs_path = "./logs/" + chosen_model
    # isdir (not exists) so a stray file with that name cannot break listdir.
    if not os.path.isdir(logs_path):
        return ""

    for file in os.listdir(logs_path):
        if file.endswith(".index"):
            return os.path.join(logs_path, file).replace("\\", "/")
    return ""
|
||||
|
||||
|
||||
def get_indexes():
    """Collect ``.index`` files under ``./logs/`` into ``indexes_list``.

    Walks ``./logs/`` recursively and appends every file that ends in
    ``.index`` and does not contain ``"trained"`` in its name to the
    module-level ``indexes_list``, using forward slashes. Paths already in the
    list are skipped so repeated calls do not accumulate duplicates.

    Returns:
        ``indexes_list`` when it is non-empty, otherwise ``""``.
    """
    for dirpath, dirnames, filenames in os.walk("./logs/"):
        for filename in filenames:
            if filename.endswith(".index") and "trained" not in filename:
                index_path = os.path.join(dirpath, filename).replace("\\", "/")
                if index_path not in indexes_list:
                    indexes_list.append(index_path)

    if len(indexes_list) > 0:
        return indexes_list
    return ""
|
||||
|
||||
|
||||
fshift_presets_list = []
|
||||
|
||||
|
||||
def get_fshift_presets():
    """Return the ``.txt`` preset files found under ``./formantshiftcfg/``.

    The directory is walked recursively; each match is reported once, as a
    path with forward slashes, in the order ``os.walk`` yields it.

    Returns:
        A list of preset paths, or ``""`` when none are found (including when
        the directory does not exist).
    """
    fshift_presets_list = [
        os.path.join(dirpath, filename).replace("\\", "/")
        for dirpath, dirnames, filenames in os.walk("./formantshiftcfg/")
        for filename in filenames
        if filename.endswith(".txt")
    ]

    if len(fshift_presets_list) > 0:
        return fshift_presets_list
    return ""
|
||||
|
||||
|
||||
def get_audios():
    """Return the double-quoted path of the first file in ``<cwd>/audios/``.

    Nothing is looked up unless ``check_for_name()`` reports a model name.

    The previous code prefixed the directory path with a ``"`` before calling
    ``os.path.exists``, so the check could never succeed and the function
    always returned ``""``. The quote is now added only to the returned value.

    Returns:
        ``"<cwd>/audios/<file>"`` wrapped in double quotes for the first entry
        ``os.listdir`` yields, or ``""`` when there is no model name, no
        ``audios`` directory, or the directory is empty.
    """
    if check_for_name() == "":
        return ""

    audios_path = os.path.abspath(os.getcwd()) + "/audios/"
    if not os.path.isdir(audios_path):
        return ""

    for file in os.listdir(audios_path):
        # Only the first entry is used; the quotes wrap the finished path.
        return '"' + os.path.join(audios_path, file) + '"'
    return ""
|
||||
|
||||
|
||||
def vc_single(
|
||||
@@ -261,7 +268,7 @@ def vc_single(
|
||||
return "You need to upload an audio", None
|
||||
f0_up_key = int(f0_up_key)
|
||||
try:
|
||||
if input_audio_path0 == '':
|
||||
if input_audio_path0 == "":
|
||||
audio = load_audio(input_audio_path1, 16000, DoFormant, Quefrency, Timbre)
|
||||
|
||||
else:
|
||||
@@ -378,7 +385,7 @@ def vc_multi(
|
||||
resample_sr,
|
||||
rms_mix_rate,
|
||||
protect,
|
||||
crepe_hop_length
|
||||
crepe_hop_length,
|
||||
)
|
||||
if "Success" in info:
|
||||
try:
|
||||
@@ -522,7 +529,11 @@ def get_vc(sid, to_return_protect0, to_return_protect1):
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
cpt = None
|
||||
return ({"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"})
|
||||
return (
|
||||
{"visible": False, "__type__": "update"},
|
||||
{"visible": False, "__type__": "update"},
|
||||
{"visible": False, "__type__": "update"},
|
||||
)
|
||||
person = "%s/%s" % (weight_root, sid)
|
||||
print("loading %s" % person)
|
||||
cpt = torch.load(person, map_location="cpu")
|
||||
@@ -587,11 +598,15 @@ def change_choices():
|
||||
index_paths.append("%s/%s" % (root, name))
|
||||
for file in os.listdir(audios_path):
|
||||
audio_paths.append("%s/%s" % (audio_root, file))
|
||||
return {"choices": sorted(names), "__type__": "update"}, {"choices": sorted(index_paths), "__type__": "update"}, {"choices": sorted(audio_paths), "__type__": "update"}
|
||||
return (
|
||||
{"choices": sorted(names), "__type__": "update"},
|
||||
{"choices": sorted(index_paths), "__type__": "update"},
|
||||
{"choices": sorted(audio_paths), "__type__": "update"},
|
||||
)
|
||||
|
||||
|
||||
def clean():
    """Return an update payload that sets a component's value to ``""``."""
    return {"value": "", "__type__": "update"}
|
||||
|
||||
|
||||
sr_dict = {
|
||||
@@ -624,15 +639,18 @@ def if_done_multi(done, ps):
|
||||
break
|
||||
done[0] = True
|
||||
|
||||
def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button):
|
||||
|
||||
if (cbox):
|
||||
|
||||
def formant_enabled(
|
||||
cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button
|
||||
):
|
||||
if cbox:
|
||||
DoFormant = True
|
||||
with open('formanting.txt', 'w') as fxxf:
|
||||
with open("formanting.txt", "w") as fxxf:
|
||||
fxxf.truncate(0)
|
||||
|
||||
fxxf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n'])
|
||||
fxxf.writelines(
|
||||
[str(DoFormant) + "\n", str(Quefrency) + "\n", str(Timbre) + "\n"]
|
||||
)
|
||||
# print(f"is checked? - {cbox}\ngot {DoFormant}")
|
||||
|
||||
return (
|
||||
@@ -644,14 +662,14 @@ def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_ref
|
||||
{"visible": True, "__type__": "update"},
|
||||
)
|
||||
|
||||
|
||||
else:
|
||||
|
||||
DoFormant = False
|
||||
with open('formanting.txt', 'w') as fxf:
|
||||
with open("formanting.txt", "w") as fxf:
|
||||
fxf.truncate(0)
|
||||
|
||||
fxf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n'])
|
||||
fxf.writelines(
|
||||
[str(DoFormant) + "\n", str(Quefrency) + "\n", str(Timbre) + "\n"]
|
||||
)
|
||||
# print(f"is checked? - {cbox}\ngot {DoFormant}")
|
||||
return (
|
||||
{"value": False, "__type__": "update"},
|
||||
@@ -669,20 +687,25 @@ def formant_apply(qfrency, tmbre):
|
||||
Timbre = tmbre
|
||||
DoFormant = True
|
||||
|
||||
with open('formanting.txt', 'w') as fxxxf:
|
||||
with open("formanting.txt", "w") as fxxxf:
|
||||
fxxxf.truncate(0)
|
||||
|
||||
fxxxf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n'])
|
||||
return ({"value": Quefrency, "__type__": "update"}, {"value": Timbre, "__type__": "update"})
|
||||
fxxxf.writelines(
|
||||
[str(DoFormant) + "\n", str(Quefrency) + "\n", str(Timbre) + "\n"]
|
||||
)
|
||||
return (
|
||||
{"value": Quefrency, "__type__": "update"},
|
||||
{"value": Timbre, "__type__": "update"},
|
||||
)
|
||||
|
||||
|
||||
def update_fshift_presets(preset, qfrency, tmbre):
|
||||
|
||||
qfrency, tmbre = preset_apply(preset, qfrency, tmbre)
|
||||
|
||||
if (str(preset) != ''):
|
||||
with open(str(preset), 'r') as p:
|
||||
if str(preset) != "":
|
||||
with open(str(preset), "r") as p:
|
||||
content = p.readlines()
|
||||
qfrency, tmbre = content[0].split('\n')[0], content[1]
|
||||
qfrency, tmbre = content[0].split("\n")[0], content[1]
|
||||
|
||||
formant_apply(qfrency, tmbre)
|
||||
else:
|
||||
@@ -1161,7 +1184,7 @@ def train1key(
|
||||
if_cache_gpu17,
|
||||
if_save_every_weights18,
|
||||
version19,
|
||||
echl
|
||||
echl,
|
||||
):
|
||||
infos = []
|
||||
|
||||
@@ -1202,7 +1225,7 @@ def train1key(
|
||||
model_log_dir,
|
||||
np7,
|
||||
f0method8,
|
||||
echl
|
||||
echl,
|
||||
)
|
||||
yield get_info_str(cmd)
|
||||
p = Popen(cmd, shell=True, cwd=now_dir)
|
||||
@@ -1434,7 +1457,6 @@ def export_onnx(ModelPath, ExportedPath):
|
||||
|
||||
device = "cpu" # 导出时设备(不影响使用模型)
|
||||
|
||||
|
||||
net_g = SynthesizerTrnMsNSFsidM(
|
||||
*cpt["config"], is_half=False, version=cpt.get("version", "v1")
|
||||
) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16)
|
||||
@@ -1476,14 +1498,18 @@ import scipy.io.wavfile as wavfile
|
||||
|
||||
cli_current_page = "HOME"
|
||||
|
||||
|
||||
def cli_split_command(com):
    """Split a CLI command line into arguments.

    A double-quoted segment delimited by whitespace or the string boundaries
    becomes one argument with its quotes removed; every other run of
    non-whitespace characters becomes its own argument.
    """
    exp = r'(?:(?<=\s)|^)"(.*?)"(?=\s|$)|(\S+)'
    # findall yields (quoted, bare) pairs; at most one of the two is non-empty.
    return [quoted if quoted else bare for quoted, bare in regex.findall(exp, com)]
|
||||
|
||||
|
||||
def execute_generator_function(genObject):
    """Exhaust *genObject*, discarding every value it yields.

    Used to run generator-based functions purely for their side effects.
    """
    for _ in genObject:
        pass
|
||||
|
||||
|
||||
def cli_infer(com):
|
||||
# get VC first
|
||||
@@ -1527,13 +1553,24 @@ def cli_infer(com):
|
||||
crepe_hop_length,
|
||||
)
|
||||
if "Success." in conversion_data[0]:
|
||||
print("Mangio-RVC-Fork Infer-CLI: Inference succeeded. Writing to %s/%s..." % ('audio-outputs', output_file_name))
|
||||
wavfile.write('%s/%s' % ('audio-outputs', output_file_name), conversion_data[1][0], conversion_data[1][1])
|
||||
print("Mangio-RVC-Fork Infer-CLI: Finished! Saved output to %s/%s" % ('audio-outputs', output_file_name))
|
||||
print(
|
||||
"Mangio-RVC-Fork Infer-CLI: Inference succeeded. Writing to %s/%s..."
|
||||
% ("audio-outputs", output_file_name)
|
||||
)
|
||||
wavfile.write(
|
||||
"%s/%s" % ("audio-outputs", output_file_name),
|
||||
conversion_data[1][0],
|
||||
conversion_data[1][1],
|
||||
)
|
||||
print(
|
||||
"Mangio-RVC-Fork Infer-CLI: Finished! Saved output to %s/%s"
|
||||
% ("audio-outputs", output_file_name)
|
||||
)
|
||||
else:
|
||||
print("Mangio-RVC-Fork Infer-CLI: Inference failed. Here's the traceback: ")
|
||||
print(conversion_data[0])
|
||||
|
||||
|
||||
def cli_pre_process(com):
|
||||
com = cli_split_command(com)
|
||||
model_name = com[0]
|
||||
@@ -1543,14 +1580,12 @@ def cli_pre_process(com):
|
||||
|
||||
print("Mangio-RVC-Fork Pre-process: Starting...")
|
||||
generator = preprocess_dataset(
|
||||
trainset_directory,
|
||||
model_name,
|
||||
sample_rate,
|
||||
num_processes
|
||||
trainset_directory, model_name, sample_rate, num_processes
|
||||
)
|
||||
execute_generator_function(generator)
|
||||
print("Mangio-RVC-Fork Pre-process: Finished")
|
||||
|
||||
|
||||
def cli_extract_feature(com):
|
||||
com = cli_split_command(com)
|
||||
model_name = com[0]
|
||||
@@ -1571,11 +1606,12 @@ def cli_extract_feature(com):
|
||||
has_pitch_guidance,
|
||||
model_name,
|
||||
version,
|
||||
crepe_hop_length
|
||||
crepe_hop_length,
|
||||
)
|
||||
execute_generator_function(generator)
|
||||
print("Mangio-RVC-Fork Feature Extraction: Finished")
|
||||
|
||||
|
||||
def cli_train(com):
|
||||
com = cli_split_command(com)
|
||||
model_name = com[0]
|
||||
@@ -1611,21 +1647,20 @@ def cli_train(com):
|
||||
gpu_card_slot_numbers,
|
||||
if_cache_gpu,
|
||||
if_save_every_weight,
|
||||
version
|
||||
version,
|
||||
)
|
||||
|
||||
|
||||
def cli_train_feature(com):
    """Run feature-index training from a CLI command line.

    Args:
        com: raw command string; after splitting, argument 0 is the model
            name and argument 1 is the version. Fewer than two arguments
            raises ``IndexError``.
    """
    com = cli_split_command(com)
    model_name = com[0]
    version = com[1]
    print("Mangio-RVC-Fork Train Feature Index-CLI: Training... Please wait")
    generator = train_index(model_name, version)
    # train_index's result is iterated to completion; yielded values are dropped.
    execute_generator_function(generator)
    print("Mangio-RVC-Fork Train Feature Index-CLI: Done!")
|
||||
|
||||
|
||||
def cli_extract_model(com):
|
||||
com = cli_split_command(com)
|
||||
model_path = com[0]
|
||||
@@ -1635,12 +1670,7 @@ def cli_extract_model(com):
|
||||
info = com[4]
|
||||
version = com[5]
|
||||
extract_small_model_process = extract_small_model(
|
||||
model_path,
|
||||
save_name,
|
||||
sample_rate,
|
||||
has_pitch_guidance,
|
||||
info,
|
||||
version
|
||||
model_path, save_name, sample_rate, has_pitch_guidance, info, version
|
||||
)
|
||||
if extract_small_model_process == "Success.":
|
||||
print("Mangio-RVC-Fork Extract Small Model: Success!")
|
||||
@@ -1650,40 +1680,64 @@ def cli_extract_model(com):
|
||||
|
||||
|
||||
def preset_apply(preset, qfer, tmbr):
    """Load a formant preset file and apply it, returning two update payloads.

    Args:
        preset: path of the preset file; an empty string means "no preset".
        qfer: value used for the first payload when no preset is given.
        tmbr: value used for the second payload when no preset is given.

    When *preset* is non-empty, line 1 of the file replaces *qfer* and line 2
    replaces *tmbr* (both as strings with the line break removed), and the
    pair is passed to ``formant_apply``.

    Returns:
        A 2-tuple of ``{"value": ..., "__type__": "update"}`` dicts.
    """
    if str(preset) != "":
        with open(str(preset), "r") as p:
            content = p.readlines()
        # Drop the line break from both lines; previously only the first
        # line was cleaned, so the second kept its trailing "\n".
        qfer, tmbr = content[0].split("\n")[0], content[1].split("\n")[0]

        formant_apply(qfer, tmbr)

    return (
        {"value": qfer, "__type__": "update"},
        {"value": tmbr, "__type__": "update"},
    )
|
||||
|
||||
|
||||
def print_page_details():
|
||||
if cli_current_page == "HOME":
|
||||
print(" go home : Takes you back to home with a navigation list.")
|
||||
print(" go infer : Takes you to inference command execution.\n")
|
||||
print(" go pre-process : Takes you to training step.1) pre-process command execution.")
|
||||
print(" go extract-feature : Takes you to training step.2) extract-feature command execution.")
|
||||
print(" go train : Takes you to training step.3) being or continue training command execution.")
|
||||
print(" go train-feature : Takes you to the train feature index command execution.\n")
|
||||
print(" go extract-model : Takes you to the extract small model command execution.")
|
||||
print(
|
||||
" go pre-process : Takes you to training step.1) pre-process command execution."
|
||||
)
|
||||
print(
|
||||
" go extract-feature : Takes you to training step.2) extract-feature command execution."
|
||||
)
|
||||
print(
|
||||
" go train : Takes you to training step.3) being or continue training command execution."
|
||||
)
|
||||
print(
|
||||
" go train-feature : Takes you to the train feature index command execution.\n"
|
||||
)
|
||||
print(
|
||||
" go extract-model : Takes you to the extract small model command execution."
|
||||
)
|
||||
elif cli_current_page == "INFER":
|
||||
print(" arg 1) model name with .pth in ./weights: mi-test.pth")
|
||||
print(" arg 2) source audio path: myFolder\\MySource.wav")
|
||||
print(" arg 3) output file name to be placed in './audio-outputs': MyTest.wav")
|
||||
print(" arg 4) feature index file path: logs/mi-test/added_IVF3042_Flat_nprobe_1.index")
|
||||
print(
|
||||
" arg 3) output file name to be placed in './audio-outputs': MyTest.wav"
|
||||
)
|
||||
print(
|
||||
" arg 4) feature index file path: logs/mi-test/added_IVF3042_Flat_nprobe_1.index"
|
||||
)
|
||||
print(" arg 5) speaker id: 0")
|
||||
print(" arg 6) transposition: 0")
|
||||
print(" arg 7) f0 method: harvest (pm, harvest, crepe, crepe-tiny, hybrid[x,x,x,x], mangio-crepe, mangio-crepe-tiny)")
|
||||
print(
|
||||
" arg 7) f0 method: harvest (pm, harvest, crepe, crepe-tiny, hybrid[x,x,x,x], mangio-crepe, mangio-crepe-tiny)"
|
||||
)
|
||||
print(" arg 8) crepe hop length: 160")
|
||||
print(" arg 9) harvest median filter radius: 3 (0-7)")
|
||||
print(" arg 10) post resample rate: 0")
|
||||
print(" arg 11) mix volume envelope: 1")
|
||||
print(" arg 12) feature index ratio: 0.78 (0-1)")
|
||||
print(" arg 13) Voiceless Consonant Protection (Less Artifact): 0.33 (Smaller number = more protection. 0.50 means Dont Use.) \n")
|
||||
print("Example: mi-test.pth saudio/Sidney.wav myTest.wav logs/mi-test/added_index.index 0 -2 harvest 160 3 0 1 0.95 0.33")
|
||||
print(
|
||||
" arg 13) Voiceless Consonant Protection (Less Artifact): 0.33 (Smaller number = more protection. 0.50 means Dont Use.) \n"
|
||||
)
|
||||
print(
|
||||
"Example: mi-test.pth saudio/Sidney.wav myTest.wav logs/mi-test/added_index.index 0 -2 harvest 160 3 0 1 0.95 0.33"
|
||||
)
|
||||
elif cli_current_page == "PRE-PROCESS":
|
||||
print(" arg 1) Model folder name in ./logs: mi-test")
|
||||
print(" arg 2) Trainset directory: mydataset (or) E:\\my-data-set")
|
||||
@@ -1709,8 +1763,12 @@ def print_page_details():
|
||||
print(" arg 7) Batch size: 8")
|
||||
print(" arg 8) Gpu card slot: 0 (0-1-2 if using 3 GPUs)")
|
||||
print(" arg 9) Save only the latest checkpoint: 0 (0 for no, 1 for yes)")
|
||||
print(" arg 10) Whether to cache training set to vram: 0 (0 for no, 1 for yes)")
|
||||
print(" arg 11) Save extracted small model every generation?: 0 (0 for no, 1 for yes)")
|
||||
print(
|
||||
" arg 10) Whether to cache training set to vram: 0 (0 for no, 1 for yes)"
|
||||
)
|
||||
print(
|
||||
" arg 11) Save extracted small model every generation?: 0 (0 for no, 1 for yes)"
|
||||
)
|
||||
print(" arg 12) Model architecture version: v2 (use either v1 or v2)\n")
|
||||
print("Example: mi-test 40k 1 0 50 10000 8 0 0 0 0 v2")
|
||||
elif cli_current_page == "TRAIN-FEATURE":
|
||||
@@ -1724,14 +1782,18 @@ def print_page_details():
|
||||
print(" arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)")
|
||||
print(' arg 5) Model information: "My Model"')
|
||||
print(" arg 6) Model architecture version: v2 (use either v1 or v2)\n")
|
||||
print('Example: logs/mi-test/G_168000.pth MyModel 40k 1 "Created by Cole Mangio" v2')
|
||||
print(
|
||||
'Example: logs/mi-test/G_168000.pth MyModel 40k 1 "Created by Cole Mangio" v2'
|
||||
)
|
||||
print("")
|
||||
|
||||
|
||||
def change_page(page):
    """Store *page* in the module-level ``cli_current_page`` and return 0."""
    global cli_current_page
    cli_current_page = page
    return 0
|
||||
|
||||
|
||||
def execute_command(com):
|
||||
if com == "go home":
|
||||
return change_page("HOME")
|
||||
@@ -1765,6 +1827,7 @@ def execute_command(com):
|
||||
elif cli_current_page == "EXTRACT-MODEL":
|
||||
cli_extract_model(com)
|
||||
|
||||
|
||||
def cli_navigation_loop():
|
||||
while True:
|
||||
print("You are currently in '%s':" % cli_current_page)
|
||||
@@ -1775,45 +1838,56 @@ def cli_navigation_loop():
|
||||
except:
|
||||
print(traceback.format_exc())
|
||||
|
||||
if(config.is_cli):
|
||||
|
||||
if config.is_cli:
|
||||
print("\n\nMangio-RVC-Fork v2 CLI App!\n")
|
||||
print("Welcome to the CLI version of RVC. Please read the documentation on https://github.com/Mangio621/Mangio-RVC-Fork (README.MD) to understand how to use this app.\n")
|
||||
print(
|
||||
"Welcome to the CLI version of RVC. Please read the documentation on https://github.com/Mangio621/Mangio-RVC-Fork (README.MD) to understand how to use this app.\n"
|
||||
)
|
||||
cli_navigation_loop()
|
||||
|
||||
# endregion
|
||||
|
||||
# region RVC WebUI App
|
||||
|
||||
|
||||
def get_presets():
|
||||
data = None
|
||||
with open('../inference-presets.json', 'r') as file:
|
||||
with open("../inference-presets.json", "r") as file:
|
||||
data = json.load(file)
|
||||
preset_names = []
|
||||
for preset in data['presets']:
|
||||
preset_names.append(preset['name'])
|
||||
for preset in data["presets"]:
|
||||
preset_names.append(preset["name"])
|
||||
|
||||
return preset_names
|
||||
|
||||
|
||||
def match_index(sid0):
|
||||
picked = False
|
||||
# folder = sid0.split('.')[0]
|
||||
|
||||
# folder = re.split(r'. |_', sid0)[0]
|
||||
folder = sid0.split('.')[0].split('_')[0]
|
||||
folder = sid0.split(".")[0].split("_")[0]
|
||||
# folder_test = sid0.split('.')[0].split('_')[0].split('-')[0]
|
||||
parent_dir = "./logs/" + folder
|
||||
# print(parent_dir)
|
||||
if os.path.exists(parent_dir):
|
||||
# print('path exists')
|
||||
for filename in os.listdir(parent_dir.replace('\\','/')):
|
||||
for filename in os.listdir(parent_dir.replace("\\", "/")):
|
||||
if filename.endswith(".index"):
|
||||
for i in range(len(indexes_list)):
|
||||
if indexes_list[i] == (os.path.join(("./logs/" + folder), filename).replace('\\','/')):
|
||||
print('regular index found')
|
||||
if indexes_list[i] == (
|
||||
os.path.join(("./logs/" + folder), filename).replace("\\", "/")
|
||||
):
|
||||
print("regular index found")
|
||||
break
|
||||
else:
|
||||
if indexes_list[i] == (os.path.join(("./logs/" + folder.lower()), filename).replace('\\','/')):
|
||||
print('lowered index found')
|
||||
if indexes_list[i] == (
|
||||
os.path.join(
|
||||
("./logs/" + folder.lower()), filename
|
||||
).replace("\\", "/")
|
||||
):
|
||||
print("lowered index found")
|
||||
parent_dir = "./logs/" + folder.lower()
|
||||
break
|
||||
# elif (indexes_list[i]).casefold() == ((os.path.join(("./logs/" + folder), filename).replace('\\','/')).casefold()):
|
||||
@@ -1833,26 +1907,25 @@ def match_index(sid0):
|
||||
# continue
|
||||
|
||||
# print('all done')
|
||||
index_path=os.path.join(parent_dir.replace('\\','/'), filename.replace('\\','/')).replace('\\','/')
|
||||
index_path = os.path.join(
|
||||
parent_dir.replace("\\", "/"), filename.replace("\\", "/")
|
||||
).replace("\\", "/")
|
||||
# print(index_path)
|
||||
return (index_path, index_path)
|
||||
|
||||
|
||||
else:
|
||||
# print('nothing found')
|
||||
return ('', '')
|
||||
return ("", "")
|
||||
|
||||
|
||||
def choveraudio():
|
||||
return ''
|
||||
return ""
|
||||
|
||||
|
||||
def stoptraining(mim):
|
||||
if int(mim) == 1:
|
||||
|
||||
with open("stop.txt", "w+") as tostops:
|
||||
|
||||
|
||||
tostops.writelines('stop')
|
||||
tostops.writelines("stop")
|
||||
# p.terminate()
|
||||
# p.kill()
|
||||
os.kill(PID, signal.SIGTERM)
|
||||
@@ -1864,8 +1937,9 @@ def stoptraining(mim):
|
||||
{"visible": True, "__type__": "update"},
|
||||
)
|
||||
|
||||
|
||||
# Default-GUI
|
||||
with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
with gr.Blocks(theme="HaleyCH/HaleyCH_Theme") as app:
|
||||
gr.HTML("<h1> The Mangio-RVC-Fork 💻 </h1>")
|
||||
gr.Markdown(
|
||||
value=i18n(
|
||||
@@ -1873,7 +1947,6 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
)
|
||||
)
|
||||
with gr.Tabs():
|
||||
|
||||
with gr.TabItem(i18n("模型推理")):
|
||||
# Inference Preset Row
|
||||
# with gr.Row():
|
||||
@@ -1885,13 +1958,14 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
|
||||
# Other RVC stuff
|
||||
with gr.Row():
|
||||
|
||||
# sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value=check_for_name())
|
||||
sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value='')
|
||||
sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value="")
|
||||
# input_audio_path2
|
||||
|
||||
|
||||
refresh_button = gr.Button(i18n("Refresh voice list, index path and audio files"), variant="primary")
|
||||
refresh_button = gr.Button(
|
||||
i18n("Refresh voice list, index path and audio files"),
|
||||
variant="primary",
|
||||
)
|
||||
clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
|
||||
spk_item = gr.Slider(
|
||||
minimum=0,
|
||||
@@ -1914,21 +1988,38 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
|
||||
)
|
||||
input_audio0 = gr.Textbox(
|
||||
label=i18n("Add audio's name to the path to the audio file to be processed (default is the correct format example) Remove the path to use an audio from the dropdown list:"),
|
||||
value=os.path.abspath(os.getcwd()).replace('\\', '/') + "/audios/" + "audio.wav",
|
||||
label=i18n(
|
||||
"Add audio's name to the path to the audio file to be processed (default is the correct format example) Remove the path to use an audio from the dropdown list:"
|
||||
),
|
||||
value=os.path.abspath(os.getcwd()).replace("\\", "/")
|
||||
+ "/audios/"
|
||||
+ "audio.wav",
|
||||
)
|
||||
input_audio1 = gr.Dropdown(
|
||||
label=i18n("Auto detect audio path and select from the dropdown:"),
|
||||
label=i18n(
|
||||
"Auto detect audio path and select from the dropdown:"
|
||||
),
|
||||
choices=sorted(audio_paths),
|
||||
value=get_audios(),
|
||||
interactive=True,
|
||||
)
|
||||
input_audio1.change(fn=choveraudio,inputs=[],outputs=[input_audio0])
|
||||
input_audio1.change(
|
||||
fn=choveraudio, inputs=[], outputs=[input_audio0]
|
||||
)
|
||||
f0method0 = gr.Radio(
|
||||
label=i18n(
|
||||
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"
|
||||
),
|
||||
choices=["pm", "harvest", "dio", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe"], # Fork Feature. Add Crepe-Tiny
|
||||
choices=[
|
||||
"pm",
|
||||
"harvest",
|
||||
"dio",
|
||||
"crepe",
|
||||
"crepe-tiny",
|
||||
"mangio-crepe",
|
||||
"mangio-crepe-tiny",
|
||||
"rmvpe",
|
||||
], # Fork Feature. Add Crepe-Tiny
|
||||
value="rmvpe",
|
||||
interactive=True,
|
||||
)
|
||||
@@ -1938,7 +2029,7 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
step=1,
|
||||
label=i18n("crepe_hop_length"),
|
||||
value=120,
|
||||
interactive=True
|
||||
interactive=True,
|
||||
)
|
||||
filter_radius0 = gr.Slider(
|
||||
minimum=0,
|
||||
@@ -1964,11 +2055,10 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
)
|
||||
# sid0.select(fn=match_index, inputs=sid0, outputs=file_index2)
|
||||
|
||||
|
||||
|
||||
|
||||
refresh_button.click(
|
||||
fn=change_choices, inputs=[], outputs=[sid0, file_index2, input_audio1]
|
||||
fn=change_choices,
|
||||
inputs=[],
|
||||
outputs=[sid0, file_index2, input_audio1],
|
||||
)
|
||||
# file_big_npy1 = gr.Textbox(
|
||||
# label=i18n("特征文件路径"),
|
||||
@@ -2017,12 +2107,14 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
)
|
||||
|
||||
formant_preset = gr.Dropdown(
|
||||
value='',
|
||||
value="",
|
||||
choices=get_fshift_presets(),
|
||||
label="browse presets for formanting",
|
||||
visible=False,
|
||||
)
|
||||
formant_refresh_button = gr.Button(value='\U0001f504', visible=False,variant='primary')
|
||||
formant_refresh_button = gr.Button(
|
||||
value="\U0001f504", visible=False, variant="primary"
|
||||
)
|
||||
# formant_refresh_button = ToolButton( elem_id='1')
|
||||
# create_refresh_button(formant_preset, lambda: {"choices": formant_preset}, "refresh_list_shiftpresets")
|
||||
|
||||
@@ -2045,11 +2137,41 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
interactive=True,
|
||||
)
|
||||
|
||||
formant_preset.change(fn=preset_apply, inputs=[formant_preset, qfrency, tmbre], outputs=[qfrency, tmbre])
|
||||
formant_preset.change(
|
||||
fn=preset_apply,
|
||||
inputs=[formant_preset, qfrency, tmbre],
|
||||
outputs=[qfrency, tmbre],
|
||||
)
|
||||
frmntbut = gr.Button("Apply", variant="primary", visible=False)
|
||||
formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button])
|
||||
frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
|
||||
formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre])
|
||||
formanting.change(
|
||||
fn=formant_enabled,
|
||||
inputs=[
|
||||
formanting,
|
||||
qfrency,
|
||||
tmbre,
|
||||
frmntbut,
|
||||
formant_preset,
|
||||
formant_refresh_button,
|
||||
],
|
||||
outputs=[
|
||||
formanting,
|
||||
qfrency,
|
||||
tmbre,
|
||||
frmntbut,
|
||||
formant_preset,
|
||||
formant_refresh_button,
|
||||
],
|
||||
)
|
||||
frmntbut.click(
|
||||
fn=formant_apply,
|
||||
inputs=[qfrency, tmbre],
|
||||
outputs=[qfrency, tmbre],
|
||||
)
|
||||
formant_refresh_button.click(
|
||||
fn=update_fshift_presets,
|
||||
inputs=[formant_preset, qfrency, tmbre],
|
||||
outputs=[formant_preset, qfrency, tmbre],
|
||||
)
|
||||
##formant_refresh_button.click(fn=preset_apply, inputs=[formant_preset, qfrency, tmbre], outputs=[formant_preset, qfrency, tmbre])
|
||||
##formant_refresh_button.click(fn=update_fshift_presets, inputs=[formant_preset, qfrency, tmbre], outputs=[formant_preset, qfrency, tmbre])
|
||||
f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"))
|
||||
@@ -2074,7 +2196,7 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
resample_sr0,
|
||||
rms_mix_rate0,
|
||||
protect0,
|
||||
crepe_hop_length
|
||||
crepe_hop_length,
|
||||
],
|
||||
[vc_output1, vc_output2],
|
||||
)
|
||||
@@ -2116,7 +2238,11 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
value=get_index(),
|
||||
interactive=True,
|
||||
)
|
||||
sid0.select(fn=match_index, inputs=[sid0], outputs=[file_index2, file_index4])
|
||||
sid0.select(
|
||||
fn=match_index,
|
||||
inputs=[sid0],
|
||||
outputs=[file_index2, file_index4],
|
||||
)
|
||||
refresh_button.click(
|
||||
fn=lambda: change_choices()[1],
|
||||
inputs=[],
|
||||
@@ -2163,7 +2289,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
with gr.Column():
|
||||
dir_input = gr.Textbox(
|
||||
label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
|
||||
value=os.path.abspath(os.getcwd()).replace('\\', '/') + "/audios/",
|
||||
value=os.path.abspath(os.getcwd()).replace("\\", "/")
|
||||
+ "/audios/",
|
||||
)
|
||||
inputs = gr.File(
|
||||
file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
|
||||
@@ -2226,7 +2353,7 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
with gr.Column():
|
||||
dir_wav_input = gr.Textbox(
|
||||
label=i18n("输入待处理音频文件夹路径"),
|
||||
value=((os.getcwd()).replace('\\', '/') + "/audios/")
|
||||
value=((os.getcwd()).replace("\\", "/") + "/audios/"),
|
||||
)
|
||||
wav_inputs = gr.File(
|
||||
file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
|
||||
@@ -2312,7 +2439,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
)
|
||||
with gr.Row():
|
||||
trainset_dir4 = gr.Textbox(
|
||||
label=i18n("输入训练文件夹路径"), value=os.path.abspath(os.getcwd()) + "\\datasets\\"
|
||||
label=i18n("输入训练文件夹路径"),
|
||||
value=os.path.abspath(os.getcwd()) + "\\datasets\\",
|
||||
)
|
||||
spk_id5 = gr.Slider(
|
||||
minimum=0,
|
||||
@@ -2342,7 +2470,14 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
label=i18n(
|
||||
"选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢"
|
||||
),
|
||||
choices=["pm", "harvest", "dio", "crepe", "mangio-crepe", "rmvpe"], # Fork feature: Crepe on f0 extraction for training.
|
||||
choices=[
|
||||
"pm",
|
||||
"harvest",
|
||||
"dio",
|
||||
"crepe",
|
||||
"mangio-crepe",
|
||||
"rmvpe",
|
||||
], # Fork feature: Crepe on f0 extraction for training.
|
||||
value="rmvpe",
|
||||
interactive=True,
|
||||
)
|
||||
@@ -2352,13 +2487,21 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
step=1,
|
||||
label=i18n("crepe_hop_length"),
|
||||
value=64,
|
||||
interactive=True
|
||||
interactive=True,
|
||||
)
|
||||
but2 = gr.Button(i18n("特征提取"), variant="primary")
|
||||
info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
|
||||
but2.click(
|
||||
extract_f0_feature,
|
||||
[gpus6, np7, f0method8, if_f0_3, exp_dir1, version19, extraction_crepe_hop_length],
|
||||
[
|
||||
gpus6,
|
||||
np7,
|
||||
f0method8,
|
||||
if_f0_3,
|
||||
exp_dir1,
|
||||
version19,
|
||||
extraction_crepe_hop_length,
|
||||
],
|
||||
[info2],
|
||||
)
|
||||
with gr.Group():
|
||||
@@ -2443,13 +2586,20 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
)
|
||||
butstop = gr.Button(
|
||||
"Stop Training",
|
||||
variant='primary',
|
||||
variant="primary",
|
||||
visible=False,
|
||||
)
|
||||
but3 = gr.Button(i18n("训练模型"), variant="primary", visible=True)
|
||||
but3.click(fn=stoptraining, inputs=[gr.Number(value=0, visible=False)], outputs=[but3, butstop])
|
||||
butstop.click(fn=stoptraining, inputs=[gr.Number(value=1, visible=False)], outputs=[butstop, but3])
|
||||
|
||||
but3.click(
|
||||
fn=stoptraining,
|
||||
inputs=[gr.Number(value=0, visible=False)],
|
||||
outputs=[but3, butstop],
|
||||
)
|
||||
butstop.click(
|
||||
fn=stoptraining,
|
||||
inputs=[gr.Number(value=1, visible=False)],
|
||||
outputs=[butstop, but3],
|
||||
)
|
||||
|
||||
but4 = gr.Button(i18n("训练特征索引"), variant="primary")
|
||||
# but5 = gr.Button(i18n("一键训练"), variant="primary")
|
||||
@@ -2478,8 +2628,6 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
|
||||
but4.click(train_index, [exp_dir1, version19], info3)
|
||||
|
||||
|
||||
|
||||
# but5.click(
|
||||
# train1key,
|
||||
# [
|
||||
@@ -2601,7 +2749,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
ckpt_path2 = gr.Textbox(
|
||||
lines=3,
|
||||
label=i18n("模型路径"),
|
||||
value=os.path.abspath(os.getcwd()).replace('\\', '/') + "/logs/[YOUR_MODEL]/G_23333.pth",
|
||||
value=os.path.abspath(os.getcwd()).replace("\\", "/")
|
||||
+ "/logs/[YOUR_MODEL]/G_23333.pth",
|
||||
interactive=True,
|
||||
)
|
||||
save_name = gr.Textbox(
|
||||
@@ -2665,7 +2814,6 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
except:
|
||||
gr.Markdown(traceback.format_exc())
|
||||
|
||||
|
||||
# region Mangio Preset Handler Region
|
||||
def save_preset(
|
||||
preset_name,
|
||||
@@ -2682,45 +2830,44 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
resample_sr,
|
||||
rms_mix_rate,
|
||||
protect,
|
||||
f0_file
|
||||
f0_file,
|
||||
):
|
||||
data = None
|
||||
with open('../inference-presets.json', 'r') as file:
|
||||
with open("../inference-presets.json", "r") as file:
|
||||
data = json.load(file)
|
||||
preset_json = {
|
||||
'name': preset_name,
|
||||
'model': sid0,
|
||||
'transpose': vc_transform,
|
||||
'audio_file': input_audio0,
|
||||
'auto_audio_file': input_audio1,
|
||||
'f0_method': f0method,
|
||||
'crepe_hop_length': crepe_hop_length,
|
||||
'median_filtering': filter_radius,
|
||||
'feature_path': file_index1,
|
||||
'auto_feature_path': file_index2,
|
||||
'search_feature_ratio': index_rate,
|
||||
'resample': resample_sr,
|
||||
'volume_envelope': rms_mix_rate,
|
||||
'protect_voiceless': protect,
|
||||
'f0_file_path': f0_file
|
||||
"name": preset_name,
|
||||
"model": sid0,
|
||||
"transpose": vc_transform,
|
||||
"audio_file": input_audio0,
|
||||
"auto_audio_file": input_audio1,
|
||||
"f0_method": f0method,
|
||||
"crepe_hop_length": crepe_hop_length,
|
||||
"median_filtering": filter_radius,
|
||||
"feature_path": file_index1,
|
||||
"auto_feature_path": file_index2,
|
||||
"search_feature_ratio": index_rate,
|
||||
"resample": resample_sr,
|
||||
"volume_envelope": rms_mix_rate,
|
||||
"protect_voiceless": protect,
|
||||
"f0_file_path": f0_file,
|
||||
}
|
||||
data['presets'].append(preset_json)
|
||||
with open('../inference-presets.json', 'w') as file:
|
||||
data["presets"].append(preset_json)
|
||||
with open("../inference-presets.json", "w") as file:
|
||||
json.dump(data, file)
|
||||
file.flush()
|
||||
print("Saved Preset %s into inference-presets.json!" % preset_name)
|
||||
|
||||
|
||||
def on_preset_changed(preset_name):
|
||||
print("Changed Preset to %s!" % preset_name)
|
||||
data = None
|
||||
with open('../inference-presets.json', 'r') as file:
|
||||
with open("../inference-presets.json", "r") as file:
|
||||
data = json.load(file)
|
||||
|
||||
print("Searching for " + preset_name)
|
||||
returning_preset = None
|
||||
for preset in data['presets']:
|
||||
if(preset['name'] == preset_name):
|
||||
for preset in data["presets"]:
|
||||
if preset["name"] == preset_name:
|
||||
print("Found a preset")
|
||||
returning_preset = preset
|
||||
# return all new input values
|
||||
@@ -2794,7 +2941,9 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
# with gr.TabItem(i18n("点击查看交流、问题反馈群号")):
|
||||
# gr.Markdown(value=i18n("xxxxx"))
|
||||
|
||||
if config.iscolab or config.paperspace: # Share gradio link for colab and paperspace (FORK FEATURE)
|
||||
if (
|
||||
config.iscolab or config.paperspace
|
||||
): # Share gradio link for colab and paperspace (FORK FEATURE)
|
||||
app.queue(concurrency_count=511, max_size=1022).launch(share=True)
|
||||
else:
|
||||
app.queue(concurrency_count=511, max_size=1022).launch(
|
||||
@@ -2805,7 +2954,7 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
)
|
||||
|
||||
# endregion
|
||||
''' #End of Default-GUI
|
||||
""" #End of Default-GUI
|
||||
|
||||
with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
gr.HTML("<h1> The Mangio-RVC-Fork 💻 </h1>")
|
||||
@@ -3735,4 +3884,4 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:
|
||||
)
|
||||
|
||||
#endregion
|
||||
'''
|
||||
"""
|
||||
|
||||
31
my_utils.py
31
my_utils.py
@@ -1,12 +1,15 @@
|
||||
import ffmpeg
|
||||
import numpy as np
|
||||
|
||||
# import praatio
|
||||
# import praatio.praat_scripts
|
||||
import os
|
||||
|
||||
# from os.path import join
|
||||
|
||||
# praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")
|
||||
|
||||
|
||||
def load_audio(file, sr, DoFormant, Quefrency, Timbre):
|
||||
try:
|
||||
# https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
|
||||
@@ -15,15 +18,13 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre):
|
||||
file = (
|
||||
file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
|
||||
) # 防止小白拷路径头尾带了空格和"和回车
|
||||
file_formanted = (
|
||||
file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
|
||||
)
|
||||
with open('formanting.txt', 'r') as fvf:
|
||||
file_formanted = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
|
||||
with open("formanting.txt", "r") as fvf:
|
||||
content = fvf.readlines()
|
||||
if 'True' in content[0].split('\n')[0]:
|
||||
if "True" in content[0].split("\n")[0]:
|
||||
# print("true")
|
||||
DoFormant = True
|
||||
Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
|
||||
Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
|
||||
|
||||
else:
|
||||
# print("not true")
|
||||
@@ -34,24 +35,30 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre):
|
||||
# print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted))
|
||||
print("formanting...")
|
||||
|
||||
os.system('stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"' % (file, Quefrency, Timbre, file_formanted))
|
||||
os.system(
|
||||
'stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"'
|
||||
% (file, Quefrency, Timbre, file_formanted)
|
||||
)
|
||||
print("formanted!")
|
||||
# filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\')
|
||||
# file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\')
|
||||
|
||||
out, _ = (
|
||||
ffmpeg.input('%sFORMANTED%s' % (file_formanted, '.wav'), threads=0)
|
||||
ffmpeg.input("%sFORMANTED%s" % (file_formanted, ".wav"), threads=0)
|
||||
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
|
||||
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
||||
.run(
|
||||
cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
|
||||
)
|
||||
)
|
||||
|
||||
os.remove('%sFORMANTED%s' % (file_formanted, '.wav'))
|
||||
os.remove("%sFORMANTED%s" % (file_formanted, ".wav"))
|
||||
else:
|
||||
|
||||
out, _ = (
|
||||
ffmpeg.input(file, threads=0)
|
||||
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
|
||||
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
||||
.run(
|
||||
cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to load audio: {e}")
|
||||
|
||||
@@ -571,7 +571,7 @@ def train_and_evaluate(
|
||||
|
||||
with open("stop.txt", "r+") as tostop:
|
||||
content = tostop.read()
|
||||
if 'stop' in content:
|
||||
if "stop" in content:
|
||||
logger.info("Stop Button was pressed. The program is closed.")
|
||||
if hasattr(net_g, "module"):
|
||||
ckpt = net_g.module.state_dict()
|
||||
@@ -581,7 +581,13 @@ def train_and_evaluate(
|
||||
"saving final ckpt:%s"
|
||||
% (
|
||||
savee(
|
||||
ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch, hps.version, hps
|
||||
ckpt,
|
||||
hps.sample_rate,
|
||||
hps.if_f0,
|
||||
hps.name,
|
||||
epoch,
|
||||
hps.version,
|
||||
hps,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@@ -24,9 +24,10 @@ Timbre = 0.0
|
||||
mutex = multiprocessing.Lock()
|
||||
f = open("%s/preprocess.log" % exp_dir, "a+")
|
||||
|
||||
with open('formanting.txt', 'r') as fvf:
|
||||
with open("formanting.txt", "r") as fvf:
|
||||
content = fvf.readlines()
|
||||
Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
|
||||
Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
|
||||
|
||||
|
||||
def println(strr):
|
||||
mutex.acquire()
|
||||
@@ -109,7 +110,9 @@ class PreProcess:
|
||||
println("%s->%s" % (path, traceback.format_exc()))
|
||||
|
||||
def pipeline_mp(self, infos, thread_n):
|
||||
for path, idx0 in tqdm.tqdm(infos, position=thread_n, leave=True, desc="thread:%s" % thread_n):
|
||||
for path, idx0 in tqdm.tqdm(
|
||||
infos, position=thread_n, leave=True, desc="thread:%s" % thread_n
|
||||
):
|
||||
self.pipeline(path, idx0)
|
||||
|
||||
def pipeline_mp_inp_dir(self, inp_root, n_p):
|
||||
|
||||
@@ -15,6 +15,7 @@ bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
|
||||
|
||||
input_audio_path2wav = {}
|
||||
|
||||
|
||||
@lru_cache
|
||||
def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
|
||||
audio = input_audio_path2wav[input_audio_path]
|
||||
@@ -74,7 +75,9 @@ class VC(object):
|
||||
def get_optimal_torch_device(self, index: int = 0) -> torch.device:
|
||||
# Get cuda device
|
||||
if torch.cuda.is_available():
|
||||
return torch.device(f"cuda:{index % torch.cuda.device_count()}") # Very fast
|
||||
return torch.device(
|
||||
f"cuda:{index % torch.cuda.device_count()}"
|
||||
) # Very fast
|
||||
elif torch.backends.mps.is_available():
|
||||
return torch.device("mps")
|
||||
# Insert an else here to grab "xla" devices if available. TO DO later. Requires the torch_xla.core.xla_model library
|
||||
@@ -91,7 +94,9 @@ class VC(object):
|
||||
hop_length=160, # 512 before. Hop length changes the speed that the voice jumps to a different dramatic pitch. Lower hop lengths means more pitch accuracy but longer inference time.
|
||||
model="full", # Either use crepe-tiny "tiny" or crepe "full". Default is full
|
||||
):
|
||||
x = x.astype(np.float32) # fixes the F.conv2D exception. We needed to convert double to float.
|
||||
x = x.astype(
|
||||
np.float32
|
||||
) # fixes the F.conv2D exception. We needed to convert double to float.
|
||||
x /= np.quantile(np.abs(x), 0.999)
|
||||
torch_device = self.get_optimal_torch_device()
|
||||
audio = torch.from_numpy(x).to(torch_device, copy=True)
|
||||
@@ -109,7 +114,7 @@ class VC(object):
|
||||
model,
|
||||
batch_size=hop_length * 2,
|
||||
device=torch_device,
|
||||
pad=True
|
||||
pad=True,
|
||||
)
|
||||
p_len = p_len or x.shape[0] // hop_length
|
||||
# Resize the pitch for final f0
|
||||
@@ -118,7 +123,7 @@ class VC(object):
|
||||
target = np.interp(
|
||||
np.arange(0, len(source) * p_len, len(source)) / p_len,
|
||||
np.arange(0, len(source)),
|
||||
source
|
||||
source,
|
||||
)
|
||||
f0 = np.nan_to_num(target)
|
||||
return f0 # Resized f0
|
||||
@@ -153,7 +158,7 @@ class VC(object):
|
||||
|
||||
# Fork Feature: Compute pYIN f0 method
|
||||
def get_f0_pyin_computation(self, x, f0_min, f0_max):
|
||||
y, sr = librosa.load('saudio/Sidney.wav', self.sr, mono=True)
|
||||
y, sr = librosa.load("saudio/Sidney.wav", self.sr, mono=True)
|
||||
f0, _, _ = librosa.pyin(y, sr=self.sr, fmin=f0_min, fmax=f0_max)
|
||||
f0 = f0[1:] # Get rid of extra first frame
|
||||
return f0
|
||||
@@ -173,9 +178,9 @@ class VC(object):
|
||||
):
|
||||
# Get various f0 methods from input to use in the computation stack
|
||||
s = methods_str
|
||||
s = s.split('hybrid')[1]
|
||||
s = s.replace('[', '').replace(']', '')
|
||||
methods = s.split('+')
|
||||
s = s.split("hybrid")[1]
|
||||
s = s.replace("[", "").replace("]", "")
|
||||
methods = s.split("+")
|
||||
f0_computation_stack = []
|
||||
|
||||
print("Calculating f0 pitch estimations for methods: %s" % str(methods))
|
||||
@@ -207,9 +212,13 @@ class VC(object):
|
||||
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny")
|
||||
f0 = f0[1:] # Get rid of extra first frame
|
||||
elif method == "mangio-crepe":
|
||||
f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length)
|
||||
f0 = self.get_f0_crepe_computation(
|
||||
x, f0_min, f0_max, p_len, crepe_hop_length
|
||||
)
|
||||
elif method == "mangio-crepe-tiny":
|
||||
f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length, "tiny")
|
||||
f0 = self.get_f0_crepe_computation(
|
||||
x, f0_min, f0_max, p_len, crepe_hop_length, "tiny"
|
||||
)
|
||||
elif method == "harvest":
|
||||
f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
|
||||
if filter_radius > 2:
|
||||
@@ -221,7 +230,7 @@ class VC(object):
|
||||
fs=self.sr,
|
||||
f0_ceil=f0_max,
|
||||
f0_floor=f0_min,
|
||||
frame_period=10
|
||||
frame_period=10,
|
||||
)
|
||||
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
|
||||
f0 = signal.medfilt(f0, 3)
|
||||
@@ -286,7 +295,7 @@ class VC(object):
|
||||
fs=self.sr,
|
||||
f0_ceil=f0_max,
|
||||
f0_floor=f0_min,
|
||||
frame_period=10
|
||||
frame_period=10,
|
||||
)
|
||||
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
|
||||
f0 = signal.medfilt(f0, 3)
|
||||
@@ -295,12 +304,17 @@ class VC(object):
|
||||
elif f0_method == "crepe-tiny":
|
||||
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny")
|
||||
elif f0_method == "mangio-crepe":
|
||||
f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length)
|
||||
f0 = self.get_f0_crepe_computation(
|
||||
x, f0_min, f0_max, p_len, crepe_hop_length
|
||||
)
|
||||
elif f0_method == "mangio-crepe-tiny":
|
||||
f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length, "tiny")
|
||||
f0 = self.get_f0_crepe_computation(
|
||||
x, f0_min, f0_max, p_len, crepe_hop_length, "tiny"
|
||||
)
|
||||
elif f0_method == "rmvpe":
|
||||
if hasattr(self, "model_rmvpe") == False:
|
||||
from rmvpe import RMVPE
|
||||
|
||||
print("loading rmvpe model")
|
||||
self.model_rmvpe = RMVPE(
|
||||
"rmvpe.pt", is_half=self.is_half, device=self.device
|
||||
@@ -319,7 +333,7 @@ class VC(object):
|
||||
p_len,
|
||||
filter_radius,
|
||||
crepe_hop_length,
|
||||
time_step
|
||||
time_step,
|
||||
)
|
||||
|
||||
f0 *= pow(2, f0_up_key / 12)
|
||||
|
||||
Reference in New Issue
Block a user