mirror of
https://github.com/Mangio621/Mangio-RVC-Fork.git
synced 2025-12-16 11:37:44 +01:00
Merge pull request #49 from Mangio621/TRUE-FIXED-FP16
Fix training times by fixing fp16 detection
This commit is contained in:
88
config.py
88
config.py
@@ -1,19 +1,73 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import sys
|
import sys
|
||||||
import torch
|
import torch
|
||||||
|
import json
|
||||||
from multiprocessing import cpu_count
|
from multiprocessing import cpu_count
|
||||||
|
|
||||||
|
global usefp16
|
||||||
|
usefp16 = False
|
||||||
|
|
||||||
|
|
||||||
def use_fp32_config():
|
def use_fp32_config():
|
||||||
for config_file in ["32k.json", "40k.json", "48k.json"]:
|
usefp16 = False
|
||||||
with open(f"configs/{config_file}", "r") as f:
|
device_capability = 0
|
||||||
strr = f.read().replace("true", "false")
|
if torch.cuda.is_available():
|
||||||
with open(f"configs/{config_file}", "w") as f:
|
device = torch.device("cuda:0") # Assuming you have only one GPU (index 0).
|
||||||
f.write(strr)
|
device_capability = torch.cuda.get_device_capability(device)[0]
|
||||||
with open("trainset_preprocess_pipeline_print.py", "r") as f:
|
if device_capability >= 7:
|
||||||
strr = f.read().replace("3.7", "3.0")
|
usefp16 = True
|
||||||
with open("trainset_preprocess_pipeline_print.py", "w") as f:
|
for config_file in ["32k.json", "40k.json", "48k.json"]:
|
||||||
f.write(strr)
|
with open(f"configs/{config_file}", "r") as d:
|
||||||
|
data = json.load(d)
|
||||||
|
|
||||||
|
if "train" in data and "fp16_run" in data["train"]:
|
||||||
|
data["train"]["fp16_run"] = True
|
||||||
|
|
||||||
|
with open(f"configs/{config_file}", "w") as d:
|
||||||
|
json.dump(data, d, indent=4)
|
||||||
|
|
||||||
|
print(f"Set fp16_run to true in {config_file}")
|
||||||
|
|
||||||
|
with open(
|
||||||
|
"trainset_preprocess_pipeline_print.py", "r", encoding="utf-8"
|
||||||
|
) as f:
|
||||||
|
strr = f.read()
|
||||||
|
|
||||||
|
strr = strr.replace("3.0", "3.7")
|
||||||
|
|
||||||
|
with open(
|
||||||
|
"trainset_preprocess_pipeline_print.py", "w", encoding="utf-8"
|
||||||
|
) as f:
|
||||||
|
f.write(strr)
|
||||||
|
else:
|
||||||
|
for config_file in ["32k.json", "40k.json", "48k.json"]:
|
||||||
|
with open(f"configs/{config_file}", "r") as f:
|
||||||
|
data = json.load(f)
|
||||||
|
|
||||||
|
if "train" in data and "fp16_run" in data["train"]:
|
||||||
|
data["train"]["fp16_run"] = False
|
||||||
|
|
||||||
|
with open(f"configs/{config_file}", "w") as d:
|
||||||
|
json.dump(data, d, indent=4)
|
||||||
|
|
||||||
|
print(f"Set fp16_run to false in {config_file}")
|
||||||
|
|
||||||
|
with open(
|
||||||
|
"trainset_preprocess_pipeline_print.py", "r", encoding="utf-8"
|
||||||
|
) as f:
|
||||||
|
strr = f.read()
|
||||||
|
|
||||||
|
strr = strr.replace("3.7", "3.0")
|
||||||
|
|
||||||
|
with open(
|
||||||
|
"trainset_preprocess_pipeline_print.py", "w", encoding="utf-8"
|
||||||
|
) as f:
|
||||||
|
f.write(strr)
|
||||||
|
else:
|
||||||
|
print(
|
||||||
|
"CUDA is not available. Make sure you have an NVIDIA GPU and CUDA installed."
|
||||||
|
)
|
||||||
|
return (usefp16, device_capability)
|
||||||
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
@@ -32,7 +86,7 @@ class Config:
|
|||||||
self.paperspace,
|
self.paperspace,
|
||||||
self.is_cli,
|
self.is_cli,
|
||||||
) = self.arg_parse()
|
) = self.arg_parse()
|
||||||
|
|
||||||
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
|
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -50,11 +104,15 @@ class Config:
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="Do not open in browser automatically",
|
help="Do not open in browser automatically",
|
||||||
)
|
)
|
||||||
parser.add_argument( # Fork Feature. Paperspace integration for web UI
|
parser.add_argument( # Fork Feature. Paperspace integration for web UI
|
||||||
"--paperspace", action="store_true", help="Note that this argument just shares a gradio link for the web UI. Thus can be used on other non-local CLI systems."
|
"--paperspace",
|
||||||
|
action="store_true",
|
||||||
|
help="Note that this argument just shares a gradio link for the web UI. Thus can be used on other non-local CLI systems.",
|
||||||
)
|
)
|
||||||
parser.add_argument( # Fork Feature. Embed a CLI into the infer-web.py
|
parser.add_argument( # Fork Feature. Embed a CLI into the infer-web.py
|
||||||
"--is_cli", action="store_true", help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!"
|
"--is_cli",
|
||||||
|
action="store_true",
|
||||||
|
help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!",
|
||||||
)
|
)
|
||||||
cmd_opts = parser.parse_args()
|
cmd_opts = parser.parse_args()
|
||||||
|
|
||||||
@@ -95,9 +153,9 @@ class Config:
|
|||||||
):
|
):
|
||||||
print("Found GPU", self.gpu_name, ", force to fp32")
|
print("Found GPU", self.gpu_name, ", force to fp32")
|
||||||
self.is_half = False
|
self.is_half = False
|
||||||
use_fp32_config()
|
|
||||||
else:
|
else:
|
||||||
print("Found GPU", self.gpu_name)
|
print("Found GPU", self.gpu_name)
|
||||||
|
use_fp32_config()
|
||||||
self.gpu_mem = int(
|
self.gpu_mem = int(
|
||||||
torch.cuda.get_device_properties(i_device).total_memory
|
torch.cuda.get_device_properties(i_device).total_memory
|
||||||
/ 1024
|
/ 1024
|
||||||
|
|||||||
@@ -5,10 +5,10 @@ sys.path.append(now_dir)
|
|||||||
from my_utils import load_audio
|
from my_utils import load_audio
|
||||||
import pyworld
|
import pyworld
|
||||||
import numpy as np, logging
|
import numpy as np, logging
|
||||||
import torchcrepe # Fork Feature. Crepe algo for training and preprocess
|
import torchcrepe # Fork Feature. Crepe algo for training and preprocess
|
||||||
import torch
|
import torch
|
||||||
from torch import Tensor # Fork Feature. Used for pitch prediction for torch crepe.
|
from torch import Tensor # Fork Feature. Used for pitch prediction for torch crepe.
|
||||||
import scipy.signal as signal # Fork Feature hybrid inference
|
import scipy.signal as signal # Fork Feature hybrid inference
|
||||||
import tqdm
|
import tqdm
|
||||||
|
|
||||||
logging.getLogger("numba").setLevel(logging.WARNING)
|
logging.getLogger("numba").setLevel(logging.WARNING)
|
||||||
@@ -19,9 +19,9 @@ f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
|
|||||||
|
|
||||||
DoFormant = False
|
DoFormant = False
|
||||||
|
|
||||||
with open('formanting.txt', 'r') as fvf:
|
with open("formanting.txt", "r") as fvf:
|
||||||
content = fvf.readlines()
|
content = fvf.readlines()
|
||||||
Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
|
Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
|
||||||
|
|
||||||
|
|
||||||
def printt(strr):
|
def printt(strr):
|
||||||
@@ -32,7 +32,7 @@ def printt(strr):
|
|||||||
|
|
||||||
n_p = int(sys.argv[2])
|
n_p = int(sys.argv[2])
|
||||||
f0method = sys.argv[3]
|
f0method = sys.argv[3]
|
||||||
extraction_crepe_hop_length = 0
|
extraction_crepe_hop_length = 0
|
||||||
try:
|
try:
|
||||||
extraction_crepe_hop_length = int(sys.argv[4])
|
extraction_crepe_hop_length = int(sys.argv[4])
|
||||||
except:
|
except:
|
||||||
@@ -53,11 +53,11 @@ class FeatureInput(object):
|
|||||||
self.f0_min = 50.0
|
self.f0_min = 50.0
|
||||||
self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
|
self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
|
||||||
self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
|
self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
|
||||||
|
|
||||||
# EXPERIMENTAL. PROBABLY BUGGY
|
# EXPERIMENTAL. PROBABLY BUGGY
|
||||||
def get_f0_hybrid_computation(
|
def get_f0_hybrid_computation(
|
||||||
self,
|
self,
|
||||||
methods_str,
|
methods_str,
|
||||||
x,
|
x,
|
||||||
f0_min,
|
f0_min,
|
||||||
f0_max,
|
f0_max,
|
||||||
@@ -67,9 +67,9 @@ class FeatureInput(object):
|
|||||||
):
|
):
|
||||||
# Get various f0 methods from input to use in the computation stack
|
# Get various f0 methods from input to use in the computation stack
|
||||||
s = methods_str
|
s = methods_str
|
||||||
s = s.split('hybrid')[1]
|
s = s.split("hybrid")[1]
|
||||||
s = s.replace('[', '').replace(']', '')
|
s = s.replace("[", "").replace("]", "")
|
||||||
methods = s.split('+')
|
methods = s.split("+")
|
||||||
f0_computation_stack = []
|
f0_computation_stack = []
|
||||||
|
|
||||||
print("Calculating f0 pitch estimations for methods: %s" % str(methods))
|
print("Calculating f0 pitch estimations for methods: %s" % str(methods))
|
||||||
@@ -99,7 +99,9 @@ class FeatureInput(object):
|
|||||||
torch_device_index = 0
|
torch_device_index = 0
|
||||||
torch_device = None
|
torch_device = None
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
|
torch_device = torch.device(
|
||||||
|
f"cuda:{torch_device_index % torch.cuda.device_count()}"
|
||||||
|
)
|
||||||
elif torch.backends.mps.is_available():
|
elif torch.backends.mps.is_available():
|
||||||
torch_device = torch.device("mps")
|
torch_device = torch.device("mps")
|
||||||
else:
|
else:
|
||||||
@@ -123,7 +125,7 @@ class FeatureInput(object):
|
|||||||
f0 = torchcrepe.filter.mean(f0, 3)
|
f0 = torchcrepe.filter.mean(f0, 3)
|
||||||
f0[pd < 0.1] = 0
|
f0[pd < 0.1] = 0
|
||||||
f0 = f0[0].cpu().numpy()
|
f0 = f0[0].cpu().numpy()
|
||||||
f0 = f0[1:] # Get rid of extra first frame
|
f0 = f0[1:] # Get rid of extra first frame
|
||||||
elif method == "mangio-crepe":
|
elif method == "mangio-crepe":
|
||||||
# print("Performing crepe pitch extraction. (EXPERIMENTAL)")
|
# print("Performing crepe pitch extraction. (EXPERIMENTAL)")
|
||||||
# print("CREPE PITCH EXTRACTION HOP LENGTH: " + str(crepe_hop_length))
|
# print("CREPE PITCH EXTRACTION HOP LENGTH: " + str(crepe_hop_length))
|
||||||
@@ -132,7 +134,9 @@ class FeatureInput(object):
|
|||||||
torch_device_index = 0
|
torch_device_index = 0
|
||||||
torch_device = None
|
torch_device = None
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
|
torch_device = torch.device(
|
||||||
|
f"cuda:{torch_device_index % torch.cuda.device_count()}"
|
||||||
|
)
|
||||||
elif torch.backends.mps.is_available():
|
elif torch.backends.mps.is_available():
|
||||||
torch_device = torch.device("mps")
|
torch_device = torch.device("mps")
|
||||||
else:
|
else:
|
||||||
@@ -156,7 +160,7 @@ class FeatureInput(object):
|
|||||||
"full",
|
"full",
|
||||||
batch_size=crepe_hop_length * 2,
|
batch_size=crepe_hop_length * 2,
|
||||||
device=torch_device,
|
device=torch_device,
|
||||||
pad=True
|
pad=True,
|
||||||
)
|
)
|
||||||
p_len = p_len or x.shape[0] // crepe_hop_length
|
p_len = p_len or x.shape[0] // crepe_hop_length
|
||||||
# Resize the pitch
|
# Resize the pitch
|
||||||
@@ -165,7 +169,7 @@ class FeatureInput(object):
|
|||||||
target = np.interp(
|
target = np.interp(
|
||||||
np.arange(0, len(source) * p_len, len(source)) / p_len,
|
np.arange(0, len(source) * p_len, len(source)) / p_len,
|
||||||
np.arange(0, len(source)),
|
np.arange(0, len(source)),
|
||||||
source
|
source,
|
||||||
)
|
)
|
||||||
f0 = np.nan_to_num(target)
|
f0 = np.nan_to_num(target)
|
||||||
elif method == "harvest":
|
elif method == "harvest":
|
||||||
@@ -191,12 +195,12 @@ class FeatureInput(object):
|
|||||||
f0 = signal.medfilt(f0, 3)
|
f0 = signal.medfilt(f0, 3)
|
||||||
f0 = f0[1:]
|
f0 = f0[1:]
|
||||||
f0_computation_stack.append(f0)
|
f0_computation_stack.append(f0)
|
||||||
|
|
||||||
for fc in f0_computation_stack:
|
for fc in f0_computation_stack:
|
||||||
print(len(fc))
|
print(len(fc))
|
||||||
|
|
||||||
# print("Calculating hybrid median f0 from the stack of: %s" % str(methods))
|
# print("Calculating hybrid median f0 from the stack of: %s" % str(methods))
|
||||||
|
|
||||||
f0_median_hybrid = None
|
f0_median_hybrid = None
|
||||||
if len(f0_computation_stack) == 1:
|
if len(f0_computation_stack) == 1:
|
||||||
f0_median_hybrid = f0_computation_stack[0]
|
f0_median_hybrid = f0_computation_stack[0]
|
||||||
@@ -236,10 +240,9 @@ class FeatureInput(object):
|
|||||||
elif f0_method == "rmvpe":
|
elif f0_method == "rmvpe":
|
||||||
if hasattr(self, "model_rmvpe") == False:
|
if hasattr(self, "model_rmvpe") == False:
|
||||||
from rmvpe import RMVPE
|
from rmvpe import RMVPE
|
||||||
|
|
||||||
print("loading rmvpe model")
|
print("loading rmvpe model")
|
||||||
self.model_rmvpe = RMVPE(
|
self.model_rmvpe = RMVPE("rmvpe.pt", is_half=False, device="cuda:0")
|
||||||
"rmvpe.pt", is_half=False, device="cuda:0"
|
|
||||||
)
|
|
||||||
f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
|
f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
|
||||||
elif f0_method == "dio":
|
elif f0_method == "dio":
|
||||||
f0, t = pyworld.dio(
|
f0, t = pyworld.dio(
|
||||||
@@ -250,12 +253,16 @@ class FeatureInput(object):
|
|||||||
frame_period=1000 * self.hop / self.fs,
|
frame_period=1000 * self.hop / self.fs,
|
||||||
)
|
)
|
||||||
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs)
|
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs)
|
||||||
elif f0_method == "crepe": # Fork Feature: Added crepe f0 for f0 feature extraction
|
elif (
|
||||||
|
f0_method == "crepe"
|
||||||
|
): # Fork Feature: Added crepe f0 for f0 feature extraction
|
||||||
# Pick a batch size that doesn't cause memory errors on your gpu
|
# Pick a batch size that doesn't cause memory errors on your gpu
|
||||||
torch_device_index = 0
|
torch_device_index = 0
|
||||||
torch_device = None
|
torch_device = None
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
|
torch_device = torch.device(
|
||||||
|
f"cuda:{torch_device_index % torch.cuda.device_count()}"
|
||||||
|
)
|
||||||
elif torch.backends.mps.is_available():
|
elif torch.backends.mps.is_available():
|
||||||
torch_device = torch.device("mps")
|
torch_device = torch.device("mps")
|
||||||
else:
|
else:
|
||||||
@@ -287,7 +294,9 @@ class FeatureInput(object):
|
|||||||
torch_device_index = 0
|
torch_device_index = 0
|
||||||
torch_device = None
|
torch_device = None
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
|
torch_device = torch.device(
|
||||||
|
f"cuda:{torch_device_index % torch.cuda.device_count()}"
|
||||||
|
)
|
||||||
elif torch.backends.mps.is_available():
|
elif torch.backends.mps.is_available():
|
||||||
torch_device = torch.device("mps")
|
torch_device = torch.device("mps")
|
||||||
else:
|
else:
|
||||||
@@ -311,7 +320,7 @@ class FeatureInput(object):
|
|||||||
"full",
|
"full",
|
||||||
batch_size=crepe_hop_length * 2,
|
batch_size=crepe_hop_length * 2,
|
||||||
device=torch_device,
|
device=torch_device,
|
||||||
pad=True
|
pad=True,
|
||||||
)
|
)
|
||||||
p_len = p_len or x.shape[0] // crepe_hop_length
|
p_len = p_len or x.shape[0] // crepe_hop_length
|
||||||
# Resize the pitch
|
# Resize the pitch
|
||||||
@@ -320,20 +329,20 @@ class FeatureInput(object):
|
|||||||
target = np.interp(
|
target = np.interp(
|
||||||
np.arange(0, len(source) * p_len, len(source)) / p_len,
|
np.arange(0, len(source) * p_len, len(source)) / p_len,
|
||||||
np.arange(0, len(source)),
|
np.arange(0, len(source)),
|
||||||
source
|
source,
|
||||||
)
|
)
|
||||||
f0 = np.nan_to_num(target)
|
f0 = np.nan_to_num(target)
|
||||||
elif "hybrid" in f0_method: # EXPERIMENTAL
|
elif "hybrid" in f0_method: # EXPERIMENTAL
|
||||||
# Perform hybrid median pitch estimation
|
# Perform hybrid median pitch estimation
|
||||||
time_step = 160 / 16000 * 1000
|
time_step = 160 / 16000 * 1000
|
||||||
f0 = self.get_f0_hybrid_computation(
|
f0 = self.get_f0_hybrid_computation(
|
||||||
f0_method,
|
f0_method,
|
||||||
x,
|
x,
|
||||||
self.f0_min,
|
self.f0_min,
|
||||||
self.f0_max,
|
self.f0_max,
|
||||||
p_len,
|
p_len,
|
||||||
crepe_hop_length,
|
crepe_hop_length,
|
||||||
time_step
|
time_step,
|
||||||
)
|
)
|
||||||
# Mangio-RVC-Fork Feature: Add hybrid f0 inference to feature extraction. EXPERIMENTAL...
|
# Mangio-RVC-Fork Feature: Add hybrid f0 inference to feature extraction. EXPERIMENTAL...
|
||||||
|
|
||||||
@@ -362,14 +371,19 @@ class FeatureInput(object):
|
|||||||
with tqdm.tqdm(total=len(paths), leave=True, position=thread_n) as pbar:
|
with tqdm.tqdm(total=len(paths), leave=True, position=thread_n) as pbar:
|
||||||
for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths):
|
for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths):
|
||||||
try:
|
try:
|
||||||
pbar.set_description("thread:%s, f0ing, Hop-Length:%s" % (thread_n, crepe_hop_length))
|
pbar.set_description(
|
||||||
|
"thread:%s, f0ing, Hop-Length:%s"
|
||||||
|
% (thread_n, crepe_hop_length)
|
||||||
|
)
|
||||||
pbar.update(1)
|
pbar.update(1)
|
||||||
if (
|
if (
|
||||||
os.path.exists(opt_path1 + ".npy") == True
|
os.path.exists(opt_path1 + ".npy") == True
|
||||||
and os.path.exists(opt_path2 + ".npy") == True
|
and os.path.exists(opt_path2 + ".npy") == True
|
||||||
):
|
):
|
||||||
continue
|
continue
|
||||||
featur_pit = self.compute_f0(inp_path, f0_method, crepe_hop_length)
|
featur_pit = self.compute_f0(
|
||||||
|
inp_path, f0_method, crepe_hop_length
|
||||||
|
)
|
||||||
np.save(
|
np.save(
|
||||||
opt_path2,
|
opt_path2,
|
||||||
featur_pit,
|
featur_pit,
|
||||||
@@ -382,7 +396,9 @@ class FeatureInput(object):
|
|||||||
allow_pickle=False,
|
allow_pickle=False,
|
||||||
) # ori
|
) # ori
|
||||||
except:
|
except:
|
||||||
printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc()))
|
printt(
|
||||||
|
"f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
@@ -411,12 +427,7 @@ if __name__ == "__main__":
|
|||||||
for i in range(n_p):
|
for i in range(n_p):
|
||||||
p = Process(
|
p = Process(
|
||||||
target=featureInput.go,
|
target=featureInput.go,
|
||||||
args=(
|
args=(paths[i::n_p], f0method, extraction_crepe_hop_length, i),
|
||||||
paths[i::n_p],
|
|
||||||
f0method,
|
|
||||||
extraction_crepe_hop_length,
|
|
||||||
i
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
ps.append(p)
|
ps.append(p)
|
||||||
p.start()
|
p.start()
|
||||||
|
|||||||
43
gui_v0.py
43
gui_v0.py
@@ -51,8 +51,10 @@ class RVC:
|
|||||||
self.window = 160
|
self.window = 160
|
||||||
|
|
||||||
# Get Torch Device
|
# Get Torch Device
|
||||||
if(torch.cuda.is_available()):
|
if torch.cuda.is_available():
|
||||||
self.torch_device = torch.device(f"cuda:{0 % torch.cuda.device_count()}")
|
self.torch_device = torch.device(
|
||||||
|
f"cuda:{0 % torch.cuda.device_count()}"
|
||||||
|
)
|
||||||
elif torch.backends.mps.is_available():
|
elif torch.backends.mps.is_available():
|
||||||
self.torch_device = torch.device("mps")
|
self.torch_device = torch.device("mps")
|
||||||
else:
|
else:
|
||||||
@@ -141,7 +143,7 @@ class RVC:
|
|||||||
|
|
||||||
def get_f0(self, x, f0_up_key, inp_f0=None):
|
def get_f0(self, x, f0_up_key, inp_f0=None):
|
||||||
# Calculate Padding and f0 details here
|
# Calculate Padding and f0 details here
|
||||||
p_len = x.shape[0] // 512 # For Now This probs doesn't work
|
p_len = x.shape[0] // 512 # For Now This probs doesn't work
|
||||||
x_pad = 1
|
x_pad = 1
|
||||||
f0_min = 50
|
f0_min = 50
|
||||||
f0_max = 1100
|
f0_max = 1100
|
||||||
@@ -150,11 +152,11 @@ class RVC:
|
|||||||
|
|
||||||
f0 = 0
|
f0 = 0
|
||||||
# Here, check f0_methods and get their computations
|
# Here, check f0_methods and get their computations
|
||||||
if(self.f0_method == 'harvest'):
|
if self.f0_method == "harvest":
|
||||||
f0 = self.get_harvest_computation(x, f0_min, f0_max)
|
f0 = self.get_harvest_computation(x, f0_min, f0_max)
|
||||||
elif(self.f0_method == 'reg-crepe'):
|
elif self.f0_method == "reg-crepe":
|
||||||
f0 = self.get_regular_crepe_computation(x, f0_min, f0_max)
|
f0 = self.get_regular_crepe_computation(x, f0_min, f0_max)
|
||||||
elif(self.f0_method == 'reg-crepe-tiny'):
|
elif self.f0_method == "reg-crepe-tiny":
|
||||||
f0 = self.get_regular_crepe_computation(x, f0_min, f0_max, "tiny")
|
f0 = self.get_regular_crepe_computation(x, f0_min, f0_max, "tiny")
|
||||||
|
|
||||||
# Calculate f0_course and f0_bak here
|
# Calculate f0_course and f0_bak here
|
||||||
@@ -300,7 +302,7 @@ class GUI:
|
|||||||
with open("values1.json", "r") as j:
|
with open("values1.json", "r") as j:
|
||||||
data = json.load(j)
|
data = json.load(j)
|
||||||
except:
|
except:
|
||||||
# Injecting f0_method into the json data
|
# Injecting f0_method into the json data
|
||||||
with open("values1.json", "w") as j:
|
with open("values1.json", "w") as j:
|
||||||
data = {
|
data = {
|
||||||
"pth_path": "",
|
"pth_path": "",
|
||||||
@@ -328,11 +330,7 @@ class GUI:
|
|||||||
[
|
[
|
||||||
sg.Frame(
|
sg.Frame(
|
||||||
title="Proudly forked by Mangio621",
|
title="Proudly forked by Mangio621",
|
||||||
layout=[
|
layout=[[sg.Image("./mangio_utils/lol.png")]],
|
||||||
[
|
|
||||||
sg.Image('./mangio_utils/lol.png')
|
|
||||||
]
|
|
||||||
]
|
|
||||||
),
|
),
|
||||||
sg.Frame(
|
sg.Frame(
|
||||||
title=i18n("加载模型"),
|
title=i18n("加载模型"),
|
||||||
@@ -384,14 +382,16 @@ class GUI:
|
|||||||
),
|
),
|
||||||
],
|
],
|
||||||
],
|
],
|
||||||
)
|
),
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
# Mangio f0 Selection frame Here
|
# Mangio f0 Selection frame Here
|
||||||
sg.Frame(
|
sg.Frame(
|
||||||
layout=[
|
layout=[
|
||||||
[
|
[
|
||||||
sg.Radio("Harvest", "f0_method", key="harvest", default=True),
|
sg.Radio(
|
||||||
|
"Harvest", "f0_method", key="harvest", default=True
|
||||||
|
),
|
||||||
sg.Radio("Crepe", "f0_method", key="reg-crepe"),
|
sg.Radio("Crepe", "f0_method", key="reg-crepe"),
|
||||||
sg.Radio("Crepe Tiny", "f0_method", key="reg-crepe-tiny"),
|
sg.Radio("Crepe Tiny", "f0_method", key="reg-crepe-tiny"),
|
||||||
]
|
]
|
||||||
@@ -536,20 +536,21 @@ class GUI:
|
|||||||
if event == "stop_vc" and self.flag_vc == True:
|
if event == "stop_vc" and self.flag_vc == True:
|
||||||
self.flag_vc = False
|
self.flag_vc = False
|
||||||
|
|
||||||
# Function that returns the used f0 method in string format "harvest"
|
# Function that returns the used f0 method in string format "harvest"
|
||||||
def get_f0_method_from_radios(self, values):
|
def get_f0_method_from_radios(self, values):
|
||||||
f0_array = [
|
f0_array = [
|
||||||
{"name": "harvest", "val": values['harvest']},
|
{"name": "harvest", "val": values["harvest"]},
|
||||||
{"name": "reg-crepe", "val": values['reg-crepe']},
|
{"name": "reg-crepe", "val": values["reg-crepe"]},
|
||||||
{"name": "reg-crepe-tiny", "val": values['reg-crepe-tiny']},
|
{"name": "reg-crepe-tiny", "val": values["reg-crepe-tiny"]},
|
||||||
]
|
]
|
||||||
# Filter through to find a true value
|
# Filter through to find a true value
|
||||||
used_f0 = ""
|
used_f0 = ""
|
||||||
for f0 in f0_array:
|
for f0 in f0_array:
|
||||||
if(f0['val'] == True):
|
if f0["val"] == True:
|
||||||
used_f0 = f0['name']
|
used_f0 = f0["name"]
|
||||||
break
|
break
|
||||||
if(used_f0 == ""): used_f0 = "harvest" # Default Harvest if used_f0 is empty somehow
|
if used_f0 == "":
|
||||||
|
used_f0 = "harvest" # Default Harvest if used_f0 is empty somehow
|
||||||
return used_f0
|
return used_f0
|
||||||
|
|
||||||
def set_values(self, values):
|
def set_values(self, values):
|
||||||
|
|||||||
687
infer-web.py
687
infer-web.py
File diff suppressed because it is too large
Load Diff
61
my_utils.py
61
my_utils.py
@@ -1,11 +1,14 @@
|
|||||||
import ffmpeg
|
import ffmpeg
|
||||||
import numpy as np
|
import numpy as np
|
||||||
#import praatio
|
|
||||||
#import praatio.praat_scripts
|
|
||||||
import os
|
|
||||||
#from os.path import join
|
|
||||||
|
|
||||||
#praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")
|
# import praatio
|
||||||
|
# import praatio.praat_scripts
|
||||||
|
import os
|
||||||
|
|
||||||
|
# from os.path import join
|
||||||
|
|
||||||
|
# praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")
|
||||||
|
|
||||||
|
|
||||||
def load_audio(file, sr, DoFormant, Quefrency, Timbre):
|
def load_audio(file, sr, DoFormant, Quefrency, Timbre):
|
||||||
try:
|
try:
|
||||||
@@ -15,43 +18,47 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre):
|
|||||||
file = (
|
file = (
|
||||||
file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
|
file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
|
||||||
) # 防止小白拷路径头尾带了空格和"和回车
|
) # 防止小白拷路径头尾带了空格和"和回车
|
||||||
file_formanted = (
|
file_formanted = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
|
||||||
file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
|
with open("formanting.txt", "r") as fvf:
|
||||||
)
|
|
||||||
with open('formanting.txt', 'r') as fvf:
|
|
||||||
content = fvf.readlines()
|
content = fvf.readlines()
|
||||||
if 'True' in content[0].split('\n')[0]:
|
if "True" in content[0].split("\n")[0]:
|
||||||
#print("true")
|
# print("true")
|
||||||
DoFormant = True
|
DoFormant = True
|
||||||
Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
|
Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
#print("not true")
|
# print("not true")
|
||||||
DoFormant = False
|
DoFormant = False
|
||||||
|
|
||||||
if DoFormant:
|
if DoFormant:
|
||||||
#os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}")
|
# os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}")
|
||||||
#print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted))
|
# print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted))
|
||||||
print("formanting...")
|
print("formanting...")
|
||||||
|
|
||||||
os.system('stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"' % (file, Quefrency, Timbre, file_formanted))
|
os.system(
|
||||||
|
'stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"'
|
||||||
|
% (file, Quefrency, Timbre, file_formanted)
|
||||||
|
)
|
||||||
print("formanted!")
|
print("formanted!")
|
||||||
#filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\')
|
# filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\')
|
||||||
#file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\')
|
# file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\')
|
||||||
|
|
||||||
out, _ = (
|
out, _ = (
|
||||||
ffmpeg.input('%sFORMANTED%s' % (file_formanted, '.wav'), threads=0)
|
ffmpeg.input("%sFORMANTED%s" % (file_formanted, ".wav"), threads=0)
|
||||||
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
|
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
|
||||||
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
.run(
|
||||||
|
cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
os.remove('%sFORMANTED%s' % (file_formanted, '.wav'))
|
os.remove("%sFORMANTED%s" % (file_formanted, ".wav"))
|
||||||
else:
|
else:
|
||||||
|
|
||||||
out, _ = (
|
out, _ = (
|
||||||
ffmpeg.input(file, threads=0)
|
ffmpeg.input(file, threads=0)
|
||||||
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
|
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
|
||||||
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
|
.run(
|
||||||
|
cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
|
||||||
|
)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(f"Failed to load audio: {e}")
|
raise RuntimeError(f"Failed to load audio: {e}")
|
||||||
|
|||||||
@@ -568,10 +568,10 @@ def train_and_evaluate(
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
with open("stop.txt", "r+") as tostop:
|
with open("stop.txt", "r+") as tostop:
|
||||||
content = tostop.read()
|
content = tostop.read()
|
||||||
if 'stop' in content:
|
if "stop" in content:
|
||||||
logger.info("Stop Button was pressed. The program is closed.")
|
logger.info("Stop Button was pressed. The program is closed.")
|
||||||
if hasattr(net_g, "module"):
|
if hasattr(net_g, "module"):
|
||||||
ckpt = net_g.module.state_dict()
|
ckpt = net_g.module.state_dict()
|
||||||
@@ -581,15 +581,21 @@ def train_and_evaluate(
|
|||||||
"saving final ckpt:%s"
|
"saving final ckpt:%s"
|
||||||
% (
|
% (
|
||||||
savee(
|
savee(
|
||||||
ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch, hps.version, hps
|
ckpt,
|
||||||
|
hps.sample_rate,
|
||||||
|
hps.if_f0,
|
||||||
|
hps.name,
|
||||||
|
epoch,
|
||||||
|
hps.version,
|
||||||
|
hps,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
tostop.truncate(0)
|
tostop.truncate(0)
|
||||||
tostop.writelines("not")
|
tostop.writelines("not")
|
||||||
os._exit(2333333)
|
os._exit(2333333)
|
||||||
|
|
||||||
if rank == 0:
|
if rank == 0:
|
||||||
logger.info("====> Epoch: {} {}".format(epoch, epoch_recorder.record()))
|
logger.info("====> Epoch: {} {}".format(epoch, epoch_recorder.record()))
|
||||||
if epoch >= hps.total_epoch and rank == 0:
|
if epoch >= hps.total_epoch and rank == 0:
|
||||||
|
|||||||
@@ -24,9 +24,10 @@ Timbre = 0.0
|
|||||||
mutex = multiprocessing.Lock()
|
mutex = multiprocessing.Lock()
|
||||||
f = open("%s/preprocess.log" % exp_dir, "a+")
|
f = open("%s/preprocess.log" % exp_dir, "a+")
|
||||||
|
|
||||||
with open('formanting.txt', 'r') as fvf:
|
with open("formanting.txt", "r") as fvf:
|
||||||
content = fvf.readlines()
|
content = fvf.readlines()
|
||||||
Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
|
Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
|
||||||
|
|
||||||
|
|
||||||
def println(strr):
|
def println(strr):
|
||||||
mutex.acquire()
|
mutex.acquire()
|
||||||
@@ -104,12 +105,14 @@ class PreProcess:
|
|||||||
idx1 += 1
|
idx1 += 1
|
||||||
break
|
break
|
||||||
self.norm_write(tmp_audio, idx0, idx1)
|
self.norm_write(tmp_audio, idx0, idx1)
|
||||||
#println("%s->Suc." % path)
|
# println("%s->Suc." % path)
|
||||||
except:
|
except:
|
||||||
println("%s->%s" % (path, traceback.format_exc()))
|
println("%s->%s" % (path, traceback.format_exc()))
|
||||||
|
|
||||||
def pipeline_mp(self, infos, thread_n):
|
def pipeline_mp(self, infos, thread_n):
|
||||||
for path, idx0 in tqdm.tqdm(infos, position=thread_n, leave=True, desc="thread:%s" % thread_n):
|
for path, idx0 in tqdm.tqdm(
|
||||||
|
infos, position=thread_n, leave=True, desc="thread:%s" % thread_n
|
||||||
|
):
|
||||||
self.pipeline(path, idx0)
|
self.pipeline(path, idx0)
|
||||||
|
|
||||||
def pipeline_mp_inp_dir(self, inp_root, n_p):
|
def pipeline_mp_inp_dir(self, inp_root, n_p):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import numpy as np, parselmouth, torch, pdb, sys, os
|
import numpy as np, parselmouth, torch, pdb, sys, os
|
||||||
from time import time as ttime
|
from time import time as ttime
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
import torchcrepe # Fork feature. Use the crepe f0 algorithm. New dependency (pip install torchcrepe)
|
import torchcrepe # Fork feature. Use the crepe f0 algorithm. New dependency (pip install torchcrepe)
|
||||||
from torch import Tensor
|
from torch import Tensor
|
||||||
import scipy.signal as signal
|
import scipy.signal as signal
|
||||||
import pyworld, os, traceback, faiss, librosa, torchcrepe
|
import pyworld, os, traceback, faiss, librosa, torchcrepe
|
||||||
@@ -15,6 +15,7 @@ bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
|
|||||||
|
|
||||||
input_audio_path2wav = {}
|
input_audio_path2wav = {}
|
||||||
|
|
||||||
|
|
||||||
@lru_cache
|
@lru_cache
|
||||||
def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
|
def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
|
||||||
audio = input_audio_path2wav[input_audio_path]
|
audio = input_audio_path2wav[input_audio_path]
|
||||||
@@ -74,24 +75,28 @@ class VC(object):
|
|||||||
def get_optimal_torch_device(self, index: int = 0) -> torch.device:
|
def get_optimal_torch_device(self, index: int = 0) -> torch.device:
|
||||||
# Get cuda device
|
# Get cuda device
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
return torch.device(f"cuda:{index % torch.cuda.device_count()}") # Very fast
|
return torch.device(
|
||||||
|
f"cuda:{index % torch.cuda.device_count()}"
|
||||||
|
) # Very fast
|
||||||
elif torch.backends.mps.is_available():
|
elif torch.backends.mps.is_available():
|
||||||
return torch.device("mps")
|
return torch.device("mps")
|
||||||
# Insert an else here to grab "xla" devices if available. TO DO later. Requires the torch_xla.core.xla_model library
|
# Insert an else here to grab "xla" devices if available. TO DO later. Requires the torch_xla.core.xla_model library
|
||||||
# Else wise return the "cpu" as a torch device,
|
# Else wise return the "cpu" as a torch device,
|
||||||
return torch.device("cpu")
|
return torch.device("cpu")
|
||||||
|
|
||||||
# Fork Feature: Compute f0 with the crepe method
|
# Fork Feature: Compute f0 with the crepe method
|
||||||
def get_f0_crepe_computation(
|
def get_f0_crepe_computation(
|
||||||
self,
|
self,
|
||||||
x,
|
x,
|
||||||
f0_min,
|
f0_min,
|
||||||
f0_max,
|
f0_max,
|
||||||
p_len,
|
p_len,
|
||||||
hop_length=160, # 512 before. Hop length changes the speed that the voice jumps to a different dramatic pitch. Lower hop lengths means more pitch accuracy but longer inference time.
|
hop_length=160, # 512 before. Hop length changes the speed that the voice jumps to a different dramatic pitch. Lower hop lengths means more pitch accuracy but longer inference time.
|
||||||
model="full", # Either use crepe-tiny "tiny" or crepe "full". Default is full
|
model="full", # Either use crepe-tiny "tiny" or crepe "full". Default is full
|
||||||
):
|
):
|
||||||
x = x.astype(np.float32) # fixes the F.conv2D exception. We needed to convert double to float.
|
x = x.astype(
|
||||||
|
np.float32
|
||||||
|
) # fixes the F.conv2D exception. We needed to convert double to float.
|
||||||
x /= np.quantile(np.abs(x), 0.999)
|
x /= np.quantile(np.abs(x), 0.999)
|
||||||
torch_device = self.get_optimal_torch_device()
|
torch_device = self.get_optimal_torch_device()
|
||||||
audio = torch.from_numpy(x).to(torch_device, copy=True)
|
audio = torch.from_numpy(x).to(torch_device, copy=True)
|
||||||
@@ -109,7 +114,7 @@ class VC(object):
|
|||||||
model,
|
model,
|
||||||
batch_size=hop_length * 2,
|
batch_size=hop_length * 2,
|
||||||
device=torch_device,
|
device=torch_device,
|
||||||
pad=True
|
pad=True,
|
||||||
)
|
)
|
||||||
p_len = p_len or x.shape[0] // hop_length
|
p_len = p_len or x.shape[0] // hop_length
|
||||||
# Resize the pitch for final f0
|
# Resize the pitch for final f0
|
||||||
@@ -118,17 +123,17 @@ class VC(object):
|
|||||||
target = np.interp(
|
target = np.interp(
|
||||||
np.arange(0, len(source) * p_len, len(source)) / p_len,
|
np.arange(0, len(source) * p_len, len(source)) / p_len,
|
||||||
np.arange(0, len(source)),
|
np.arange(0, len(source)),
|
||||||
source
|
source,
|
||||||
)
|
)
|
||||||
f0 = np.nan_to_num(target)
|
f0 = np.nan_to_num(target)
|
||||||
return f0 # Resized f0
|
return f0 # Resized f0
|
||||||
|
|
||||||
def get_f0_official_crepe_computation(
|
def get_f0_official_crepe_computation(
|
||||||
self,
|
self,
|
||||||
x,
|
x,
|
||||||
f0_min,
|
f0_min,
|
||||||
f0_max,
|
f0_max,
|
||||||
model="full",
|
model="full",
|
||||||
):
|
):
|
||||||
# Pick a batch size that doesn't cause memory errors on your gpu
|
# Pick a batch size that doesn't cause memory errors on your gpu
|
||||||
batch_size = 512
|
batch_size = 512
|
||||||
@@ -153,15 +158,15 @@ class VC(object):
|
|||||||
|
|
||||||
# Fork Feature: Compute pYIN f0 method
|
# Fork Feature: Compute pYIN f0 method
|
||||||
def get_f0_pyin_computation(self, x, f0_min, f0_max):
|
def get_f0_pyin_computation(self, x, f0_min, f0_max):
|
||||||
y, sr = librosa.load('saudio/Sidney.wav', self.sr, mono=True)
|
y, sr = librosa.load("saudio/Sidney.wav", self.sr, mono=True)
|
||||||
f0, _, _ = librosa.pyin(y, sr=self.sr, fmin=f0_min, fmax=f0_max)
|
f0, _, _ = librosa.pyin(y, sr=self.sr, fmin=f0_min, fmax=f0_max)
|
||||||
f0 = f0[1:] # Get rid of extra first frame
|
f0 = f0[1:] # Get rid of extra first frame
|
||||||
return f0
|
return f0
|
||||||
|
|
||||||
# Fork Feature: Acquire median hybrid f0 estimation calculation
|
# Fork Feature: Acquire median hybrid f0 estimation calculation
|
||||||
def get_f0_hybrid_computation(
|
def get_f0_hybrid_computation(
|
||||||
self,
|
self,
|
||||||
methods_str,
|
methods_str,
|
||||||
input_audio_path,
|
input_audio_path,
|
||||||
x,
|
x,
|
||||||
f0_min,
|
f0_min,
|
||||||
@@ -173,9 +178,9 @@ class VC(object):
|
|||||||
):
|
):
|
||||||
# Get various f0 methods from input to use in the computation stack
|
# Get various f0 methods from input to use in the computation stack
|
||||||
s = methods_str
|
s = methods_str
|
||||||
s = s.split('hybrid')[1]
|
s = s.split("hybrid")[1]
|
||||||
s = s.replace('[', '').replace(']', '')
|
s = s.replace("[", "").replace("]", "")
|
||||||
methods = s.split('+')
|
methods = s.split("+")
|
||||||
f0_computation_stack = []
|
f0_computation_stack = []
|
||||||
|
|
||||||
print("Calculating f0 pitch estimations for methods: %s" % str(methods))
|
print("Calculating f0 pitch estimations for methods: %s" % str(methods))
|
||||||
@@ -202,35 +207,39 @@ class VC(object):
|
|||||||
)
|
)
|
||||||
elif method == "crepe":
|
elif method == "crepe":
|
||||||
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max)
|
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max)
|
||||||
f0 = f0[1:] # Get rid of extra first frame
|
f0 = f0[1:] # Get rid of extra first frame
|
||||||
elif method == "crepe-tiny":
|
elif method == "crepe-tiny":
|
||||||
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny")
|
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny")
|
||||||
f0 = f0[1:] # Get rid of extra first frame
|
f0 = f0[1:] # Get rid of extra first frame
|
||||||
elif method == "mangio-crepe":
|
elif method == "mangio-crepe":
|
||||||
f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length)
|
f0 = self.get_f0_crepe_computation(
|
||||||
|
x, f0_min, f0_max, p_len, crepe_hop_length
|
||||||
|
)
|
||||||
elif method == "mangio-crepe-tiny":
|
elif method == "mangio-crepe-tiny":
|
||||||
f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length, "tiny")
|
f0 = self.get_f0_crepe_computation(
|
||||||
|
x, f0_min, f0_max, p_len, crepe_hop_length, "tiny"
|
||||||
|
)
|
||||||
elif method == "harvest":
|
elif method == "harvest":
|
||||||
f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
|
f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
|
||||||
if filter_radius > 2:
|
if filter_radius > 2:
|
||||||
f0 = signal.medfilt(f0, 3)
|
f0 = signal.medfilt(f0, 3)
|
||||||
f0 = f0[1:] # Get rid of first frame.
|
f0 = f0[1:] # Get rid of first frame.
|
||||||
elif method == "dio": # Potentially buggy?
|
elif method == "dio": # Potentially buggy?
|
||||||
f0, t = pyworld.dio(
|
f0, t = pyworld.dio(
|
||||||
x.astype(np.double),
|
x.astype(np.double),
|
||||||
fs=self.sr,
|
fs=self.sr,
|
||||||
f0_ceil=f0_max,
|
f0_ceil=f0_max,
|
||||||
f0_floor=f0_min,
|
f0_floor=f0_min,
|
||||||
frame_period=10
|
frame_period=10,
|
||||||
)
|
)
|
||||||
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
|
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
|
||||||
f0 = signal.medfilt(f0, 3)
|
f0 = signal.medfilt(f0, 3)
|
||||||
f0 = f0[1:]
|
f0 = f0[1:]
|
||||||
#elif method == "pyin": Not Working just yet
|
# elif method == "pyin": Not Working just yet
|
||||||
# f0 = self.get_f0_pyin_computation(x, f0_min, f0_max)
|
# f0 = self.get_f0_pyin_computation(x, f0_min, f0_max)
|
||||||
# Push method to the stack
|
# Push method to the stack
|
||||||
f0_computation_stack.append(f0)
|
f0_computation_stack.append(f0)
|
||||||
|
|
||||||
for fc in f0_computation_stack:
|
for fc in f0_computation_stack:
|
||||||
print(len(fc))
|
print(len(fc))
|
||||||
|
|
||||||
@@ -280,13 +289,13 @@ class VC(object):
|
|||||||
f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
|
f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
|
||||||
if filter_radius > 2:
|
if filter_radius > 2:
|
||||||
f0 = signal.medfilt(f0, 3)
|
f0 = signal.medfilt(f0, 3)
|
||||||
elif f0_method == "dio": # Potentially Buggy?
|
elif f0_method == "dio": # Potentially Buggy?
|
||||||
f0, t = pyworld.dio(
|
f0, t = pyworld.dio(
|
||||||
x.astype(np.double),
|
x.astype(np.double),
|
||||||
fs=self.sr,
|
fs=self.sr,
|
||||||
f0_ceil=f0_max,
|
f0_ceil=f0_max,
|
||||||
f0_floor=f0_min,
|
f0_floor=f0_min,
|
||||||
frame_period=10
|
frame_period=10,
|
||||||
)
|
)
|
||||||
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
|
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
|
||||||
f0 = signal.medfilt(f0, 3)
|
f0 = signal.medfilt(f0, 3)
|
||||||
@@ -295,12 +304,17 @@ class VC(object):
|
|||||||
elif f0_method == "crepe-tiny":
|
elif f0_method == "crepe-tiny":
|
||||||
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny")
|
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny")
|
||||||
elif f0_method == "mangio-crepe":
|
elif f0_method == "mangio-crepe":
|
||||||
f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length)
|
f0 = self.get_f0_crepe_computation(
|
||||||
|
x, f0_min, f0_max, p_len, crepe_hop_length
|
||||||
|
)
|
||||||
elif f0_method == "mangio-crepe-tiny":
|
elif f0_method == "mangio-crepe-tiny":
|
||||||
f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length, "tiny")
|
f0 = self.get_f0_crepe_computation(
|
||||||
|
x, f0_min, f0_max, p_len, crepe_hop_length, "tiny"
|
||||||
|
)
|
||||||
elif f0_method == "rmvpe":
|
elif f0_method == "rmvpe":
|
||||||
if hasattr(self, "model_rmvpe") == False:
|
if hasattr(self, "model_rmvpe") == False:
|
||||||
from rmvpe import RMVPE
|
from rmvpe import RMVPE
|
||||||
|
|
||||||
print("loading rmvpe model")
|
print("loading rmvpe model")
|
||||||
self.model_rmvpe = RMVPE(
|
self.model_rmvpe = RMVPE(
|
||||||
"rmvpe.pt", is_half=self.is_half, device=self.device
|
"rmvpe.pt", is_half=self.is_half, device=self.device
|
||||||
@@ -311,7 +325,7 @@ class VC(object):
|
|||||||
# Perform hybrid median pitch estimation
|
# Perform hybrid median pitch estimation
|
||||||
input_audio_path2wav[input_audio_path] = x.astype(np.double)
|
input_audio_path2wav[input_audio_path] = x.astype(np.double)
|
||||||
f0 = self.get_f0_hybrid_computation(
|
f0 = self.get_f0_hybrid_computation(
|
||||||
f0_method,
|
f0_method,
|
||||||
input_audio_path,
|
input_audio_path,
|
||||||
x,
|
x,
|
||||||
f0_min,
|
f0_min,
|
||||||
@@ -319,7 +333,7 @@ class VC(object):
|
|||||||
p_len,
|
p_len,
|
||||||
filter_radius,
|
filter_radius,
|
||||||
crepe_hop_length,
|
crepe_hop_length,
|
||||||
time_step
|
time_step,
|
||||||
)
|
)
|
||||||
|
|
||||||
f0 *= pow(2, f0_up_key / 12)
|
f0 *= pow(2, f0_up_key / 12)
|
||||||
|
|||||||
Reference in New Issue
Block a user