# Fork Feature: Mangio-RVC-Fork. Infer audio with just the CLI.

import torch, os, traceback, sys, warnings, shutil, numpy as np

from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
from fairseq import checkpoint_utils
from vc_infer_pipeline import VC
from config import Config
from my_utils import load_audio

# Fork Feature: write an audio file
from scipy.io.wavfile import write

config = Config(is_gui=False)

weight_root = "weights"

n_spk = None  # Set from get_vc
tgt_sr = 0  # Set from get_vc
net_g = None  # Set from get_vc
vc = None  # Set from get_vc
cpt = None  # Set from get_vc

hubert_model = None  # Set from vc_single


def get_hubert():
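    """Load the HuBERT feature extractor from `hubert_base.pt` (expected in the
    working directory) and move it to the device/precision given by `config`."""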
    models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"],
        suffix="",
    )
    hubert_model = models[0]
    hubert_model = hubert_model.to(config.device)
    if config.is_half:
        hubert_model = hubert_model.half()
    else:
        hubert_model = hubert_model.float()
    hubert_model.eval()
    return hubert_model


def get_vc(sid):
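    """Load the voice model `sid` (a .pth filename under `weight_root`) and set
    the module-level globals used by `vc_single`.

    The empty-`sid` branch and the dict return values appear to be carried over
    from the Gradio WebUI this CLI was adapted from; the CLI simply prints the
    returned dict.
    """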
    global n_spk, tgt_sr, net_g, vc, cpt
    if sid == []:
        global hubert_model
        # Because of polling, check whether sid switched from a loaded model to no model.
        if hubert_model is not None:
            print("clean_empty_cache")
            del net_g, n_spk, vc, hubert_model, tgt_sr  # , cpt
            hubert_model = net_g = n_spk = vc = tgt_sr = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            ### Without the steps below, the cache is not cleaned up completely.
            if_f0 = cpt.get("f0", 1)
            if if_f0 == 1:
                net_g = SynthesizerTrnMs256NSFsid(
                    *cpt["config"], is_half=config.is_half
                )
            else:
                net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
            del net_g, cpt
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            cpt = None
        return {"visible": False, "__type__": "update"}
    person = "%s/%s" % (weight_root, sid)
    print("loading %s" % person)
    cpt = torch.load(person, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    if_f0 = cpt.get("f0", 1)
    if if_f0 == 1:
        net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
    else:
        net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))  # Without this line the cleanup is incomplete. Strange, but true.
    net_g.eval().to(config.device)
    if config.is_half:
        net_g = net_g.half()
    else:
        net_g = net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]
    print("Mangio-RVC-Fork Infer-CLI: Model has been loaded...")
    return {"visible": True, "maximum": n_spk, "__type__": "update"}


def vc_single(
    sid,
    input_audio,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    # file_big_npy,
    index_rate,
    crepe_hop_length,
):  # spk_item, input_audio0, vc_transform0, f0_file, f0method0
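    """Run voice conversion on a single audio file.

    Returns ("Success", (tgt_sr, audio_opt)) on success, or (traceback_string,
    (None, None)) on failure. `get_vc` must be called first so that the
    module-level `net_g`, `vc`, and `cpt` globals are populated.
    """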
    global tgt_sr, net_g, vc, hubert_model, cpt
    if input_audio is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    try:
        audio = load_audio(input_audio, 16000)
        times = [0, 0, 0]
        if hubert_model is None:
            hubert_model = get_hubert()
        if_f0 = cpt.get("f0", 1)
        file_index = (
            file_index.strip(" ")
            .strip('"')
            .strip("\n")
            .strip('"')
            .strip(" ")
            .replace("trained", "added")
        )  # Guard against user error: automatically swap "trained" for "added" in the index name.
        # file_big_npy = (
        #     file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        # )
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            sid,
            audio,
            times,
            f0_up_key,
            f0_method,
            file_index,
            # file_big_npy,
            index_rate,
            if_f0,
            crepe_hop_length,
            f0_file=f0_file,
        )
        print(
            "npy: ", times[0], "s, f0: ", times[1], "s, infer: ", times[2], "s", sep=""
        )
        return "Success", (tgt_sr, audio_opt)
    except:
        info = traceback.format_exc()
        print(info)
        return info, (None, None)


def start_inference():
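    """Parse the positional command-line arguments, load the model, run the
    conversion, and write the result under the `audio-outputs` directory."""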
    # Get essential paths first
    model_name = str(sys.argv[1])  # e.g. MyModel.pth (looked up under weights/)
    source_audio_path = str(sys.argv[2])  # source audio path
    output_file_name = str(sys.argv[3])  # output file name, written under audio-outputs/, e.g. conversion_out.wav
    feature_index_path = str(sys.argv[4])  # feature index (.index) file path
    f0_file = None  # Not implemented yet. To be implemented later on.

    # Get parameters for inference
    speaker_id = int(sys.argv[5])  # e.g. 0
    transposition = float(sys.argv[6])  # e.g. 0.0 (transpose, in semitones)
    f0_method = str(sys.argv[7])  # e.g. harvest
    crepe_hop_length = int(sys.argv[8])  # e.g. 128
    feature_ratio = float(sys.argv[9])  # e.g. 0.78
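
    # Example invocation (the script and index file names below are illustrative
    # placeholders, not fixed by this file):
    #   python infer-cli.py MyModel.pth path/to/source.wav conversion_out.wav \
    #       path/to/added_feature.index 0 0.0 harvest 128 0.78
    # which would load weights/MyModel.pth and write audio-outputs/conversion_out.wav.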

    # Get VC first: set the global vc to a VC instance from the pipeline script.
    print("Mangio-RVC-Fork Infer-CLI: Starting the inference...")
    vc_data = get_vc(model_name)
    print(vc_data)
    print("Mangio-RVC-Fork Infer-CLI: Performing inference...")
    conversion_data = vc_single(
        speaker_id,
        source_audio_path,
        transposition,
        f0_file,
        f0_method,
        feature_index_path,
        feature_ratio,
        crepe_hop_length,
    )
    if conversion_data[0] == "Success":
        print(
            "Mangio-RVC-Fork Infer-CLI: Inference succeeded. Writing to %s/%s..."
            % ("audio-outputs", output_file_name)
        )
        # Go ahead with the output: write(path, sample_rate, audio_data)
        write(
            "%s/%s" % ("audio-outputs", output_file_name),
            conversion_data[1][0],
            conversion_data[1][1],
        )
        print(
            "Mangio-RVC-Fork Infer-CLI: Finished! Saved output to %s/%s"
            % ("audio-outputs", output_file_name)
        )
    else:
        print("Mangio-RVC-Fork Infer-CLI: Inference failed. Here's the traceback:")
        print(conversion_data[0])


start_inference()