mirror of
https://github.com/Mangio621/Mangio-RVC-Fork.git
synced 2025-12-16 11:37:44 +01:00
Added CLI functionality for inference! No need to use the GUI for inference. Added an audio-outputs directory
This commit is contained in:
17
README.md
17
README.md
@@ -52,6 +52,7 @@ Special thanks to discord user @kalomaze#2983 for creating a temporary colab not
|
|||||||
+ Added CLI functionality
|
+ Added CLI functionality
|
||||||
+ added train-index-cli.py to train the feature index without the GUI
|
+ added train-index-cli.py to train the feature index without the GUI
|
||||||
+ Added extract-small-model-cli.py to extract the small model without the GUI
|
+ Added extract-small-model-cli.py to extract the small model without the GUI
|
||||||
|
+ added infer-cli.py to do inference without the GUI.
|
||||||
|
|
||||||
## This repository has the following features too:
|
## This repository has the following features too:
|
||||||
+ Reduce tone leakage by replacing the source feature with the training-set feature using top-1 retrieval;
|
+ Reduce tone leakage by replacing the source feature with the training-set feature using top-1 retrieval;
|
||||||
@@ -193,6 +194,22 @@ python train-index-cli.py mi-test
|
|||||||
python extract-small-model-cli.py logs/G_99750.pth MyModel 40k 1 "This is a cool model."
|
python extract-small-model-cli.py logs/G_99750.pth MyModel 40k 1 "This is a cool model."
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Inference without the GUI (Voice Conversion)
|
||||||
|
```bash
|
||||||
|
# + Mangio-RVC-Fork Feature. Infer audio with just the CLI
|
||||||
|
# Arguments
|
||||||
|
# arg1 = model name in weights folder. (mi-test.pth)
|
||||||
|
# arg2 = source file path (.wav)
|
||||||
|
# arg3 = output file name to be placed in ./audio-outputs (myoutput.wav).
|
||||||
|
# arg4 = feature index file path. (E:\added_IVF3042_Flat_nprobe_1.index)
|
||||||
|
# arg5 = speaker ID (0)
|
||||||
|
# arg6 = transposition. (12 = 12 semitones up)
|
||||||
|
# arg7 = f0 method. (harvest, pm, crepe, dio, crepe-tiny)
|
||||||
|
# arg8 = crepe hop length. Use 128. (applies to crepe f0 method only)
|
||||||
|
# arg9 = feature index ratio (0.78)
|
||||||
|
python infer-cli.py mi-test.pth E:\my-source-file.wav conversion_output.wav E:\added_IVF3042_Flat_nprobe_1.index 0 -2 pm 128 0.78
|
||||||
|
```
|
||||||
|
|
||||||
# Running the Tensorboard 📉
|
# Running the Tensorboard 📉
|
||||||
```bash
|
```bash
|
||||||
cd Mangio-RVC-Fork
|
cd Mangio-RVC-Fork
|
||||||
|
|||||||
19
config.py
19
config.py
@@ -4,20 +4,21 @@ from multiprocessing import cpu_count
|
|||||||
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
def __init__(self):
|
def __init__(self, is_gui=True):
|
||||||
self.device = "cuda:0"
|
self.device = "cuda:0"
|
||||||
self.is_half = True
|
self.is_half = True
|
||||||
self.n_cpu = 0
|
self.n_cpu = 0
|
||||||
self.gpu_name = None
|
self.gpu_name = None
|
||||||
self.gpu_mem = None
|
self.gpu_mem = None
|
||||||
(
|
if(is_gui):
|
||||||
self.python_cmd,
|
(
|
||||||
self.listen_port,
|
self.python_cmd,
|
||||||
self.iscolab,
|
self.listen_port,
|
||||||
self.noparallel,
|
self.iscolab,
|
||||||
self.noautoopen,
|
self.noparallel,
|
||||||
self.paperspace,
|
self.noautoopen,
|
||||||
) = self.arg_parse()
|
self.paperspace,
|
||||||
|
) = self.arg_parse()
|
||||||
|
|
||||||
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
|
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
# Fork Feature Mangio RVC Fork. Extract the small model from a checkpoint with CLI.
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
from train.process_ckpt import extract_small_model
|
from train.process_ckpt import extract_small_model
|
||||||
|
|
||||||
|
|||||||
181
infer-cli.py
Normal file
181
infer-cli.py
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
# Fork Feature Mangio RVC Fork. Infer Audio with just the CLI
|
||||||
|
|
||||||
|
import torch, os, traceback, sys, warnings, shutil, numpy as np
|
||||||
|
|
||||||
|
from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
|
||||||
|
from fairseq import checkpoint_utils
|
||||||
|
from vc_infer_pipeline import VC
|
||||||
|
from config import Config
|
||||||
|
from my_utils import load_audio
|
||||||
|
|
||||||
|
# Fork Feature. Write an audio file
|
||||||
|
from scipy.io.wavfile import write
|
||||||
|
|
||||||
|
# Build the runtime configuration with is_gui=False so that Config does not
# run its own argument parsing; this script reads sys.argv itself.
config = Config(is_gui=False)

# Directory (relative to the working directory) that get_vc loads models from.
weight_root = 'weights'

# Shared inference state. These globals are filled in lazily by the functions
# named in each trailing comment and read by vc_single.
n_spk = None # Set from get_vc
tgt_sr = 0 # Set from get_vc
net_g = None # Set from get_vc
vc = None # Set from get_vc
cpt = None # Set from get_vc

hubert_model = None # Set from vc_single
|
||||||
|
|
||||||
|
def get_hubert():
    """Load the HuBERT model from ``hubert_base.pt`` and prepare it for inference.

    The first model of the loaded ensemble is moved to ``config.device``,
    cast to half or full precision according to ``config.is_half``, and
    switched to eval mode.

    Returns:
        The prepared model object.
    """
    ensemble, _saved_cfg, _task = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"],
        suffix="",
    )
    model = ensemble[0].to(config.device)
    # Precision follows the global configuration.
    model = model.half() if config.is_half else model.float()
    model.eval()
    return model
|
||||||
|
|
||||||
|
def get_vc(sid):
    """Load the voice model ``sid`` into the module-level inference state.

    Args:
        sid: File name of a checkpoint inside ``weight_root`` (for example
            ``"mi-test.pth"``). An empty list means "no model selected" and
            triggers a cleanup of any previously loaded model instead.

    Returns:
        A dict describing an update: ``{"visible": False, ...}`` after a
        cleanup, or ``{"visible": True, "maximum": n_spk, ...}`` after a
        successful load.

    Side effects:
        Rebinds the globals ``n_spk``, ``tgt_sr``, ``net_g``, ``vc`` and
        ``cpt`` (and ``hubert_model`` on cleanup).
    """
    global n_spk, tgt_sr, net_g, vc, cpt, hubert_model
    if sid == []:
        # Because of polling, check whether sid switched from "a model is
        # loaded" to "no model"; only then is there anything to release.
        if hubert_model is not None:
            print("clean_empty_cache")
            del net_g, n_spk, vc, hubert_model, tgt_sr
            hubert_model = net_g = n_spk = vc = tgt_sr = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # Upstream note: without the rebuild-and-delete dance below the
            # memory is not fully released.
            if_f0 = cpt.get("f0", 1)
            if if_f0 == 1:
                net_g = SynthesizerTrnMs256NSFsid(
                    *cpt["config"], is_half=config.is_half
                )
            else:
                net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
            del net_g, cpt
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # Rebind both names: `del` on a global removes it entirely, and a
            # later read of `net_g` would otherwise raise NameError.
            net_g = None
            cpt = None
        return {"visible": False, "__type__": "update"}

    person = "%s/%s" % (weight_root, sid)
    print("loading %s" % person)
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # from sources you trust.
    cpt = torch.load(person, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    if_f0 = cpt.get("f0", 1)
    if if_f0 == 1:
        net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
    else:
        net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    # Drop the enc_q submodule before loading weights (presumably not needed
    # for inference -- TODO confirm against infer_pack.models).
    del net_g.enc_q
    # Upstream note: without this line cleanup is incomplete, oddly enough.
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(config.device)
    if config.is_half:
        net_g = net_g.half()
    else:
        net_g = net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]
    print("Mangio-RVC-Fork Infer-CLI: Model has been loaded...")
    return {"visible": True, "maximum": n_spk, "__type__": "update"}
|
||||||
|
|
||||||
|
def vc_single(
    sid,
    input_audio,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    index_rate,
    crepe_hop_length,
):
    """Convert a single audio file with the currently loaded voice model.

    Args:
        sid: Speaker ID forwarded to the pipeline.
        input_audio: Path of the source audio file, or ``None``.
        f0_up_key: Transposition; converted with ``int()`` before use.
        f0_file: Optional f0 file forwarded to the pipeline (may be ``None``).
        f0_method: Name of the f0 extraction method forwarded to the pipeline.
        file_index: Path of the feature index file. Surrounding spaces,
            quotes and newlines are stripped, and ``"trained"`` is replaced
            with ``"added"``.
        index_rate: Feature index ratio forwarded to the pipeline.
        crepe_hop_length: Hop length forwarded to the pipeline.

    Returns:
        ``("Success", (tgt_sr, audio))`` on success;
        ``("You need to upload an audio", None)`` when ``input_audio`` is
        ``None``; otherwise ``(traceback_text, (None, None))``.

    Side effects:
        Loads the HuBERT model into the global ``hubert_model`` on first use.
    """
    global tgt_sr, net_g, vc, hubert_model, cpt
    if input_audio is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    try:
        audio = load_audio(input_audio, 16000)
        times = [0, 0, 0]
        if hubert_model is None:
            hubert_model = get_hubert()
        if_f0 = cpt.get("f0", 1)
        # Guard against a common beginner mistake: clean up the pasted path
        # and silently point a "trained" index name at the "added" one.
        file_index = (
            file_index.strip(" ")
            .strip('"')
            .strip("\n")
            .strip('"')
            .strip(" ")
            .replace("trained", "added")
        )
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            sid,
            audio,
            times,
            f0_up_key,
            f0_method,
            file_index,
            index_rate,
            if_f0,
            crepe_hop_length,
            f0_file=f0_file,
        )
        print(
            "npy: ", times[0], "s, f0: ", times[1], "s, infer: ", times[2], "s", sep=""
        )
        return "Success", (tgt_sr, audio_opt)
    except Exception:
        # Only ordinary errors are reported as a failed conversion;
        # KeyboardInterrupt / SystemExit propagate so Ctrl+C still works.
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
|
||||||
|
|
||||||
|
def start_inference():
    """Run one voice conversion from command-line arguments and save the result.

    Expected ``sys.argv`` layout (after the script name):
        1. model file name inside the weights folder (e.g. ``mi-test.pth``)
        2. source audio path
        3. output file name, written inside ``./audio-outputs``
        4. feature index file path
        5. speaker ID (int)
        6. transposition in semitones (parsed as float, truncated to int later)
        7. f0 method name
        8. crepe hop length (int)
        9. feature index ratio (float)

    Exits with status 1 and a usage message when arguments are missing.
    On inference failure the traceback text is printed.
    """
    output_dir = 'audio-outputs'
    required_args = 9
    if len(sys.argv) < required_args + 1:
        print(
            "Mangio-RVC-Fork Infer-CLI: Expected %d arguments but got %d."
            % (required_args, len(sys.argv) - 1)
        )
        print(
            "Usage: python infer-cli.py <model.pth> <source.wav> <output.wav> "
            "<feature.index> <speaker_id> <transposition> <f0_method> "
            "<crepe_hop_length> <feature_ratio>"
        )
        sys.exit(1)

    # Get essential paths first
    model_name = sys.argv[1]  # MyModel.pth
    source_audio_path = sys.argv[2]  # Source audio path
    output_file_name = sys.argv[3]  # File name placed inside audio-outputs
    feature_index_path = sys.argv[4]  # Feature index file path
    f0_file = None  # Not implemented yet. To be implemented later on

    # Get parameters for inference
    speaker_id = int(sys.argv[5])  # 0
    transposition = float(sys.argv[6])  # 0.0 float
    f0_method = sys.argv[7]  # harvest
    crepe_hop_length = int(sys.argv[8])  # 128
    feature_ratio = float(sys.argv[9])  # 0.78

    # Get VC first. Sets the global vc to a VC from the pipeline script
    print("Mangio-RVC-Fork Infer-CLI: Starting the inference...")
    vc_data = get_vc(model_name)
    print(vc_data)
    print("Mangio-RVC-Fork Infer-CLI: Performing inference...")
    conversion_data = vc_single(
        speaker_id,
        source_audio_path,
        transposition,
        f0_file,
        f0_method,
        feature_index_path,
        feature_ratio,
        crepe_hop_length,
    )
    if conversion_data[0] == "Success":
        output_path = '%s/%s' % (output_dir, output_file_name)
        print("Mangio-RVC-Fork Infer-CLI: Inference succeeded. Writing to %s..." % output_path)
        # Make sure the output folder exists so the write cannot fail on a
        # fresh checkout or a custom working directory.
        os.makedirs(output_dir, exist_ok=True)
        # conversion_data[1] is (sample_rate, audio)
        write(output_path, conversion_data[1][0], conversion_data[1][1])
        print("Mangio-RVC-Fork Infer-CLI: Finished! Saved output to %s" % output_path)
    else:
        print("Mangio-RVC-Fork Infer-CLI: Inference failed. Here's the traceback: ")
        print(conversion_data[0])


if __name__ == "__main__":
    start_inference()
|
||||||
Reference in New Issue
Block a user