Merge pull request #49 from Mangio621/TRUE-FIXED-FP16

Fix training times by fixing fp16 detection
Author: kalomaze
Date: 2023-07-22 22:48:22 -05:00
Committed by: GitHub
8 changed files with 673 additions and 424 deletions

View File

@@ -1,19 +1,73 @@
 import argparse
 import sys
 import torch
+import json
 from multiprocessing import cpu_count
 
+global usefp16
+usefp16 = False
+
 
 def use_fp32_config():
-    for config_file in ["32k.json", "40k.json", "48k.json"]:
-        with open(f"configs/{config_file}", "r") as f:
-            strr = f.read().replace("true", "false")
-        with open(f"configs/{config_file}", "w") as f:
-            f.write(strr)
-    with open("trainset_preprocess_pipeline_print.py", "r") as f:
-        strr = f.read().replace("3.7", "3.0")
-    with open("trainset_preprocess_pipeline_print.py", "w") as f:
-        f.write(strr)
+    usefp16 = False
+    device_capability = 0
+    if torch.cuda.is_available():
+        device = torch.device("cuda:0")  # Assuming you have only one GPU (index 0).
+        device_capability = torch.cuda.get_device_capability(device)[0]
+        if device_capability >= 7:
+            usefp16 = True
+            for config_file in ["32k.json", "40k.json", "48k.json"]:
+                with open(f"configs/{config_file}", "r") as d:
+                    data = json.load(d)
+
+                if "train" in data and "fp16_run" in data["train"]:
+                    data["train"]["fp16_run"] = True
+
+                with open(f"configs/{config_file}", "w") as d:
+                    json.dump(data, d, indent=4)
+
+                print(f"Set fp16_run to true in {config_file}")
+
+            with open(
+                "trainset_preprocess_pipeline_print.py", "r", encoding="utf-8"
+            ) as f:
+                strr = f.read()
+
+            strr = strr.replace("3.0", "3.7")
+
+            with open(
+                "trainset_preprocess_pipeline_print.py", "w", encoding="utf-8"
+            ) as f:
+                f.write(strr)
+        else:
+            for config_file in ["32k.json", "40k.json", "48k.json"]:
+                with open(f"configs/{config_file}", "r") as f:
+                    data = json.load(f)
+
+                if "train" in data and "fp16_run" in data["train"]:
+                    data["train"]["fp16_run"] = False
+
+                with open(f"configs/{config_file}", "w") as d:
+                    json.dump(data, d, indent=4)
+
+                print(f"Set fp16_run to false in {config_file}")
+
+            with open(
+                "trainset_preprocess_pipeline_print.py", "r", encoding="utf-8"
+            ) as f:
+                strr = f.read()
+
+            strr = strr.replace("3.7", "3.0")
+
+            with open(
+                "trainset_preprocess_pipeline_print.py", "w", encoding="utf-8"
+            ) as f:
+                f.write(strr)
+    else:
+        print(
+            "CUDA is not available. Make sure you have an NVIDIA GPU and CUDA installed."
+        )
+    return (usefp16, device_capability)
 
 
 class Config:
@@ -32,7 +86,7 @@ class Config:
             self.paperspace,
             self.is_cli,
         ) = self.arg_parse()
         self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
 
     @staticmethod
@@ -50,11 +104,15 @@ class Config:
             action="store_true",
             help="Do not open in browser automatically",
         )
         parser.add_argument(  # Fork Feature. Paperspace integration for web UI
-            "--paperspace", action="store_true", help="Note that this argument just shares a gradio link for the web UI. Thus can be used on other non-local CLI systems."
+            "--paperspace",
+            action="store_true",
+            help="Note that this argument just shares a gradio link for the web UI. Thus can be used on other non-local CLI systems.",
         )
         parser.add_argument(  # Fork Feature. Embed a CLI into the infer-web.py
-            "--is_cli", action="store_true", help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!"
+            "--is_cli",
+            action="store_true",
+            help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!",
         )
         cmd_opts = parser.parse_args()
@@ -95,9 +153,9 @@ class Config:
             ):
                 print("Found GPU", self.gpu_name, ", force to fp32")
                 self.is_half = False
-                use_fp32_config()
             else:
                 print("Found GPU", self.gpu_name)
+                use_fp32_config()
             self.gpu_mem = int(
                 torch.cuda.get_device_properties(i_device).total_memory
                 / 1024
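
Note: the capability check above is the substance of the fix. fp16_run is only enabled when the GPU reports CUDA compute capability 7.0 or higher (Volta/Turing and newer, the cards with tensor cores), which is where half precision actually trains faster. A minimal standalone sketch of the same test; the helper name should_use_fp16 is illustrative, not part of this repo:

import torch

def should_use_fp16() -> bool:
    # Mirror of the detection in use_fp32_config(): fp16 only pays off on
    # GPUs with compute capability major version >= 7 (tensor cores).
    if not torch.cuda.is_available():
        return False
    major, _minor = torch.cuda.get_device_capability(torch.device("cuda:0"))
    return major >= 7

print("fp16 recommended:", should_use_fp16())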

View File

@@ -5,10 +5,10 @@ sys.path.append(now_dir)
 from my_utils import load_audio
 import pyworld
 import numpy as np, logging
-import torchcrepe # Fork Feature. Crepe algo for training and preprocess
+import torchcrepe  # Fork Feature. Crepe algo for training and preprocess
 import torch
-from torch import Tensor # Fork Feature. Used for pitch prediction for torch crepe.
-import scipy.signal as signal # Fork Feature hybrid inference
+from torch import Tensor  # Fork Feature. Used for pitch prediction for torch crepe.
+import scipy.signal as signal  # Fork Feature hybrid inference
 import tqdm
 
 logging.getLogger("numba").setLevel(logging.WARNING)
@@ -19,9 +19,9 @@ f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
 DoFormant = False
-with open('formanting.txt', 'r') as fvf:
+with open("formanting.txt", "r") as fvf:
     content = fvf.readlines()
-Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
+Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
 
 
 def printt(strr):
@@ -32,7 +32,7 @@ def printt(strr):
 n_p = int(sys.argv[2])
 f0method = sys.argv[3]
 extraction_crepe_hop_length = 0
 try:
     extraction_crepe_hop_length = int(sys.argv[4])
 except:
@@ -53,11 +53,11 @@ class FeatureInput(object):
         self.f0_min = 50.0
         self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
         self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
 
     # EXPERIMENTAL. PROBABLY BUGGY
     def get_f0_hybrid_computation(
         self,
         methods_str,
         x,
         f0_min,
         f0_max,
@@ -67,9 +67,9 @@ class FeatureInput(object):
     ):
         # Get various f0 methods from input to use in the computation stack
         s = methods_str
-        s = s.split('hybrid')[1]
-        s = s.replace('[', '').replace(']', '')
-        methods = s.split('+')
+        s = s.split("hybrid")[1]
+        s = s.replace("[", "").replace("]", "")
+        methods = s.split("+")
         f0_computation_stack = []
 
         print("Calculating f0 pitch estimations for methods: %s" % str(methods))
@@ -99,7 +99,9 @@ class FeatureInput(object):
                 torch_device_index = 0
                 torch_device = None
                 if torch.cuda.is_available():
-                    torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
+                    torch_device = torch.device(
+                        f"cuda:{torch_device_index % torch.cuda.device_count()}"
+                    )
                 elif torch.backends.mps.is_available():
                     torch_device = torch.device("mps")
                 else:
@@ -123,7 +125,7 @@ class FeatureInput(object):
                 f0 = torchcrepe.filter.mean(f0, 3)
                 f0[pd < 0.1] = 0
                 f0 = f0[0].cpu().numpy()
-                f0 = f0[1:] # Get rid of extra first frame
+                f0 = f0[1:]  # Get rid of extra first frame
             elif method == "mangio-crepe":
                 # print("Performing crepe pitch extraction. (EXPERIMENTAL)")
                 # print("CREPE PITCH EXTRACTION HOP LENGTH: " + str(crepe_hop_length))
@@ -132,7 +134,9 @@ class FeatureInput(object):
                 torch_device_index = 0
                 torch_device = None
                 if torch.cuda.is_available():
-                    torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
+                    torch_device = torch.device(
+                        f"cuda:{torch_device_index % torch.cuda.device_count()}"
+                    )
                 elif torch.backends.mps.is_available():
                     torch_device = torch.device("mps")
                 else:
@@ -156,7 +160,7 @@ class FeatureInput(object):
                     "full",
                     batch_size=crepe_hop_length * 2,
                     device=torch_device,
-                    pad=True
+                    pad=True,
                 )
                 p_len = p_len or x.shape[0] // crepe_hop_length
                 # Resize the pitch
@@ -165,7 +169,7 @@ class FeatureInput(object):
                 target = np.interp(
                     np.arange(0, len(source) * p_len, len(source)) / p_len,
                     np.arange(0, len(source)),
-                    source
+                    source,
                 )
                 f0 = np.nan_to_num(target)
             elif method == "harvest":
elif method == "harvest": elif method == "harvest":
@@ -191,12 +195,12 @@ class FeatureInput(object):
f0 = signal.medfilt(f0, 3) f0 = signal.medfilt(f0, 3)
f0 = f0[1:] f0 = f0[1:]
f0_computation_stack.append(f0) f0_computation_stack.append(f0)
for fc in f0_computation_stack: for fc in f0_computation_stack:
print(len(fc)) print(len(fc))
# print("Calculating hybrid median f0 from the stack of: %s" % str(methods)) # print("Calculating hybrid median f0 from the stack of: %s" % str(methods))
f0_median_hybrid = None f0_median_hybrid = None
if len(f0_computation_stack) == 1: if len(f0_computation_stack) == 1:
f0_median_hybrid = f0_computation_stack[0] f0_median_hybrid = f0_computation_stack[0]
@@ -236,10 +240,9 @@ class FeatureInput(object):
         elif f0_method == "rmvpe":
             if hasattr(self, "model_rmvpe") == False:
                 from rmvpe import RMVPE
                 print("loading rmvpe model")
-                self.model_rmvpe = RMVPE(
-                    "rmvpe.pt", is_half=False, device="cuda:0"
-                )
+                self.model_rmvpe = RMVPE("rmvpe.pt", is_half=False, device="cuda:0")
             f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
         elif f0_method == "dio":
             f0, t = pyworld.dio(
@@ -250,12 +253,16 @@ class FeatureInput(object):
                 frame_period=1000 * self.hop / self.fs,
             )
             f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs)
-        elif f0_method == "crepe": # Fork Feature: Added crepe f0 for f0 feature extraction
+        elif (
+            f0_method == "crepe"
+        ):  # Fork Feature: Added crepe f0 for f0 feature extraction
             # Pick a batch size that doesn't cause memory errors on your gpu
             torch_device_index = 0
             torch_device = None
             if torch.cuda.is_available():
-                torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
+                torch_device = torch.device(
+                    f"cuda:{torch_device_index % torch.cuda.device_count()}"
+                )
             elif torch.backends.mps.is_available():
                 torch_device = torch.device("mps")
             else:
@@ -287,7 +294,9 @@ class FeatureInput(object):
             torch_device_index = 0
             torch_device = None
             if torch.cuda.is_available():
-                torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
+                torch_device = torch.device(
+                    f"cuda:{torch_device_index % torch.cuda.device_count()}"
+                )
             elif torch.backends.mps.is_available():
                 torch_device = torch.device("mps")
             else:
@@ -311,7 +320,7 @@ class FeatureInput(object):
                 "full",
                 batch_size=crepe_hop_length * 2,
                 device=torch_device,
-                pad=True
+                pad=True,
             )
             p_len = p_len or x.shape[0] // crepe_hop_length
             # Resize the pitch
@@ -320,20 +329,20 @@ class FeatureInput(object):
             target = np.interp(
                 np.arange(0, len(source) * p_len, len(source)) / p_len,
                 np.arange(0, len(source)),
-                source
+                source,
             )
             f0 = np.nan_to_num(target)
-        elif "hybrid" in f0_method: # EXPERIMENTAL
+        elif "hybrid" in f0_method:  # EXPERIMENTAL
             # Perform hybrid median pitch estimation
             time_step = 160 / 16000 * 1000
             f0 = self.get_f0_hybrid_computation(
                 f0_method,
                 x,
                 self.f0_min,
                 self.f0_max,
                 p_len,
                 crepe_hop_length,
-                time_step
+                time_step,
             )
         # Mangio-RVC-Fork Feature: Add hybrid f0 inference to feature extraction. EXPERIMENTAL...
@@ -362,14 +371,19 @@ class FeatureInput(object):
         with tqdm.tqdm(total=len(paths), leave=True, position=thread_n) as pbar:
             for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths):
                 try:
-                    pbar.set_description("thread:%s, f0ing, Hop-Length:%s" % (thread_n, crepe_hop_length))
+                    pbar.set_description(
+                        "thread:%s, f0ing, Hop-Length:%s"
+                        % (thread_n, crepe_hop_length)
+                    )
                     pbar.update(1)
                     if (
                         os.path.exists(opt_path1 + ".npy") == True
                         and os.path.exists(opt_path2 + ".npy") == True
                     ):
                         continue
-                    featur_pit = self.compute_f0(inp_path, f0_method, crepe_hop_length)
+                    featur_pit = self.compute_f0(
+                        inp_path, f0_method, crepe_hop_length
+                    )
                     np.save(
                         opt_path2,
                         featur_pit,
@@ -382,7 +396,9 @@ class FeatureInput(object):
                         allow_pickle=False,
                     )  # ori
                 except:
-                    printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc()))
+                    printt(
+                        "f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())
+                    )
 
 
 if __name__ == "__main__":
@@ -411,12 +427,7 @@ if __name__ == "__main__":
         for i in range(n_p):
             p = Process(
                 target=featureInput.go,
-                args=(
-                    paths[i::n_p],
-                    f0method,
-                    extraction_crepe_hop_length,
-                    i
-                ),
+                args=(paths[i::n_p], f0method, extraction_crepe_hop_length, i),
             )
             ps.append(p)
             p.start()
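
Note: the hybrid path above parses a method string such as hybrid[harvest+crepe], runs every named estimator, stacks the resulting f0 arrays, and reduces them element-wise with a median. A hedged sketch of that reduction step with made-up values; the real stack is built from the estimators in this file and may differ in detail:

import numpy as np

# Stand-in for f0_computation_stack once each estimator has run; the real
# arrays come from harvest/crepe/dio and are trimmed to equal length.
f0_computation_stack = [
    np.array([110.0, 112.0, np.nan, 220.0]),  # e.g. harvest
    np.array([111.0, 113.0, 114.0, 219.0]),   # e.g. crepe
]
if len(f0_computation_stack) == 1:
    f0_median_hybrid = f0_computation_stack[0]
else:
    # nanmedian ignores frames where one estimator failed (NaN).
    f0_median_hybrid = np.nanmedian(np.vstack(f0_computation_stack), axis=0)
print(f0_median_hybrid)  # [110.5 112.5 114.  219.5]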

View File

@@ -51,8 +51,10 @@ class RVC:
         self.window = 160
 
         # Get Torch Device
-        if(torch.cuda.is_available()):
-            self.torch_device = torch.device(f"cuda:{0 % torch.cuda.device_count()}")
+        if torch.cuda.is_available():
+            self.torch_device = torch.device(
+                f"cuda:{0 % torch.cuda.device_count()}"
+            )
         elif torch.backends.mps.is_available():
             self.torch_device = torch.device("mps")
         else:
@@ -141,7 +143,7 @@ class RVC:
     def get_f0(self, x, f0_up_key, inp_f0=None):
         # Calculate Padding and f0 details here
-        p_len = x.shape[0] // 512 # For Now This probs doesn't work
+        p_len = x.shape[0] // 512  # For Now This probs doesn't work
         x_pad = 1
         f0_min = 50
         f0_max = 1100
@@ -150,11 +152,11 @@ class RVC:
         f0 = 0
         # Here, check f0_methods and get their computations
-        if(self.f0_method == 'harvest'):
+        if self.f0_method == "harvest":
             f0 = self.get_harvest_computation(x, f0_min, f0_max)
-        elif(self.f0_method == 'reg-crepe'):
+        elif self.f0_method == "reg-crepe":
             f0 = self.get_regular_crepe_computation(x, f0_min, f0_max)
-        elif(self.f0_method == 'reg-crepe-tiny'):
+        elif self.f0_method == "reg-crepe-tiny":
             f0 = self.get_regular_crepe_computation(x, f0_min, f0_max, "tiny")
 
         # Calculate f0_course and f0_bak here
@@ -300,7 +302,7 @@ class GUI:
             with open("values1.json", "r") as j:
                 data = json.load(j)
         except:
             # Injecting f0_method into the json data
             with open("values1.json", "w") as j:
                 data = {
                     "pth_path": "",
@@ -328,11 +330,7 @@ class GUI:
                 [
                     sg.Frame(
                         title="Proudly forked by Mangio621",
-                        layout=[
-                            [
-                                sg.Image('./mangio_utils/lol.png')
-                            ]
-                        ]
+                        layout=[[sg.Image("./mangio_utils/lol.png")]],
                     ),
                     sg.Frame(
                         title=i18n("加载模型"),
@@ -384,14 +382,16 @@ class GUI:
                         ),
                     ],
                 ],
-            )
+            ),
         ],
         [
             # Mangio f0 Selection frame Here
             sg.Frame(
                 layout=[
                     [
-                        sg.Radio("Harvest", "f0_method", key="harvest", default=True),
+                        sg.Radio(
+                            "Harvest", "f0_method", key="harvest", default=True
+                        ),
                         sg.Radio("Crepe", "f0_method", key="reg-crepe"),
                         sg.Radio("Crepe Tiny", "f0_method", key="reg-crepe-tiny"),
                     ]
@@ -536,20 +536,21 @@ class GUI:
         if event == "stop_vc" and self.flag_vc == True:
             self.flag_vc = False
 
     # Function that returns the used f0 method in string format "harvest"
     def get_f0_method_from_radios(self, values):
         f0_array = [
-            {"name": "harvest", "val": values['harvest']},
-            {"name": "reg-crepe", "val": values['reg-crepe']},
-            {"name": "reg-crepe-tiny", "val": values['reg-crepe-tiny']},
+            {"name": "harvest", "val": values["harvest"]},
+            {"name": "reg-crepe", "val": values["reg-crepe"]},
+            {"name": "reg-crepe-tiny", "val": values["reg-crepe-tiny"]},
         ]
         # Filter through to find a true value
         used_f0 = ""
         for f0 in f0_array:
-            if(f0['val'] == True):
-                used_f0 = f0['name']
+            if f0["val"] == True:
+                used_f0 = f0["name"]
                 break
-        if(used_f0 == ""): used_f0 = "harvest" # Default Harvest if used_f0 is empty somehow
+        if used_f0 == "":
+            used_f0 = "harvest"  # Default Harvest if used_f0 is empty somehow
         return used_f0
 
     def set_values(self, values):
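
Note: the CUDA, then MPS, then CPU device fallback seen in this file recurs across the changed files; vc_infer_pipeline.py factors it into get_optimal_torch_device. A standalone version of the same pattern, with an illustrative function name:

import torch

def pick_device(index: int = 0) -> torch.device:
    # Prefer CUDA (fast), then Apple-silicon MPS, then plain CPU.
    if torch.cuda.is_available():
        return torch.device(f"cuda:{index % torch.cuda.device_count()}")
    if torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")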

File diff suppressed because it is too large

View File

@@ -1,11 +1,14 @@
 import ffmpeg
 import numpy as np
-#import praatio
-#import praatio.praat_scripts
+
+# import praatio
+# import praatio.praat_scripts
 import os
-#from os.path import join
-#praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")
+
+# from os.path import join
+# praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")
 
 def load_audio(file, sr, DoFormant, Quefrency, Timbre):
     try:
@@ -15,43 +18,47 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre):
         file = (
             file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
         )  # 防止小白拷路径头尾带了空格和"和回车
-        file_formanted = (
-            file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
-        )
-
-        with open('formanting.txt', 'r') as fvf:
+        file_formanted = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
+
+        with open("formanting.txt", "r") as fvf:
             content = fvf.readlines()
-        if 'True' in content[0].split('\n')[0]:
-            #print("true")
+        if "True" in content[0].split("\n")[0]:
+            # print("true")
             DoFormant = True
-            Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
+            Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
         else:
-            #print("not true")
+            # print("not true")
             DoFormant = False
 
         if DoFormant:
-            #os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}")
-            #print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted))
+            # os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}")
+            # print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted))
             print("formanting...")
-            os.system('stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"' % (file, Quefrency, Timbre, file_formanted))
+            os.system(
+                'stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"'
+                % (file, Quefrency, Timbre, file_formanted)
+            )
             print("formanted!")
-            #filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\')
-            #file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\')
+            # filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\')
+            # file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\')
             out, _ = (
-                ffmpeg.input('%sFORMANTED%s' % (file_formanted, '.wav'), threads=0)
+                ffmpeg.input("%sFORMANTED%s" % (file_formanted, ".wav"), threads=0)
                 .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
-                .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
+                .run(
+                    cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
+                )
             )
-            os.remove('%sFORMANTED%s' % (file_formanted, '.wav'))
+            os.remove("%sFORMANTED%s" % (file_formanted, ".wav"))
         else:
             out, _ = (
                 ffmpeg.input(file, threads=0)
                 .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
-                .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
+                .run(
+                    cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
+                )
             )
     except Exception as e:
         raise RuntimeError(f"Failed to load audio: {e}")
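
Note: the decode path above pipes ffmpeg's raw f32le output into memory, and the caller then reinterprets those bytes as float32 samples. A minimal sketch of the round trip, assuming the ffmpeg-python package and a hypothetical input file some.wav:

import ffmpeg
import numpy as np

def decode_to_float32(path: str, sr: int = 16000) -> np.ndarray:
    # Decode any ffmpeg-readable file to mono float32 PCM at sr Hz.
    out, _ = (
        ffmpeg.input(path, threads=0)
        .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
        .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
    )
    # Reinterpret the raw byte stream as a 1-D float32 array.
    return np.frombuffer(out, np.float32).flatten()

# audio = decode_to_float32("some.wav")  # hypothetical input file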

View File

@@ -568,10 +568,10 @@ def train_and_evaluate(
                     ),
                 )
             )
         with open("stop.txt", "r+") as tostop:
             content = tostop.read()
-            if 'stop' in content:
+            if "stop" in content:
                 logger.info("Stop Button was pressed. The program is closed.")
                 if hasattr(net_g, "module"):
                     ckpt = net_g.module.state_dict()
@@ -581,15 +581,21 @@
                     "saving final ckpt:%s"
                     % (
                         savee(
-                            ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch, hps.version, hps
+                            ckpt,
+                            hps.sample_rate,
+                            hps.if_f0,
+                            hps.name,
+                            epoch,
+                            hps.version,
+                            hps,
                         )
                     )
                 )
                 tostop.truncate(0)
                 tostop.writelines("not")
                 os._exit(2333333)
 
     if rank == 0:
         logger.info("====> Epoch: {} {}".format(epoch, epoch_recorder.record()))
     if epoch >= hps.total_epoch and rank == 0:
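
Note: stopping is handled through a file-based flag. The GUI's Stop button writes stop into stop.txt; the training loop polls the file, saves a final checkpoint, resets the flag to not, and hard-exits. A stripped-down sketch of the polling side (checkpoint saving elided; the seek(0) is an addition for clarity and is not in the original):

import os

def check_stop_flag():
    # Poll the sentinel file written by the GUI's Stop button.
    with open("stop.txt", "r+") as tostop:
        if "stop" in tostop.read():
            # ... save the final checkpoint here (savee(...) in the real code) ...
            tostop.truncate(0)   # clear the flag
            tostop.seek(0)       # rewind before rewriting (added for clarity)
            tostop.writelines("not")
            os._exit(2333333)    # hard-exit all workers, as the trainer does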

View File

@@ -24,9 +24,10 @@ Timbre = 0.0
 mutex = multiprocessing.Lock()
 f = open("%s/preprocess.log" % exp_dir, "a+")
 
-with open('formanting.txt', 'r') as fvf:
+with open("formanting.txt", "r") as fvf:
     content = fvf.readlines()
-Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
+Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
 
 
 def println(strr):
     mutex.acquire()
@@ -104,12 +105,14 @@ class PreProcess:
                     idx1 += 1
                     break
             self.norm_write(tmp_audio, idx0, idx1)
-            #println("%s->Suc." % path)
+            # println("%s->Suc." % path)
         except:
             println("%s->%s" % (path, traceback.format_exc()))
 
     def pipeline_mp(self, infos, thread_n):
-        for path, idx0 in tqdm.tqdm(infos, position=thread_n, leave=True, desc="thread:%s" % thread_n):
+        for path, idx0 in tqdm.tqdm(
+            infos, position=thread_n, leave=True, desc="thread:%s" % thread_n
+        ):
             self.pipeline(path, idx0)
 
     def pipeline_mp_inp_dir(self, inp_root, n_p):
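
Note: position=thread_n pins each worker's tqdm bar to its own terminal row so parallel bars do not overwrite one another. A tiny illustration of the pattern; the worker body is a placeholder:

import tqdm
from multiprocessing import Process

def worker(items, thread_n):
    # One bar per process, kept on its own row via position=thread_n.
    for _ in tqdm.tqdm(items, position=thread_n, leave=True, desc="thread:%s" % thread_n):
        pass  # the real code calls self.pipeline(path, idx0) here

if __name__ == "__main__":
    ps = [Process(target=worker, args=(list(range(100)), n)) for n in range(2)]
    for p in ps:
        p.start()
    for p in ps:
        p.join()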

View File

@@ -1,7 +1,7 @@
 import numpy as np, parselmouth, torch, pdb, sys, os
 from time import time as ttime
 import torch.nn.functional as F
-import torchcrepe # Fork feature. Use the crepe f0 algorithm. New dependency (pip install torchcrepe)
+import torchcrepe  # Fork feature. Use the crepe f0 algorithm. New dependency (pip install torchcrepe)
 from torch import Tensor
 import scipy.signal as signal
 import pyworld, os, traceback, faiss, librosa, torchcrepe
@@ -15,6 +15,7 @@ bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
 input_audio_path2wav = {}
 
+
 @lru_cache
 def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
     audio = input_audio_path2wav[input_audio_path]
@@ -74,24 +75,28 @@ class VC(object):
     def get_optimal_torch_device(self, index: int = 0) -> torch.device:
         # Get cuda device
         if torch.cuda.is_available():
-            return torch.device(f"cuda:{index % torch.cuda.device_count()}") # Very fast
+            return torch.device(
+                f"cuda:{index % torch.cuda.device_count()}"
+            )  # Very fast
         elif torch.backends.mps.is_available():
             return torch.device("mps")
         # Insert an else here to grab "xla" devices if available. TO DO later. Requires the torch_xla.core.xla_model library
         # Else wise return the "cpu" as a torch device,
         return torch.device("cpu")
 
     # Fork Feature: Compute f0 with the crepe method
     def get_f0_crepe_computation(
         self,
         x,
         f0_min,
         f0_max,
         p_len,
-        hop_length=160, # 512 before. Hop length changes the speed that the voice jumps to a different dramatic pitch. Lower hop lengths means more pitch accuracy but longer inference time.
-        model="full", # Either use crepe-tiny "tiny" or crepe "full". Default is full
+        hop_length=160,  # 512 before. Hop length changes the speed that the voice jumps to a different dramatic pitch. Lower hop lengths means more pitch accuracy but longer inference time.
+        model="full",  # Either use crepe-tiny "tiny" or crepe "full". Default is full
     ):
-        x = x.astype(np.float32) # fixes the F.conv2D exception. We needed to convert double to float.
+        x = x.astype(
+            np.float32
+        )  # fixes the F.conv2D exception. We needed to convert double to float.
         x /= np.quantile(np.abs(x), 0.999)
         torch_device = self.get_optimal_torch_device()
         audio = torch.from_numpy(x).to(torch_device, copy=True)
@@ -109,7 +114,7 @@ class VC(object):
             model,
             batch_size=hop_length * 2,
             device=torch_device,
-            pad=True
+            pad=True,
         )
         p_len = p_len or x.shape[0] // hop_length
         # Resize the pitch for final f0
@@ -118,17 +123,17 @@ class VC(object):
         target = np.interp(
             np.arange(0, len(source) * p_len, len(source)) / p_len,
             np.arange(0, len(source)),
-            source
+            source,
         )
         f0 = np.nan_to_num(target)
-        return f0 # Resized f0
+        return f0  # Resized f0
 
     def get_f0_official_crepe_computation(
         self,
         x,
         f0_min,
         f0_max,
         model="full",
     ):
         # Pick a batch size that doesn't cause memory errors on your gpu
         batch_size = 512
@@ -153,15 +158,15 @@ class VC(object):
     # Fork Feature: Compute pYIN f0 method
     def get_f0_pyin_computation(self, x, f0_min, f0_max):
-        y, sr = librosa.load('saudio/Sidney.wav', self.sr, mono=True)
+        y, sr = librosa.load("saudio/Sidney.wav", self.sr, mono=True)
         f0, _, _ = librosa.pyin(y, sr=self.sr, fmin=f0_min, fmax=f0_max)
-        f0 = f0[1:] # Get rid of extra first frame
+        f0 = f0[1:]  # Get rid of extra first frame
         return f0
 
     # Fork Feature: Acquire median hybrid f0 estimation calculation
     def get_f0_hybrid_computation(
         self,
         methods_str,
         input_audio_path,
         x,
         f0_min,
@@ -173,9 +178,9 @@ class VC(object):
     ):
         # Get various f0 methods from input to use in the computation stack
         s = methods_str
-        s = s.split('hybrid')[1]
-        s = s.replace('[', '').replace(']', '')
-        methods = s.split('+')
+        s = s.split("hybrid")[1]
+        s = s.replace("[", "").replace("]", "")
+        methods = s.split("+")
         f0_computation_stack = []
 
         print("Calculating f0 pitch estimations for methods: %s" % str(methods))
@@ -202,35 +207,39 @@ class VC(object):
                 )
             elif method == "crepe":
                 f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max)
-                f0 = f0[1:] # Get rid of extra first frame
+                f0 = f0[1:]  # Get rid of extra first frame
             elif method == "crepe-tiny":
                 f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny")
-                f0 = f0[1:] # Get rid of extra first frame
+                f0 = f0[1:]  # Get rid of extra first frame
             elif method == "mangio-crepe":
-                f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length)
+                f0 = self.get_f0_crepe_computation(
+                    x, f0_min, f0_max, p_len, crepe_hop_length
+                )
             elif method == "mangio-crepe-tiny":
-                f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length, "tiny")
+                f0 = self.get_f0_crepe_computation(
+                    x, f0_min, f0_max, p_len, crepe_hop_length, "tiny"
+                )
             elif method == "harvest":
                 f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
                 if filter_radius > 2:
                     f0 = signal.medfilt(f0, 3)
-                f0 = f0[1:] # Get rid of first frame.
-            elif method == "dio": # Potentially buggy?
+                f0 = f0[1:]  # Get rid of first frame.
+            elif method == "dio":  # Potentially buggy?
                 f0, t = pyworld.dio(
                     x.astype(np.double),
                     fs=self.sr,
                     f0_ceil=f0_max,
                     f0_floor=f0_min,
-                    frame_period=10
+                    frame_period=10,
                 )
                 f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
                 f0 = signal.medfilt(f0, 3)
                 f0 = f0[1:]
-            #elif method == "pyin": Not Working just yet
-            #    f0 = self.get_f0_pyin_computation(x, f0_min, f0_max)
+            # elif method == "pyin": Not Working just yet
+            #    f0 = self.get_f0_pyin_computation(x, f0_min, f0_max)
             # Push method to the stack
             f0_computation_stack.append(f0)
 
         for fc in f0_computation_stack:
             print(len(fc))
@@ -280,13 +289,13 @@ class VC(object):
             f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
             if filter_radius > 2:
                 f0 = signal.medfilt(f0, 3)
-        elif f0_method == "dio": # Potentially Buggy?
+        elif f0_method == "dio":  # Potentially Buggy?
             f0, t = pyworld.dio(
                 x.astype(np.double),
                 fs=self.sr,
                 f0_ceil=f0_max,
                 f0_floor=f0_min,
-                frame_period=10
+                frame_period=10,
             )
             f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
             f0 = signal.medfilt(f0, 3)
@@ -295,12 +304,17 @@ class VC(object):
         elif f0_method == "crepe-tiny":
             f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny")
         elif f0_method == "mangio-crepe":
-            f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length)
+            f0 = self.get_f0_crepe_computation(
+                x, f0_min, f0_max, p_len, crepe_hop_length
+            )
         elif f0_method == "mangio-crepe-tiny":
-            f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length, "tiny")
+            f0 = self.get_f0_crepe_computation(
+                x, f0_min, f0_max, p_len, crepe_hop_length, "tiny"
+            )
         elif f0_method == "rmvpe":
             if hasattr(self, "model_rmvpe") == False:
                 from rmvpe import RMVPE
                 print("loading rmvpe model")
                 self.model_rmvpe = RMVPE(
                     "rmvpe.pt", is_half=self.is_half, device=self.device
@@ -311,7 +325,7 @@ class VC(object):
             # Perform hybrid median pitch estimation
             input_audio_path2wav[input_audio_path] = x.astype(np.double)
             f0 = self.get_f0_hybrid_computation(
                 f0_method,
                 input_audio_path,
                 x,
                 f0_min,
@@ -319,7 +333,7 @@ class VC(object):
                 p_len,
                 filter_radius,
                 crepe_hop_length,
-                time_step
+                time_step,
             )
 
         f0 *= pow(2, f0_up_key / 12)
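
Note: the closing line transposes the whole f0 contour by f0_up_key equal-tempered semitones, since a shift of k semitones multiplies frequency by 2 ** (k / 12). A quick worked example with made-up values:

import numpy as np

f0 = np.array([110.0, 220.0, 440.0])  # made-up pitch track in Hz
f0_up_key = 12                        # +12 semitones = one octave up
print(f0 * 2 ** (f0_up_key / 12))     # -> [220. 440. 880.]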