Merge pull request #49 from Mangio621/TRUE-FIXED-FP16

Fix training times by fixing fp16 detection
Authored by kalomaze on 2023-07-22 22:48:22 -05:00, committed by GitHub.
8 changed files with 673 additions and 424 deletions


@@ -1,19 +1,73 @@
import argparse
import sys
import torch
import json
from multiprocessing import cpu_count
global usefp16
usefp16 = False
def use_fp32_config():
usefp16 = False
device_capability = 0
if torch.cuda.is_available():
device = torch.device("cuda:0") # Assuming you have only one GPU (index 0).
device_capability = torch.cuda.get_device_capability(device)[0]
if device_capability >= 7:
usefp16 = True
for config_file in ["32k.json", "40k.json", "48k.json"]:
with open(f"configs/{config_file}", "r") as d:
data = json.load(d)
if "train" in data and "fp16_run" in data["train"]:
data["train"]["fp16_run"] = True
with open(f"configs/{config_file}", "w") as d:
json.dump(data, d, indent=4)
print(f"Set fp16_run to true in {config_file}")
with open(
"trainset_preprocess_pipeline_print.py", "r", encoding="utf-8"
) as f:
strr = f.read()
strr = strr.replace("3.0", "3.7")
with open(
"trainset_preprocess_pipeline_print.py", "w", encoding="utf-8"
) as f:
f.write(strr)
else:
for config_file in ["32k.json", "40k.json", "48k.json"]:
with open(f"configs/{config_file}", "r") as f:
strr = f.read().replace("true", "false")
with open(f"configs/{config_file}", "w") as f:
f.write(strr)
with open("trainset_preprocess_pipeline_print.py", "r") as f:
strr = f.read().replace("3.7", "3.0")
with open("trainset_preprocess_pipeline_print.py", "w") as f:
data = json.load(f)
if "train" in data and "fp16_run" in data["train"]:
data["train"]["fp16_run"] = False
with open(f"configs/{config_file}", "w") as d:
json.dump(data, d, indent=4)
print(f"Set fp16_run to false in {config_file}")
with open(
"trainset_preprocess_pipeline_print.py", "r", encoding="utf-8"
) as f:
strr = f.read()
strr = strr.replace("3.7", "3.0")
with open(
"trainset_preprocess_pipeline_print.py", "w", encoding="utf-8"
) as f:
f.write(strr)
else:
print(
"CUDA is not available. Make sure you have an NVIDIA GPU and CUDA installed."
)
return (usefp16, device_capability)
class Config:
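The core of the fix: instead of string-replacing "true"/"false" in the config text (the old approach, whose removed lines appear interleaved with the new JSON-based ones above), use_fp32_config() now checks the GPU's CUDA compute capability and toggles fp16_run through the parsed JSON structure. Capability 7.x and up (Volta/Turing and newer) is treated as fp16-capable. A minimal standalone sketch of that logic, assuming the {"train": {"fp16_run": ...}} shape shown above; the function name and paths here are illustrative:

    import json
    import torch

    def detect_and_write_fp16(
        config_paths=("configs/32k.json", "configs/40k.json", "configs/48k.json"),
    ):
        # Compute-capability major version >= 7 (Volta/Turing and newer) -> fp16-capable.
        fp16_ok = torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 7
        for path in config_paths:
            with open(path, "r") as f:
                data = json.load(f)
            if "train" in data and "fp16_run" in data["train"]:
                data["train"]["fp16_run"] = fp16_ok  # structured edit, not a text substitution
                with open(path, "w") as f:
                    json.dump(data, f, indent=4)
        return fp16_ok

The companion 3.0 -> 3.7 rewrite in trainset_preprocess_pipeline_print.py appears to bump a slice-length constant used on the fp16 path; the diff only shows the text substitution, not the constant's meaning.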
@@ -51,10 +105,14 @@ class Config:
help="Do not open in browser automatically",
)
parser.add_argument( # Fork Feature. Paperspace integration for web UI
"--paperspace", action="store_true", help="Note that this argument just shares a gradio link for the web UI. Thus can be used on other non-local CLI systems."
"--paperspace",
action="store_true",
help="Note that this argument just shares a gradio link for the web UI. Thus can be used on other non-local CLI systems.",
)
parser.add_argument( # Fork Feature. Embed a CLI into the infer-web.py
"--is_cli", action="store_true", help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!"
"--is_cli",
action="store_true",
help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!",
)
cmd_opts = parser.parse_args()
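Both fork flags only change how the UI is exposed, not training itself. Illustrative invocations (the entry-point name comes from the --is_cli help text above):

    # python infer-web.py --paperspace   # share a public gradio link for remote/cloud hosts
    # python infer-web.py --is_cli       # text interface instead of the gradio UI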
@@ -95,9 +153,9 @@ class Config:
):
print("Found GPU", self.gpu_name, ", force to fp32")
self.is_half = False
use_fp32_config()
else:
print("Found GPU", self.gpu_name)
use_fp32_config()
self.gpu_mem = int(
torch.cuda.get_device_properties(i_device).total_memory
/ 1024
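The name-based branch (its condition sits above this hunk) forces is_half = False for GPUs whose fp16 throughput is known to be poor, and, new in this commit, both branches call use_fp32_config() so the JSON configs are refreshed to match whatever was detected. A hedged sketch of the branch's shape; the fork's actual substring list is not shown in this diff:

    # Illustrative tags only: the real name list lives outside this hunk.
    if any(tag in self.gpu_name for tag in ("16", "P40")):
        print("Found GPU", self.gpu_name, ", force to fp32")
        self.is_half = False
    else:
        print("Found GPU", self.gpu_name)
    use_fp32_config()  # rewrite fp16_run in the configs either way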


@@ -19,9 +19,9 @@ f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
DoFormant = False
with open('formanting.txt', 'r') as fvf:
with open("formanting.txt", "r") as fvf:
content = fvf.readlines()
Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
def printt(strr):
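This file (and the preprocessing script further down) reads runtime formant settings from formanting.txt. The reads imply a three-line layout: an enable flag, then Quefrency, then Timbre. A sketch of writing and reading that file; the values are examples, only the layout is implied by the code:

    with open("formanting.txt", "w") as fvf:
        fvf.write("False\n8.0\n1.2\n")

    with open("formanting.txt", "r") as fvf:
        content = fvf.readlines()
    do_formant = "True" in content[0]
    quefrency, timbre = content[1].split("\n")[0], content[2].split("\n")[0]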
@@ -67,9 +67,9 @@ class FeatureInput(object):
):
# Get various f0 methods from input to use in the computation stack
s = methods_str
s = s.split('hybrid')[1]
s = s.replace('[', '').replace(']', '')
methods = s.split('+')
s = s.split("hybrid")[1]
s = s.replace("[", "").replace("]", "")
methods = s.split("+")
f0_computation_stack = []
print("Calculating f0 pitch estimations for methods: %s" % str(methods))
@@ -99,7 +99,9 @@ class FeatureInput(object):
torch_device_index = 0
torch_device = None
if torch.cuda.is_available():
torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
torch_device = torch.device(
f"cuda:{torch_device_index % torch.cuda.device_count()}"
)
elif torch.backends.mps.is_available():
torch_device = torch.device("mps")
else:
@@ -132,7 +134,9 @@ class FeatureInput(object):
torch_device_index = 0
torch_device = None
if torch.cuda.is_available():
torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
torch_device = torch.device(
f"cuda:{torch_device_index % torch.cuda.device_count()}"
)
elif torch.backends.mps.is_available():
torch_device = torch.device("mps")
else:
@@ -156,7 +160,7 @@ class FeatureInput(object):
"full",
batch_size=crepe_hop_length * 2,
device=torch_device,
pad=True
pad=True,
)
p_len = p_len or x.shape[0] // crepe_hop_length
# Resize the pitch
@@ -165,7 +169,7 @@ class FeatureInput(object):
target = np.interp(
np.arange(0, len(source) * p_len, len(source)) / p_len,
np.arange(0, len(source)),
source
source,
)
f0 = np.nan_to_num(target)
elif method == "harvest":
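The np.interp call above stretches the crepe f0 track to exactly p_len frames by linear interpolation; np.nan_to_num then zeroes the NaNs crepe leaves in unvoiced regions. The same recipe at a small scale:

    import numpy as np

    source = np.array([100.0, 110.0, np.nan, 130.0])  # 4 f0 frames, one unvoiced
    p_len = 8
    target = np.interp(
        np.arange(0, len(source) * p_len, len(source)) / p_len,  # 8 query points in [0, 4)
        np.arange(0, len(source)),                               # original frame indices
        source,
    )
    f0 = np.nan_to_num(target)  # NaN -> 0.0, marking unvoiced frames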
@@ -236,10 +240,9 @@ class FeatureInput(object):
elif f0_method == "rmvpe":
if hasattr(self, "model_rmvpe") == False:
from rmvpe import RMVPE
print("loading rmvpe model")
self.model_rmvpe = RMVPE(
"rmvpe.pt", is_half=False, device="cuda:0"
)
self.model_rmvpe = RMVPE("rmvpe.pt", is_half=False, device="cuda:0")
f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
elif f0_method == "dio":
f0, t = pyworld.dio(
@@ -250,12 +253,16 @@ class FeatureInput(object):
frame_period=1000 * self.hop / self.fs,
)
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs)
elif f0_method == "crepe": # Fork Feature: Added crepe f0 for f0 feature extraction
elif (
f0_method == "crepe"
): # Fork Feature: Added crepe f0 for f0 feature extraction
# Pick a batch size that doesn't cause memory errors on your gpu
torch_device_index = 0
torch_device = None
if torch.cuda.is_available():
torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
torch_device = torch.device(
f"cuda:{torch_device_index % torch.cuda.device_count()}"
)
elif torch.backends.mps.is_available():
torch_device = torch.device("mps")
else:
@@ -287,7 +294,9 @@ class FeatureInput(object):
torch_device_index = 0
torch_device = None
if torch.cuda.is_available():
torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}")
torch_device = torch.device(
f"cuda:{torch_device_index % torch.cuda.device_count()}"
)
elif torch.backends.mps.is_available():
torch_device = torch.device("mps")
else:
@@ -311,7 +320,7 @@ class FeatureInput(object):
"full",
batch_size=crepe_hop_length * 2,
device=torch_device,
pad=True
pad=True,
)
p_len = p_len or x.shape[0] // crepe_hop_length
# Resize the pitch
@@ -320,7 +329,7 @@ class FeatureInput(object):
target = np.interp(
np.arange(0, len(source) * p_len, len(source)) / p_len,
np.arange(0, len(source)),
source
source,
)
f0 = np.nan_to_num(target)
elif "hybrid" in f0_method: # EXPERIMENTAL
@@ -333,7 +342,7 @@ class FeatureInput(object):
self.f0_max,
p_len,
crepe_hop_length,
time_step
time_step,
)
# Mangio-RVC-Fork Feature: Add hybrid f0 inference to feature extraction. EXPERIMENTAL...
@@ -362,14 +371,19 @@ class FeatureInput(object):
with tqdm.tqdm(total=len(paths), leave=True, position=thread_n) as pbar:
for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths):
try:
pbar.set_description("thread:%s, f0ing, Hop-Length:%s" % (thread_n, crepe_hop_length))
pbar.set_description(
"thread:%s, f0ing, Hop-Length:%s"
% (thread_n, crepe_hop_length)
)
pbar.update(1)
if (
os.path.exists(opt_path1 + ".npy") == True
and os.path.exists(opt_path2 + ".npy") == True
):
continue
featur_pit = self.compute_f0(inp_path, f0_method, crepe_hop_length)
featur_pit = self.compute_f0(
inp_path, f0_method, crepe_hop_length
)
np.save(
opt_path2,
featur_pit,
@@ -382,7 +396,9 @@ class FeatureInput(object):
allow_pickle=False,
) # ori
except:
printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc()))
printt(
"f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())
)
if __name__ == "__main__":
@@ -411,12 +427,7 @@ if __name__ == "__main__":
for i in range(n_p):
p = Process(
target=featureInput.go,
args=(
paths[i::n_p],
f0method,
extraction_crepe_hop_length,
i
),
args=(paths[i::n_p], f0method, extraction_crepe_hop_length, i),
)
ps.append(p)
p.start()
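paths[i::n_p] deals the file list round-robin across the n_p worker processes, so the shards are disjoint and roughly equal without any extra bookkeeping:

    paths = ["a.wav", "b.wav", "c.wav", "d.wav", "e.wav"]
    n_p = 2
    shards = [paths[i::n_p] for i in range(n_p)]
    # shards == [['a.wav', 'c.wav', 'e.wav'], ['b.wav', 'd.wav']]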


@@ -51,8 +51,10 @@ class RVC:
self.window = 160
# Get Torch Device
if(torch.cuda.is_available()):
self.torch_device = torch.device(f"cuda:{0 % torch.cuda.device_count()}")
if torch.cuda.is_available():
self.torch_device = torch.device(
f"cuda:{0 % torch.cuda.device_count()}"
)
elif torch.backends.mps.is_available():
self.torch_device = torch.device("mps")
else:
@@ -150,11 +152,11 @@ class RVC:
f0 = 0
# Here, check f0_methods and get their computations
if(self.f0_method == 'harvest'):
if self.f0_method == "harvest":
f0 = self.get_harvest_computation(x, f0_min, f0_max)
elif(self.f0_method == 'reg-crepe'):
elif self.f0_method == "reg-crepe":
f0 = self.get_regular_crepe_computation(x, f0_min, f0_max)
elif(self.f0_method == 'reg-crepe-tiny'):
elif self.f0_method == "reg-crepe-tiny":
f0 = self.get_regular_crepe_computation(x, f0_min, f0_max, "tiny")
# Calculate f0_course and f0_bak here
@@ -328,11 +330,7 @@ class GUI:
[
sg.Frame(
title="Proudly forked by Mangio621",
layout=[
[
sg.Image('./mangio_utils/lol.png')
]
]
layout=[[sg.Image("./mangio_utils/lol.png")]],
),
sg.Frame(
title=i18n("加载模型"),
@@ -384,14 +382,16 @@ class GUI:
),
],
],
)
),
],
[
# Mangio f0 Selection frame Here
sg.Frame(
layout=[
[
sg.Radio("Harvest", "f0_method", key="harvest", default=True),
sg.Radio(
"Harvest", "f0_method", key="harvest", default=True
),
sg.Radio("Crepe", "f0_method", key="reg-crepe"),
sg.Radio("Crepe Tiny", "f0_method", key="reg-crepe-tiny"),
]
@@ -539,17 +539,18 @@ class GUI:
# Function that returns the used f0 method in string format "harvest"
def get_f0_method_from_radios(self, values):
f0_array = [
{"name": "harvest", "val": values['harvest']},
{"name": "reg-crepe", "val": values['reg-crepe']},
{"name": "reg-crepe-tiny", "val": values['reg-crepe-tiny']},
{"name": "harvest", "val": values["harvest"]},
{"name": "reg-crepe", "val": values["reg-crepe"]},
{"name": "reg-crepe-tiny", "val": values["reg-crepe-tiny"]},
]
# Filter through to find a true value
used_f0 = ""
for f0 in f0_array:
if(f0['val'] == True):
used_f0 = f0['name']
if f0["val"] == True:
used_f0 = f0["name"]
break
if(used_f0 == ""): used_f0 = "harvest" # Default Harvest if used_f0 is empty somehow
if used_f0 == "":
used_f0 = "harvest" # Default Harvest if used_f0 is empty somehow
return used_f0
def set_values(self, values):
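The radio scan above walks a list of {name, val} dicts and falls back to "harvest". An equivalent, more compact lookup over the same keys; a sketch, not the fork's code:

    used_f0 = next(
        (name for name in ("harvest", "reg-crepe", "reg-crepe-tiny") if values.get(name)),
        "harvest",  # default when no radio is somehow selected
    )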

File diff suppressed because it is too large.


@@ -1,11 +1,14 @@
import ffmpeg
import numpy as np
#import praatio
#import praatio.praat_scripts
import os
#from os.path import join
#praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")
# import praatio
# import praatio.praat_scripts
import os
# from os.path import join
# praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")
def load_audio(file, sr, DoFormant, Quefrency, Timbre):
try:
@@ -15,43 +18,47 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre):
file = (
file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
) # 防止小白拷路径头尾带了空格和"和回车
file_formanted = (
file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
)
with open('formanting.txt', 'r') as fvf:
file_formanted = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
with open("formanting.txt", "r") as fvf:
content = fvf.readlines()
if 'True' in content[0].split('\n')[0]:
#print("true")
if "True" in content[0].split("\n")[0]:
# print("true")
DoFormant = True
Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
else:
#print("not true")
# print("not true")
DoFormant = False
if DoFormant:
#os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}")
#print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted))
# os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}")
# print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted))
print("formanting...")
os.system('stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"' % (file, Quefrency, Timbre, file_formanted))
os.system(
'stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"'
% (file, Quefrency, Timbre, file_formanted)
)
print("formanted!")
#filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\')
#file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\')
# filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\')
# file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\')
out, _ = (
ffmpeg.input('%sFORMANTED%s' % (file_formanted, '.wav'), threads=0)
ffmpeg.input("%sFORMANTED%s" % (file_formanted, ".wav"), threads=0)
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
.run(
cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
)
)
os.remove('%sFORMANTED%s' % (file_formanted, '.wav'))
os.remove("%sFORMANTED%s" % (file_formanted, ".wav"))
else:
out, _ = (
ffmpeg.input(file, threads=0)
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
.run(
cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
)
)
except Exception as e:
raise RuntimeError(f"Failed to load audio: {e}")
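load_audio's pipeline: scrub quotes and whitespace from a pasted path, optionally formant-shift through the external stftpitchshift CLI (which writes <file>FORMANTED.wav), then have ffmpeg decode to raw 32-bit-float mono PCM at the target rate on stdout. The bytes in out still need to become samples; presumably the function ends with the standard conversion for this ffmpeg recipe, which is outside the shown hunk:

    import numpy as np

    # f32le output: 4 bytes per sample, mono, already resampled to sr.
    audio = np.frombuffer(out, np.float32).flatten()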


@@ -571,7 +571,7 @@ def train_and_evaluate(
with open("stop.txt", "r+") as tostop:
content = tostop.read()
if 'stop' in content:
if "stop" in content:
logger.info("Stop Button was pressed. The program is closed.")
if hasattr(net_g, "module"):
ckpt = net_g.module.state_dict()
@@ -581,7 +581,13 @@ def train_and_evaluate(
"saving final ckpt:%s"
% (
savee(
ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch, hps.version, hps
ckpt,
hps.sample_rate,
hps.if_f0,
hps.name,
epoch,
hps.version,
hps,
)
)
)
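The trainer polls a stop.txt sentinel each loop and, when it reads "stop", saves a final checkpoint before exiting; hasattr(net_g, "module") picks the underlying model's state_dict when the network is wrapped (e.g. by DistributedDataParallel). The polling half in isolation, with illustrative names:

    def should_stop(path="stop.txt"):
        # Cooperative shutdown: the UI writes "stop" into the file.
        try:
            with open(path, "r") as f:
                return "stop" in f.read()
        except FileNotFoundError:
            return False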


@@ -24,9 +24,10 @@ Timbre = 0.0
mutex = multiprocessing.Lock()
f = open("%s/preprocess.log" % exp_dir, "a+")
with open('formanting.txt', 'r') as fvf:
with open("formanting.txt", "r") as fvf:
content = fvf.readlines()
Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]
Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0]
def println(strr):
mutex.acquire()
@@ -104,12 +105,14 @@ class PreProcess:
idx1 += 1
break
self.norm_write(tmp_audio, idx0, idx1)
#println("%s->Suc." % path)
# println("%s->Suc." % path)
except:
println("%s->%s" % (path, traceback.format_exc()))
def pipeline_mp(self, infos, thread_n):
for path, idx0 in tqdm.tqdm(infos, position=thread_n, leave=True, desc="thread:%s" % thread_n):
for path, idx0 in tqdm.tqdm(
infos, position=thread_n, leave=True, desc="thread:%s" % thread_n
):
self.pipeline(path, idx0)
def pipeline_mp_inp_dir(self, inp_root, n_p):


@@ -15,6 +15,7 @@ bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
input_audio_path2wav = {}
@lru_cache
def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
audio = input_audio_path2wav[input_audio_path]
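@lru_cache cannot key on a numpy array, so the wav is parked in the module-level input_audio_path2wav dict and only hashable scalars (path string, rates, thresholds) reach the cache; repeated conversions of the same file then skip the slow harvest pass entirely. Usage shape, with illustrative values:

    input_audio_path2wav["/tmp/a.wav"] = audio_np                 # caller registers the audio first
    f0 = cache_harvest_f0("/tmp/a.wav", 16000, 1100.0, 50.0, 10)  # computed once
    f0 = cache_harvest_f0("/tmp/a.wav", 16000, 1100.0, 50.0, 10)  # identical args -> cache hit

Note that the bare @lru_cache decorator (no parentheses) requires Python 3.8 or newer.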
@@ -74,7 +75,9 @@ class VC(object):
def get_optimal_torch_device(self, index: int = 0) -> torch.device:
# Get cuda device
if torch.cuda.is_available():
return torch.device(f"cuda:{index % torch.cuda.device_count()}") # Very fast
return torch.device(
f"cuda:{index % torch.cuda.device_count()}"
) # Very fast
elif torch.backends.mps.is_available():
return torch.device("mps")
# Insert an else here to grab "xla" devices if available. TO DO later. Requires the torch_xla.core.xla_model library
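Selection order is CUDA, then Apple's MPS, then an implicit CPU fallback; the modulo keeps any requested index inside the visible device range. The same logic as a standalone sketch:

    import torch

    def pick_device(index: int = 0) -> torch.device:
        if torch.cuda.is_available():
            # 5 % 2 == 1 on a 2-GPU box, so oversized indices never raise.
            return torch.device(f"cuda:{index % torch.cuda.device_count()}")
        if torch.backends.mps.is_available():
            return torch.device("mps")
        return torch.device("cpu")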
@@ -91,7 +94,9 @@ class VC(object):
hop_length=160, # 512 before. Hop length changes the speed that the voice jumps to a different dramatic pitch. Lower hop lengths means more pitch accuracy but longer inference time.
model="full", # Either use crepe-tiny "tiny" or crepe "full". Default is full
):
x = x.astype(np.float32) # fixes the F.conv2D exception. We needed to convert double to float.
x = x.astype(
np.float32
) # fixes the F.conv2D exception. We needed to convert double to float.
x /= np.quantile(np.abs(x), 0.999)
torch_device = self.get_optimal_torch_device()
audio = torch.from_numpy(x).to(torch_device, copy=True)
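Two preparation steps worth noting: the float32 cast works around the F.conv2d dtype error the comment mentions, and dividing by the 99.9th-percentile absolute amplitude (rather than the max) normalizes robustly, so a single clipped spike cannot crush the whole signal:

    import numpy as np

    x = np.sin(np.arange(10_000, dtype=np.float32))  # ordinary signal in [-1, 1]
    x[0] = 50.0                                      # one clipped spike
    x /= np.quantile(np.abs(x), 0.999)               # divides by ~1.0, not by 50.0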
@@ -109,7 +114,7 @@ class VC(object):
model,
batch_size=hop_length * 2,
device=torch_device,
pad=True
pad=True,
)
p_len = p_len or x.shape[0] // hop_length
# Resize the pitch for final f0
@@ -118,7 +123,7 @@ class VC(object):
target = np.interp(
np.arange(0, len(source) * p_len, len(source)) / p_len,
np.arange(0, len(source)),
source
source,
)
f0 = np.nan_to_num(target)
return f0 # Resized f0
@@ -153,7 +158,7 @@ class VC(object):
# Fork Feature: Compute pYIN f0 method
def get_f0_pyin_computation(self, x, f0_min, f0_max):
y, sr = librosa.load('saudio/Sidney.wav', self.sr, mono=True)
y, sr = librosa.load("saudio/Sidney.wav", self.sr, mono=True)
f0, _, _ = librosa.pyin(y, sr=self.sr, fmin=f0_min, fmax=f0_max)
f0 = f0[1:] # Get rid of extra first frame
return f0
@@ -173,9 +178,9 @@ class VC(object):
):
# Get various f0 methods from input to use in the computation stack
s = methods_str
s = s.split('hybrid')[1]
s = s.replace('[', '').replace(']', '')
methods = s.split('+')
s = s.split("hybrid")[1]
s = s.replace("[", "").replace("]", "")
methods = s.split("+")
f0_computation_stack = []
print("Calculating f0 pitch estimations for methods: %s" % str(methods))
@@ -207,9 +212,13 @@ class VC(object):
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny")
f0 = f0[1:] # Get rid of extra first frame
elif method == "mangio-crepe":
f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length)
f0 = self.get_f0_crepe_computation(
x, f0_min, f0_max, p_len, crepe_hop_length
)
elif method == "mangio-crepe-tiny":
f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length, "tiny")
f0 = self.get_f0_crepe_computation(
x, f0_min, f0_max, p_len, crepe_hop_length, "tiny"
)
elif method == "harvest":
f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
if filter_radius > 2:
@@ -221,12 +230,12 @@ class VC(object):
fs=self.sr,
f0_ceil=f0_max,
f0_floor=f0_min,
frame_period=10
frame_period=10,
)
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
f0 = signal.medfilt(f0, 3)
f0 = f0[1:]
#elif method == "pyin": Not Working just yet
# elif method == "pyin": Not Working just yet
# f0 = self.get_f0_pyin_computation(x, f0_min, f0_max)
# Push method to the stack
f0_computation_stack.append(f0)
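Each selected method contributes one f0 track to f0_computation_stack; the blend itself happens outside this hunk. A hedged sketch of the usual combination (whether the fork takes a median or a mean here is not shown in this diff; a nan-aware reducer keeps frames where only some methods found a pitch):

    import numpy as np

    # Truncate to a common length, then reduce across methods per frame.
    min_len = min(len(f0) for f0 in f0_computation_stack)
    stacked = np.vstack([f0[:min_len] for f0 in f0_computation_stack])
    f0_hybrid = np.nanmedian(stacked, axis=0)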
@@ -286,7 +295,7 @@ class VC(object):
fs=self.sr,
f0_ceil=f0_max,
f0_floor=f0_min,
frame_period=10
frame_period=10,
)
f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
f0 = signal.medfilt(f0, 3)
@@ -295,12 +304,17 @@ class VC(object):
elif f0_method == "crepe-tiny":
f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny")
elif f0_method == "mangio-crepe":
f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length)
f0 = self.get_f0_crepe_computation(
x, f0_min, f0_max, p_len, crepe_hop_length
)
elif f0_method == "mangio-crepe-tiny":
f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length, "tiny")
f0 = self.get_f0_crepe_computation(
x, f0_min, f0_max, p_len, crepe_hop_length, "tiny"
)
elif f0_method == "rmvpe":
if hasattr(self, "model_rmvpe") == False:
from rmvpe import RMVPE
print("loading rmvpe model")
self.model_rmvpe = RMVPE(
"rmvpe.pt", is_half=self.is_half, device=self.device
@@ -319,7 +333,7 @@ class VC(object):
p_len,
filter_radius,
crepe_hop_length,
time_step
time_step,
)
f0 *= pow(2, f0_up_key / 12)
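The closing transpose is the standard equal-temperament ratio: every f0 value is multiplied by 2^(f0_up_key/12), so +12 semitones doubles the pitch and -12 halves it:

    f0_up_key = 12
    ratio = pow(2, f0_up_key / 12)  # 2.0
    # 220.0 Hz (A3) * ratio -> 440.0 Hz (A4)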