diff --git a/README.md b/README.md index 95b1410..b64d43d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Features: +# 7/22 Changelog: - Experimental Formant Shift using StftPitchShift(tried using praat with praatio but to no avail) - Added `Stop Training` button when training, no need to restart RVC every time you want to stop the training of a model! - Auto-detect Index path for models selected + Auto-detect paths, no more default values like this: `E:\codes\py39\vits_vc_gpu_train\logs\mi-test-1key\total_fea.npy`, We're getting Root Dir and subfolders using @@ -12,87 +12,6 @@ os.path.abspath(os.getcwd()) - Auto-open TensorBoard localhost URL when `tensor-launch.py` is executed - RMVPE implemented in both inferencing and training (the one in `Training` tab doesn't work properly though, requires some additional work to do) -## Installation: - -1. Simply either extract directly or use git clone - -2. Run `installstft.bat`. It'll automatically: - - Upgrade/Downgrade Gradio if its version isn't 3.34.0; - - Install `rmvpe.pt` if it hasn't been already installed; - - Install `StftPitchShift` if it hasn't been already installed; - - - -3. Done! You're good to go and use the RVC-WebUI Tweaked by me for you to use :) - -## Change Gradio Theme: - -- [OPTIONAL] Change Gradio's theme: - 1. Open `infer-web.py` in any code/text editing software (e.g. `notepad++`, `notepad`, `vscode`, etc) - - 2a. Press Ctrl+F and search for `with gr.Blocks(`, select the one that's not fully commented - - 2b. Go to line `1842`, you'll see the `with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:` - - 3. Go to [Gradio Theme Gallery](https://huggingface.co/spaces/gradio/theme-gallery): - - 3.1 Select any theme you like (e.g. [this one](https://huggingface.co/spaces/freddyaboulton/dracula_revamped)) - - 3.2 Look at the top of the page - - ![image](https://github.com/alexlnkp/Mangio-RVC-Tweaks/assets/79400603/59e3e6a9-bdda-4ede-8161-00ee957c1715) - - 3.3 Copy theme variable(in this case, it's `theme='freddyaboulton/dracula_revamped'`) - - 4. Replace `theme='HaleyCH/HaleyCH_Theme'` in `infer-web.py` with any value of a theme from [Gradio Theme Gallery](https://huggingface.co/spaces/gradio/theme-gallery) - -### Current Todo-list: - -- [x] Fix `Unload voice to save GPU memory` button Traceback -- [ ] Add Accordions so people with Firefox browser get a much more compact GUI rather than [This](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/assets/79400603/67e0cc08-82a2-4dc3-86cf-e23d1dcad9f8). -- [ ] Fix weird way Median Filtering value inputted in a slider is utilized -- [ ] Replace regular refresh buttons with these tiny ones from [AUTOMATIC'S1111 Stable DIffusion](https://github.com/AUTOMATIC1111/stable-diffusion-webui) -![image](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/assets/79400603/fe37051e-4c95-4d30-9254-87d44436bb9e) -- [ ] Add a way to change the Gradio's theme from WebUI itself, like in [AUTOMATIC'S1111 Stable DIffusion](https://github.com/AUTOMATIC1111/stable-diffusion-webui) -![image](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/assets/79400603/7b93e167-564a-41d3-9a76-ded20063cdc3) -- [ ] Implement Praat in the GUI for f0 curve file manipulation and easier usage - - -# Screenshots: - -- ## Inference Tab: - -![image](https://github.com/Mangio621/Mangio-RVC-Fork/assets/79400603/107aa15a-4e8d-4f77-a327-45f35a235fcf) - -- ## UVR Tab: - -![image](https://github.com/Mangio621/Mangio-RVC-Fork/assets/79400603/7e57242a-4950-40c8-bf2a-8f77e992af26) - -- ## Training Tab: - -![image](https://github.com/Mangio621/Mangio-RVC-Fork/assets/79400603/a19ce156-5532-4761-aa06-8a537f80c368) - -- ## Ckpt-Processing Tab: - -![image](https://github.com/Mangio621/Mangio-RVC-Fork/assets/79400603/0cdc285e-a184-48f3-92a7-65f6120caf2f) - -The rest of the tabs are left untouched code-wise. - - - -# Formant Shift: - -![image](https://github.com/Mangio621/Mangio-RVC-Fork/assets/79400603/300ebce2-36c7-4761-b1dd-b31403ad2cd1) - -- ### Click `Apply` button every time you change the values for inferencing. - -- ### As the name implies, you can only use `wav` files so far, also it is very slow, so be patient. - -- ### If you added a new `preset.txt` in the `\formantshiftcfg\` folder, click button with refresh emoji - -- ### If the preset you selected somehow got edited, by pressing refresh emoji button you'll update values, by grabbing them from the file - -

Mangio-RVC-Fork with v2 Support! 💻

@@ -343,6 +262,41 @@ make tensorboard ``` Then click the tensorboard link it provides and refresh the data. +## Change Gradio Theme: + +- [OPTIONAL] Change Gradio's theme: + 1. Open `infer-web.py` in any code/text editing software (e.g. `notepad++`, `notepad`, `vscode`, etc) + + 2a. Press Ctrl+F and search for `with gr.Blocks(`, select the one that's not fully commented + + 2b. Go to line `1842`, you'll see the `with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app:` + + 3. Go to [Gradio Theme Gallery](https://huggingface.co/spaces/gradio/theme-gallery): + + 3.1 Select any theme you like (e.g. [this one](https://huggingface.co/spaces/freddyaboulton/dracula_revamped)) + + 3.2 Look at the top of the page + + ![image](https://github.com/alexlnkp/Mangio-RVC-Tweaks/assets/79400603/59e3e6a9-bdda-4ede-8161-00ee957c1715) + + 3.3 Copy theme variable(in this case, it's `theme='freddyaboulton/dracula_revamped'`) + + 4. Replace `theme='HaleyCH/HaleyCH_Theme'` in `infer-web.py` with any value of a theme from [Gradio Theme Gallery](https://huggingface.co/spaces/gradio/theme-gallery) + + +# Formant Shift Explanation + +![image](https://github.com/Mangio621/Mangio-RVC-Fork/assets/79400603/300ebce2-36c7-4761-b1dd-b31403ad2cd1) + +- ### Click `Apply` button every time you change the values for inferencing. + +- ### As the name implies, you can only use `wav` files so far, also it is very slow, so be patient. + +- ### If you added a new `preset.txt` in the `\formantshiftcfg\` folder, click button with refresh emoji + +- ### If the preset you selected somehow got edited, by pressing refresh emoji button you'll update values, by grabbing them from the file + + # Other If you are using Windows, you can download and extract `RVC-beta.7z` to use RVC directly and use `go-web.bat` to start Webui. diff --git a/audios/.gitignore b/audios/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/config.py b/config.py index 2f64e13..5b72235 100644 --- a/config.py +++ b/config.py @@ -1,19 +1,73 @@ import argparse import sys import torch +import json from multiprocessing import cpu_count +global usefp16 +usefp16 = False + def use_fp32_config(): - for config_file in ["32k.json", "40k.json", "48k.json"]: - with open(f"configs/{config_file}", "r") as f: - strr = f.read().replace("true", "false") - with open(f"configs/{config_file}", "w") as f: - f.write(strr) - with open("trainset_preprocess_pipeline_print.py", "r") as f: - strr = f.read().replace("3.7", "3.0") - with open("trainset_preprocess_pipeline_print.py", "w") as f: - f.write(strr) + usefp16 = False + device_capability = 0 + if torch.cuda.is_available(): + device = torch.device("cuda:0") # Assuming you have only one GPU (index 0). + device_capability = torch.cuda.get_device_capability(device)[0] + if device_capability >= 7: + usefp16 = True + for config_file in ["32k.json", "40k.json", "48k.json"]: + with open(f"configs/{config_file}", "r") as d: + data = json.load(d) + + if "train" in data and "fp16_run" in data["train"]: + data["train"]["fp16_run"] = True + + with open(f"configs/{config_file}", "w") as d: + json.dump(data, d, indent=4) + + print(f"Set fp16_run to true in {config_file}") + + with open( + "trainset_preprocess_pipeline_print.py", "r", encoding="utf-8" + ) as f: + strr = f.read() + + strr = strr.replace("3.0", "3.7") + + with open( + "trainset_preprocess_pipeline_print.py", "w", encoding="utf-8" + ) as f: + f.write(strr) + else: + for config_file in ["32k.json", "40k.json", "48k.json"]: + with open(f"configs/{config_file}", "r") as f: + data = json.load(f) + + if "train" in data and "fp16_run" in data["train"]: + data["train"]["fp16_run"] = False + + with open(f"configs/{config_file}", "w") as d: + json.dump(data, d, indent=4) + + print(f"Set fp16_run to false in {config_file}") + + with open( + "trainset_preprocess_pipeline_print.py", "r", encoding="utf-8" + ) as f: + strr = f.read() + + strr = strr.replace("3.7", "3.0") + + with open( + "trainset_preprocess_pipeline_print.py", "w", encoding="utf-8" + ) as f: + f.write(strr) + else: + print( + "CUDA is not available. Make sure you have an NVIDIA GPU and CUDA installed." + ) + return (usefp16, device_capability) class Config: @@ -32,7 +86,7 @@ class Config: self.paperspace, self.is_cli, ) = self.arg_parse() - + self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config() @staticmethod @@ -50,11 +104,15 @@ class Config: action="store_true", help="Do not open in browser automatically", ) - parser.add_argument( # Fork Feature. Paperspace integration for web UI - "--paperspace", action="store_true", help="Note that this argument just shares a gradio link for the web UI. Thus can be used on other non-local CLI systems." + parser.add_argument( # Fork Feature. Paperspace integration for web UI + "--paperspace", + action="store_true", + help="Note that this argument just shares a gradio link for the web UI. Thus can be used on other non-local CLI systems.", ) - parser.add_argument( # Fork Feature. Embed a CLI into the infer-web.py - "--is_cli", action="store_true", help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!" + parser.add_argument( # Fork Feature. Embed a CLI into the infer-web.py + "--is_cli", + action="store_true", + help="Use the CLI instead of setting up a gradio UI. This flag will launch an RVC text interface where you can execute functions from infer-web.py!", ) cmd_opts = parser.parse_args() @@ -95,9 +153,9 @@ class Config: ): print("Found GPU", self.gpu_name, ", force to fp32") self.is_half = False - use_fp32_config() else: print("Found GPU", self.gpu_name) + use_fp32_config() self.gpu_mem = int( torch.cuda.get_device_properties(i_device).total_memory / 1024 diff --git a/extract_f0_print.py b/extract_f0_print.py index 3290125..4be7f5e 100644 --- a/extract_f0_print.py +++ b/extract_f0_print.py @@ -5,10 +5,10 @@ sys.path.append(now_dir) from my_utils import load_audio import pyworld import numpy as np, logging -import torchcrepe # Fork Feature. Crepe algo for training and preprocess +import torchcrepe # Fork Feature. Crepe algo for training and preprocess import torch -from torch import Tensor # Fork Feature. Used for pitch prediction for torch crepe. -import scipy.signal as signal # Fork Feature hybrid inference +from torch import Tensor # Fork Feature. Used for pitch prediction for torch crepe. +import scipy.signal as signal # Fork Feature hybrid inference import tqdm logging.getLogger("numba").setLevel(logging.WARNING) @@ -19,9 +19,9 @@ f = open("%s/extract_f0_feature.log" % exp_dir, "a+") DoFormant = False -with open('formanting.txt', 'r') as fvf: - content = fvf.readlines() - Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0] +with open("formanting.txt", "r") as fvf: + content = fvf.readlines() + Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0] def printt(strr): @@ -32,7 +32,7 @@ def printt(strr): n_p = int(sys.argv[2]) f0method = sys.argv[3] -extraction_crepe_hop_length = 0 +extraction_crepe_hop_length = 0 try: extraction_crepe_hop_length = int(sys.argv[4]) except: @@ -53,11 +53,11 @@ class FeatureInput(object): self.f0_min = 50.0 self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) - + # EXPERIMENTAL. PROBABLY BUGGY def get_f0_hybrid_computation( - self, - methods_str, + self, + methods_str, x, f0_min, f0_max, @@ -67,9 +67,9 @@ class FeatureInput(object): ): # Get various f0 methods from input to use in the computation stack s = methods_str - s = s.split('hybrid')[1] - s = s.replace('[', '').replace(']', '') - methods = s.split('+') + s = s.split("hybrid")[1] + s = s.replace("[", "").replace("]", "") + methods = s.split("+") f0_computation_stack = [] print("Calculating f0 pitch estimations for methods: %s" % str(methods)) @@ -99,7 +99,9 @@ class FeatureInput(object): torch_device_index = 0 torch_device = None if torch.cuda.is_available(): - torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}") + torch_device = torch.device( + f"cuda:{torch_device_index % torch.cuda.device_count()}" + ) elif torch.backends.mps.is_available(): torch_device = torch.device("mps") else: @@ -123,7 +125,7 @@ class FeatureInput(object): f0 = torchcrepe.filter.mean(f0, 3) f0[pd < 0.1] = 0 f0 = f0[0].cpu().numpy() - f0 = f0[1:] # Get rid of extra first frame + f0 = f0[1:] # Get rid of extra first frame elif method == "mangio-crepe": # print("Performing crepe pitch extraction. (EXPERIMENTAL)") # print("CREPE PITCH EXTRACTION HOP LENGTH: " + str(crepe_hop_length)) @@ -132,7 +134,9 @@ class FeatureInput(object): torch_device_index = 0 torch_device = None if torch.cuda.is_available(): - torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}") + torch_device = torch.device( + f"cuda:{torch_device_index % torch.cuda.device_count()}" + ) elif torch.backends.mps.is_available(): torch_device = torch.device("mps") else: @@ -156,7 +160,7 @@ class FeatureInput(object): "full", batch_size=crepe_hop_length * 2, device=torch_device, - pad=True + pad=True, ) p_len = p_len or x.shape[0] // crepe_hop_length # Resize the pitch @@ -165,7 +169,7 @@ class FeatureInput(object): target = np.interp( np.arange(0, len(source) * p_len, len(source)) / p_len, np.arange(0, len(source)), - source + source, ) f0 = np.nan_to_num(target) elif method == "harvest": @@ -191,12 +195,12 @@ class FeatureInput(object): f0 = signal.medfilt(f0, 3) f0 = f0[1:] f0_computation_stack.append(f0) - + for fc in f0_computation_stack: print(len(fc)) # print("Calculating hybrid median f0 from the stack of: %s" % str(methods)) - + f0_median_hybrid = None if len(f0_computation_stack) == 1: f0_median_hybrid = f0_computation_stack[0] @@ -236,10 +240,9 @@ class FeatureInput(object): elif f0_method == "rmvpe": if hasattr(self, "model_rmvpe") == False: from rmvpe import RMVPE + print("loading rmvpe model") - self.model_rmvpe = RMVPE( - "rmvpe.pt", is_half=False, device="cuda:0" - ) + self.model_rmvpe = RMVPE("rmvpe.pt", is_half=False, device="cuda:0") f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) elif f0_method == "dio": f0, t = pyworld.dio( @@ -250,12 +253,16 @@ class FeatureInput(object): frame_period=1000 * self.hop / self.fs, ) f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs) - elif f0_method == "crepe": # Fork Feature: Added crepe f0 for f0 feature extraction + elif ( + f0_method == "crepe" + ): # Fork Feature: Added crepe f0 for f0 feature extraction # Pick a batch size that doesn't cause memory errors on your gpu torch_device_index = 0 torch_device = None if torch.cuda.is_available(): - torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}") + torch_device = torch.device( + f"cuda:{torch_device_index % torch.cuda.device_count()}" + ) elif torch.backends.mps.is_available(): torch_device = torch.device("mps") else: @@ -287,7 +294,9 @@ class FeatureInput(object): torch_device_index = 0 torch_device = None if torch.cuda.is_available(): - torch_device = torch.device(f"cuda:{torch_device_index % torch.cuda.device_count()}") + torch_device = torch.device( + f"cuda:{torch_device_index % torch.cuda.device_count()}" + ) elif torch.backends.mps.is_available(): torch_device = torch.device("mps") else: @@ -311,7 +320,7 @@ class FeatureInput(object): "full", batch_size=crepe_hop_length * 2, device=torch_device, - pad=True + pad=True, ) p_len = p_len or x.shape[0] // crepe_hop_length # Resize the pitch @@ -320,20 +329,20 @@ class FeatureInput(object): target = np.interp( np.arange(0, len(source) * p_len, len(source)) / p_len, np.arange(0, len(source)), - source + source, ) f0 = np.nan_to_num(target) - elif "hybrid" in f0_method: # EXPERIMENTAL + elif "hybrid" in f0_method: # EXPERIMENTAL # Perform hybrid median pitch estimation time_step = 160 / 16000 * 1000 f0 = self.get_f0_hybrid_computation( - f0_method, + f0_method, x, self.f0_min, self.f0_max, p_len, crepe_hop_length, - time_step + time_step, ) # Mangio-RVC-Fork Feature: Add hybrid f0 inference to feature extraction. EXPERIMENTAL... @@ -362,14 +371,19 @@ class FeatureInput(object): with tqdm.tqdm(total=len(paths), leave=True, position=thread_n) as pbar: for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths): try: - pbar.set_description("thread:%s, f0ing, Hop-Length:%s" % (thread_n, crepe_hop_length)) + pbar.set_description( + "thread:%s, f0ing, Hop-Length:%s" + % (thread_n, crepe_hop_length) + ) pbar.update(1) if ( os.path.exists(opt_path1 + ".npy") == True and os.path.exists(opt_path2 + ".npy") == True ): continue - featur_pit = self.compute_f0(inp_path, f0_method, crepe_hop_length) + featur_pit = self.compute_f0( + inp_path, f0_method, crepe_hop_length + ) np.save( opt_path2, featur_pit, @@ -382,7 +396,9 @@ class FeatureInput(object): allow_pickle=False, ) # ori except: - printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())) + printt( + "f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc()) + ) if __name__ == "__main__": @@ -411,12 +427,7 @@ if __name__ == "__main__": for i in range(n_p): p = Process( target=featureInput.go, - args=( - paths[i::n_p], - f0method, - extraction_crepe_hop_length, - i - ), + args=(paths[i::n_p], f0method, extraction_crepe_hop_length, i), ) ps.append(p) p.start() diff --git a/gui_v0.py b/gui_v0.py index 2bd2e75..0c31844 100644 --- a/gui_v0.py +++ b/gui_v0.py @@ -51,8 +51,10 @@ class RVC: self.window = 160 # Get Torch Device - if(torch.cuda.is_available()): - self.torch_device = torch.device(f"cuda:{0 % torch.cuda.device_count()}") + if torch.cuda.is_available(): + self.torch_device = torch.device( + f"cuda:{0 % torch.cuda.device_count()}" + ) elif torch.backends.mps.is_available(): self.torch_device = torch.device("mps") else: @@ -141,7 +143,7 @@ class RVC: def get_f0(self, x, f0_up_key, inp_f0=None): # Calculate Padding and f0 details here - p_len = x.shape[0] // 512 # For Now This probs doesn't work + p_len = x.shape[0] // 512 # For Now This probs doesn't work x_pad = 1 f0_min = 50 f0_max = 1100 @@ -150,11 +152,11 @@ class RVC: f0 = 0 # Here, check f0_methods and get their computations - if(self.f0_method == 'harvest'): + if self.f0_method == "harvest": f0 = self.get_harvest_computation(x, f0_min, f0_max) - elif(self.f0_method == 'reg-crepe'): + elif self.f0_method == "reg-crepe": f0 = self.get_regular_crepe_computation(x, f0_min, f0_max) - elif(self.f0_method == 'reg-crepe-tiny'): + elif self.f0_method == "reg-crepe-tiny": f0 = self.get_regular_crepe_computation(x, f0_min, f0_max, "tiny") # Calculate f0_course and f0_bak here @@ -300,7 +302,7 @@ class GUI: with open("values1.json", "r") as j: data = json.load(j) except: - # Injecting f0_method into the json data + # Injecting f0_method into the json data with open("values1.json", "w") as j: data = { "pth_path": "", @@ -328,11 +330,7 @@ class GUI: [ sg.Frame( title="Proudly forked by Mangio621", - layout=[ - [ - sg.Image('./mangio_utils/lol.png') - ] - ] + layout=[[sg.Image("./mangio_utils/lol.png")]], ), sg.Frame( title=i18n("加载模型"), @@ -384,14 +382,16 @@ class GUI: ), ], ], - ) + ), ], [ # Mangio f0 Selection frame Here sg.Frame( layout=[ [ - sg.Radio("Harvest", "f0_method", key="harvest", default=True), + sg.Radio( + "Harvest", "f0_method", key="harvest", default=True + ), sg.Radio("Crepe", "f0_method", key="reg-crepe"), sg.Radio("Crepe Tiny", "f0_method", key="reg-crepe-tiny"), ] @@ -536,20 +536,21 @@ class GUI: if event == "stop_vc" and self.flag_vc == True: self.flag_vc = False - # Function that returns the used f0 method in string format "harvest" + # Function that returns the used f0 method in string format "harvest" def get_f0_method_from_radios(self, values): f0_array = [ - {"name": "harvest", "val": values['harvest']}, - {"name": "reg-crepe", "val": values['reg-crepe']}, - {"name": "reg-crepe-tiny", "val": values['reg-crepe-tiny']}, + {"name": "harvest", "val": values["harvest"]}, + {"name": "reg-crepe", "val": values["reg-crepe"]}, + {"name": "reg-crepe-tiny", "val": values["reg-crepe-tiny"]}, ] # Filter through to find a true value used_f0 = "" for f0 in f0_array: - if(f0['val'] == True): - used_f0 = f0['name'] + if f0["val"] == True: + used_f0 = f0["name"] break - if(used_f0 == ""): used_f0 = "harvest" # Default Harvest if used_f0 is empty somehow + if used_f0 == "": + used_f0 = "harvest" # Default Harvest if used_f0 is empty somehow return used_f0 def set_values(self, values): diff --git a/infer-web.py b/infer-web.py index 68a2af3..ff88efa 100644 --- a/infer-web.py +++ b/infer-web.py @@ -1,7 +1,7 @@ import os import shutil import sys -import json # Mangio fork using json for preset saving +import json # Mangio fork using json for preset saving import signal @@ -13,6 +13,7 @@ import warnings import numpy as np import torch import re + os.environ["OPENBLAS_NUM_THREADS"] = "1" os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1" import logging @@ -60,11 +61,11 @@ DoFormant = False Quefrency = 8.0 Timbre = 1.2 -with open('formanting.txt', 'w+') as fsf: +with open("formanting.txt", "w+") as fsf: fsf.truncate(0) - fsf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n']) - + fsf.writelines([str(DoFormant) + "\n", str(Quefrency) + "\n", str(Timbre) + "\n"]) + config = Config() i18n = I18nAuto() @@ -158,7 +159,7 @@ index_root = "./logs/" global audio_root audio_root = "audios" global input_audio_path0 -global input_audio_path1 +global input_audio_path1 names = [] for name in os.listdir(weight_root): if name.endswith(".pth"): @@ -166,77 +167,83 @@ for name in os.listdir(weight_root): index_paths = [] global indexes_list -indexes_list=[] +indexes_list = [] audio_paths = [] for root, dirs, files in os.walk(index_root, topdown=False): for name in files: if name.endswith(".index") and "trained" not in name: index_paths.append("%s\\%s" % (root, name)) - + for root, dirs, files in os.walk(audio_root, topdown=False): for name in files: - audio_paths.append("%s/%s" % (root, name)) - + uvr5_names = [] for name in os.listdir(weight_uvr5_root): if name.endswith(".pth") or "onnx" in name: uvr5_names.append(name.replace(".pth", "")) + def check_for_name(): if len(names) > 0: return sorted(names)[0] else: - return '' + return "" + def get_index(): - if check_for_name() != '': - chosen_model=sorted(names)[0].split(".")[0] - logs_path="./logs/"+chosen_model + if check_for_name() != "": + chosen_model = sorted(names)[0].split(".")[0] + logs_path = "./logs/" + chosen_model if os.path.exists(logs_path): for file in os.listdir(logs_path): if file.endswith(".index"): - return os.path.join(logs_path, file).replace('\\','/') - return '' + return os.path.join(logs_path, file).replace("\\", "/") + return "" else: - return '' + return "" + def get_indexes(): for dirpath, dirnames, filenames in os.walk("./logs/"): for filename in filenames: if filename.endswith(".index") and "trained" not in filename: - indexes_list.append(os.path.join(dirpath,filename).replace('\\','/')) + indexes_list.append(os.path.join(dirpath, filename).replace("\\", "/")) if len(indexes_list) > 0: return indexes_list else: - return '' + return "" + fshift_presets_list = [] + def get_fshift_presets(): fshift_presets_list = [] for dirpath, dirnames, filenames in os.walk("./formantshiftcfg/"): for filename in filenames: if filename.endswith(".txt"): - fshift_presets_list.append(os.path.join(dirpath,filename).replace('\\','/')) - + fshift_presets_list.append( + os.path.join(dirpath, filename).replace("\\", "/") + ) + if len(fshift_presets_list) > 0: return fshift_presets_list else: - return '' + return "" def get_audios(): - if check_for_name() != '': - audios_path= '"' + os.path.abspath(os.getcwd()) + '/audios/' + if check_for_name() != "": + audios_path = '"' + os.path.abspath(os.getcwd()) + "/audios/" if os.path.exists(audios_path): for file in os.listdir(audios_path): print(audios_path.join(file) + '"') return os.path.join(audios_path, file + '"') - return '' + return "" else: - return '' + return "" def vc_single( @@ -261,12 +268,12 @@ def vc_single( return "You need to upload an audio", None f0_up_key = int(f0_up_key) try: - if input_audio_path0 == '': + if input_audio_path0 == "": audio = load_audio(input_audio_path1, 16000, DoFormant, Quefrency, Timbre) - + else: audio = load_audio(input_audio_path0, 16000, DoFormant, Quefrency, Timbre) - + audio_max = np.abs(audio).max() / 0.95 if audio_max > 1: audio /= audio_max @@ -378,7 +385,7 @@ def vc_multi( resample_sr, rms_mix_rate, protect, - crepe_hop_length + crepe_hop_length, ) if "Success" in info: try: @@ -522,7 +529,11 @@ def get_vc(sid, to_return_protect0, to_return_protect1): if torch.cuda.is_available(): torch.cuda.empty_cache() cpt = None - return ({"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}) + return ( + {"visible": False, "__type__": "update"}, + {"visible": False, "__type__": "update"}, + {"visible": False, "__type__": "update"}, + ) person = "%s/%s" % (weight_root, sid) print("loading %s" % person) cpt = torch.load(person, map_location="cpu") @@ -580,19 +591,23 @@ def change_choices(): names.append(name) index_paths = [] audio_paths = [] - audios_path=os.path.abspath(os.getcwd()) + "/audios/" + audios_path = os.path.abspath(os.getcwd()) + "/audios/" for root, dirs, files in os.walk(index_root, topdown=False): for name in files: if name.endswith(".index") and "trained" not in name: index_paths.append("%s/%s" % (root, name)) for file in os.listdir(audios_path): - audio_paths.append("%s/%s" % (audio_root, file)) - return {"choices": sorted(names), "__type__": "update"}, {"choices": sorted(index_paths), "__type__": "update"}, {"choices": sorted(audio_paths), "__type__": "update"} + audio_paths.append("%s/%s" % (audio_root, file)) + return ( + {"choices": sorted(names), "__type__": "update"}, + {"choices": sorted(index_paths), "__type__": "update"}, + {"choices": sorted(audio_paths), "__type__": "update"}, + ) def clean(): - return ({"value": "", "__type__": "update"}) - + return {"value": "", "__type__": "update"} + sr_dict = { "32k": 32000, @@ -624,17 +639,20 @@ def if_done_multi(done, ps): break done[0] = True -def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button): - - if (cbox): +def formant_enabled( + cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button +): + if cbox: DoFormant = True - with open('formanting.txt', 'w') as fxxf: + with open("formanting.txt", "w") as fxxf: fxxf.truncate(0) - fxxf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n']) - #print(f"is checked? - {cbox}\ngot {DoFormant}") - + fxxf.writelines( + [str(DoFormant) + "\n", str(Quefrency) + "\n", str(Timbre) + "\n"] + ) + # print(f"is checked? - {cbox}\ngot {DoFormant}") + return ( {"value": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, @@ -643,16 +661,16 @@ def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_ref {"visible": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, ) - - + else: - DoFormant = False - with open('formanting.txt', 'w') as fxf: + with open("formanting.txt", "w") as fxf: fxf.truncate(0) - fxf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n']) - #print(f"is checked? - {cbox}\ngot {DoFormant}") + fxf.writelines( + [str(DoFormant) + "\n", str(Quefrency) + "\n", str(Timbre) + "\n"] + ) + # print(f"is checked? - {cbox}\ngot {DoFormant}") return ( {"value": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, @@ -662,28 +680,33 @@ def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_ref {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, ) - + def formant_apply(qfrency, tmbre): Quefrency = qfrency Timbre = tmbre DoFormant = True - - with open('formanting.txt', 'w') as fxxxf: + + with open("formanting.txt", "w") as fxxxf: fxxxf.truncate(0) - fxxxf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n']) - return ({"value": Quefrency, "__type__": "update"}, {"value": Timbre, "__type__": "update"}) + fxxxf.writelines( + [str(DoFormant) + "\n", str(Quefrency) + "\n", str(Timbre) + "\n"] + ) + return ( + {"value": Quefrency, "__type__": "update"}, + {"value": Timbre, "__type__": "update"}, + ) + def update_fshift_presets(preset, qfrency, tmbre): - qfrency, tmbre = preset_apply(preset, qfrency, tmbre) - - if (str(preset) != ''): - with open(str(preset), 'r') as p: + + if str(preset) != "": + with open(str(preset), "r") as p: content = p.readlines() - qfrency, tmbre = content[0].split('\n')[0], content[1] - + qfrency, tmbre = content[0].split("\n")[0], content[1] + formant_apply(qfrency, tmbre) else: pass @@ -1161,7 +1184,7 @@ def train1key( if_cache_gpu17, if_save_every_weights18, version19, - echl + echl, ): infos = [] @@ -1202,7 +1225,7 @@ def train1key( model_log_dir, np7, f0method8, - echl + echl, ) yield get_info_str(cmd) p = Popen(cmd, shell=True, cwd=now_dir) @@ -1434,7 +1457,6 @@ def export_onnx(ModelPath, ExportedPath): device = "cpu" # 导出时设备(不影响使用模型) - net_g = SynthesizerTrnMsNSFsidM( *cpt["config"], is_half=False, version=cpt.get("version", "v1") ) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16) @@ -1470,20 +1492,24 @@ def export_onnx(ModelPath, ExportedPath): return "Finished" -#region Mangio-RVC-Fork CLI App +# region Mangio-RVC-Fork CLI App import re as regex import scipy.io.wavfile as wavfile cli_current_page = "HOME" + def cli_split_command(com): exp = r'(?:(?<=\s)|^)"(.*?)"(?=\s|$)|(\S+)' split_array = regex.findall(exp, com) split_array = [group[0] if group[0] else group[1] for group in split_array] return split_array + def execute_generator_function(genObject): - for _ in genObject: pass + for _ in genObject: + pass + def cli_infer(com): # get VC first @@ -1492,7 +1518,7 @@ def cli_infer(com): source_audio_path = com[1] output_file_name = com[2] feature_index_path = com[3] - f0_file = None # Not Implemented Yet + f0_file = None # Not Implemented Yet # Get parameters for inference speaker_id = int(com[4]) @@ -1505,7 +1531,7 @@ def cli_infer(com): feature_ratio = float(com[11]) protection_amnt = float(com[12]) ##### - + print("Mangio-RVC-Fork Infer-CLI: Starting the inference...") vc_data = get_vc(model_name) print(vc_data) @@ -1524,16 +1550,27 @@ def cli_infer(com): resample, mix, protection_amnt, - crepe_hop_length, + crepe_hop_length, ) if "Success." in conversion_data[0]: - print("Mangio-RVC-Fork Infer-CLI: Inference succeeded. Writing to %s/%s..." % ('audio-outputs', output_file_name)) - wavfile.write('%s/%s' % ('audio-outputs', output_file_name), conversion_data[1][0], conversion_data[1][1]) - print("Mangio-RVC-Fork Infer-CLI: Finished! Saved output to %s/%s" % ('audio-outputs', output_file_name)) + print( + "Mangio-RVC-Fork Infer-CLI: Inference succeeded. Writing to %s/%s..." + % ("audio-outputs", output_file_name) + ) + wavfile.write( + "%s/%s" % ("audio-outputs", output_file_name), + conversion_data[1][0], + conversion_data[1][1], + ) + print( + "Mangio-RVC-Fork Infer-CLI: Finished! Saved output to %s/%s" + % ("audio-outputs", output_file_name) + ) else: print("Mangio-RVC-Fork Infer-CLI: Inference failed. Here's the traceback: ") print(conversion_data[0]) + def cli_pre_process(com): com = cli_split_command(com) model_name = com[0] @@ -1543,14 +1580,12 @@ def cli_pre_process(com): print("Mangio-RVC-Fork Pre-process: Starting...") generator = preprocess_dataset( - trainset_directory, - model_name, - sample_rate, - num_processes + trainset_directory, model_name, sample_rate, num_processes ) execute_generator_function(generator) print("Mangio-RVC-Fork Pre-process: Finished") + def cli_extract_feature(com): com = cli_split_command(com) model_name = com[0] @@ -1559,23 +1594,24 @@ def cli_extract_feature(com): has_pitch_guidance = True if (int(com[3]) == 1) else False f0_method = com[4] crepe_hop_length = int(com[5]) - version = com[6] # v1 or v2 - + version = com[6] # v1 or v2 + print("Mangio-RVC-CLI: Extract Feature Has Pitch: " + str(has_pitch_guidance)) print("Mangio-RVC-CLI: Extract Feature Version: " + str(version)) print("Mangio-RVC-Fork Feature Extraction: Starting...") generator = extract_f0_feature( - gpus, - num_processes, - f0_method, - has_pitch_guidance, - model_name, - version, - crepe_hop_length + gpus, + num_processes, + f0_method, + has_pitch_guidance, + model_name, + version, + crepe_hop_length, ) execute_generator_function(generator) print("Mangio-RVC-Fork Feature Extraction: Finished") + def cli_train(com): com = cli_split_command(com) model_name = com[0] @@ -1583,7 +1619,7 @@ def cli_train(com): has_pitch_guidance = True if (int(com[2]) == 1) else False speaker_id = int(com[3]) save_epoch_iteration = int(com[4]) - total_epoch = int(com[5]) # 10000 + total_epoch = int(com[5]) # 10000 batch_size = int(com[6]) gpu_card_slot_numbers = com[7] if_save_latest = i18n("是") if (int(com[8]) == 1) else i18n("否") @@ -1591,8 +1627,8 @@ def cli_train(com): if_save_every_weight = i18n("是") if (int(com[10]) == 1) else i18n("否") version = com[11] - pretrained_base = "pretrained/" if version == "v1" else "pretrained_v2/" - + pretrained_base = "pretrained/" if version == "v1" else "pretrained_v2/" + g_pretrained_path = "%sf0G%s.pth" % (pretrained_base, sample_rate) d_pretrained_path = "%sf0D%s.pth" % (pretrained_base, sample_rate) @@ -1611,21 +1647,20 @@ def cli_train(com): gpu_card_slot_numbers, if_cache_gpu, if_save_every_weight, - version + version, ) + def cli_train_feature(com): com = cli_split_command(com) model_name = com[0] version = com[1] print("Mangio-RVC-Fork Train Feature Index-CLI: Training... Please wait") - generator = train_index( - model_name, - version - ) + generator = train_index(model_name, version) execute_generator_function(generator) print("Mangio-RVC-Fork Train Feature Index-CLI: Done!") + def cli_extract_model(com): com = cli_split_command(com) model_path = com[0] @@ -1635,55 +1670,74 @@ def cli_extract_model(com): info = com[4] version = com[5] extract_small_model_process = extract_small_model( - model_path, - save_name, - sample_rate, - has_pitch_guidance, - info, - version + model_path, save_name, sample_rate, has_pitch_guidance, info, version ) if extract_small_model_process == "Success.": print("Mangio-RVC-Fork Extract Small Model: Success!") else: - print(str(extract_small_model_process)) + print(str(extract_small_model_process)) print("Mangio-RVC-Fork Extract Small Model: Failed!") def preset_apply(preset, qfer, tmbr): - if str(preset) != '': - with open(str(preset), 'r') as p: + if str(preset) != "": + with open(str(preset), "r") as p: content = p.readlines() - qfer, tmbr = content[0].split('\n')[0], content[1] - + qfer, tmbr = content[0].split("\n")[0], content[1] + formant_apply(qfer, tmbr) else: pass - return ({"value": qfer, "__type__": "update"}, {"value": tmbr, "__type__": "update"}) + return ( + {"value": qfer, "__type__": "update"}, + {"value": tmbr, "__type__": "update"}, + ) + def print_page_details(): if cli_current_page == "HOME": print(" go home : Takes you back to home with a navigation list.") print(" go infer : Takes you to inference command execution.\n") - print(" go pre-process : Takes you to training step.1) pre-process command execution.") - print(" go extract-feature : Takes you to training step.2) extract-feature command execution.") - print(" go train : Takes you to training step.3) being or continue training command execution.") - print(" go train-feature : Takes you to the train feature index command execution.\n") - print(" go extract-model : Takes you to the extract small model command execution.") + print( + " go pre-process : Takes you to training step.1) pre-process command execution." + ) + print( + " go extract-feature : Takes you to training step.2) extract-feature command execution." + ) + print( + " go train : Takes you to training step.3) being or continue training command execution." + ) + print( + " go train-feature : Takes you to the train feature index command execution.\n" + ) + print( + " go extract-model : Takes you to the extract small model command execution." + ) elif cli_current_page == "INFER": print(" arg 1) model name with .pth in ./weights: mi-test.pth") print(" arg 2) source audio path: myFolder\\MySource.wav") - print(" arg 3) output file name to be placed in './audio-outputs': MyTest.wav") - print(" arg 4) feature index file path: logs/mi-test/added_IVF3042_Flat_nprobe_1.index") + print( + " arg 3) output file name to be placed in './audio-outputs': MyTest.wav" + ) + print( + " arg 4) feature index file path: logs/mi-test/added_IVF3042_Flat_nprobe_1.index" + ) print(" arg 5) speaker id: 0") print(" arg 6) transposition: 0") - print(" arg 7) f0 method: harvest (pm, harvest, crepe, crepe-tiny, hybrid[x,x,x,x], mangio-crepe, mangio-crepe-tiny)") + print( + " arg 7) f0 method: harvest (pm, harvest, crepe, crepe-tiny, hybrid[x,x,x,x], mangio-crepe, mangio-crepe-tiny)" + ) print(" arg 8) crepe hop length: 160") print(" arg 9) harvest median filter radius: 3 (0-7)") print(" arg 10) post resample rate: 0") print(" arg 11) mix volume envelope: 1") print(" arg 12) feature index ratio: 0.78 (0-1)") - print(" arg 13) Voiceless Consonant Protection (Less Artifact): 0.33 (Smaller number = more protection. 0.50 means Dont Use.) \n") - print("Example: mi-test.pth saudio/Sidney.wav myTest.wav logs/mi-test/added_index.index 0 -2 harvest 160 3 0 1 0.95 0.33") + print( + " arg 13) Voiceless Consonant Protection (Less Artifact): 0.33 (Smaller number = more protection. 0.50 means Dont Use.) \n" + ) + print( + "Example: mi-test.pth saudio/Sidney.wav myTest.wav logs/mi-test/added_index.index 0 -2 harvest 160 3 0 1 0.95 0.33" + ) elif cli_current_page == "PRE-PROCESS": print(" arg 1) Model folder name in ./logs: mi-test") print(" arg 2) Trainset directory: mydataset (or) E:\\my-data-set") @@ -1709,8 +1763,12 @@ def print_page_details(): print(" arg 7) Batch size: 8") print(" arg 8) Gpu card slot: 0 (0-1-2 if using 3 GPUs)") print(" arg 9) Save only the latest checkpoint: 0 (0 for no, 1 for yes)") - print(" arg 10) Whether to cache training set to vram: 0 (0 for no, 1 for yes)") - print(" arg 11) Save extracted small model every generation?: 0 (0 for no, 1 for yes)") + print( + " arg 10) Whether to cache training set to vram: 0 (0 for no, 1 for yes)" + ) + print( + " arg 11) Save extracted small model every generation?: 0 (0 for no, 1 for yes)" + ) print(" arg 12) Model architecture version: v2 (use either v1 or v2)\n") print("Example: mi-test 40k 1 0 50 10000 8 0 0 0 0 v2") elif cli_current_page == "TRAIN-FEATURE": @@ -1724,14 +1782,18 @@ def print_page_details(): print(" arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)") print(' arg 5) Model information: "My Model"') print(" arg 6) Model architecture version: v2 (use either v1 or v2)\n") - print('Example: logs/mi-test/G_168000.pth MyModel 40k 1 "Created by Cole Mangio" v2') + print( + 'Example: logs/mi-test/G_168000.pth MyModel 40k 1 "Created by Cole Mangio" v2' + ) print("") + def change_page(page): global cli_current_page cli_current_page = page return 0 + def execute_command(com): if com == "go home": return change_page("HOME") @@ -1751,7 +1813,7 @@ def execute_command(com): if com[:3] == "go ": print("page '%s' does not exist!" % com[3:]) return 0 - + if cli_current_page == "INFER": cli_infer(com) elif cli_current_page == "PRE-PROCESS": @@ -1765,6 +1827,7 @@ def execute_command(com): elif cli_current_page == "EXTRACT-MODEL": cli_extract_model(com) + def cli_navigation_loop(): while True: print("You are currently in '%s':" % cli_current_page) @@ -1775,97 +1838,108 @@ def cli_navigation_loop(): except: print(traceback.format_exc()) -if(config.is_cli): + +if config.is_cli: print("\n\nMangio-RVC-Fork v2 CLI App!\n") - print("Welcome to the CLI version of RVC. Please read the documentation on https://github.com/Mangio621/Mangio-RVC-Fork (README.MD) to understand how to use this app.\n") + print( + "Welcome to the CLI version of RVC. Please read the documentation on https://github.com/Mangio621/Mangio-RVC-Fork (README.MD) to understand how to use this app.\n" + ) cli_navigation_loop() -#endregion +# endregion + +# region RVC WebUI App -#region RVC WebUI App def get_presets(): data = None - with open('../inference-presets.json', 'r') as file: + with open("../inference-presets.json", "r") as file: data = json.load(file) preset_names = [] - for preset in data['presets']: - preset_names.append(preset['name']) - + for preset in data["presets"]: + preset_names.append(preset["name"]) + return preset_names + def match_index(sid0): picked = False - #folder = sid0.split('.')[0] - - #folder = re.split(r'. |_', sid0)[0] - folder = sid0.split('.')[0].split('_')[0] - #folder_test = sid0.split('.')[0].split('_')[0].split('-')[0] + # folder = sid0.split('.')[0] + + # folder = re.split(r'. |_', sid0)[0] + folder = sid0.split(".")[0].split("_")[0] + # folder_test = sid0.split('.')[0].split('_')[0].split('-')[0] parent_dir = "./logs/" + folder - #print(parent_dir) + # print(parent_dir) if os.path.exists(parent_dir): - #print('path exists') - for filename in os.listdir(parent_dir.replace('\\','/')): + # print('path exists') + for filename in os.listdir(parent_dir.replace("\\", "/")): if filename.endswith(".index"): for i in range(len(indexes_list)): - if indexes_list[i] == (os.path.join(("./logs/" + folder), filename).replace('\\','/')): - print('regular index found') + if indexes_list[i] == ( + os.path.join(("./logs/" + folder), filename).replace("\\", "/") + ): + print("regular index found") break else: - if indexes_list[i] == (os.path.join(("./logs/" + folder.lower()), filename).replace('\\','/')): - print('lowered index found') + if indexes_list[i] == ( + os.path.join( + ("./logs/" + folder.lower()), filename + ).replace("\\", "/") + ): + print("lowered index found") parent_dir = "./logs/" + folder.lower() break - #elif (indexes_list[i]).casefold() == ((os.path.join(("./logs/" + folder), filename).replace('\\','/')).casefold()): + # elif (indexes_list[i]).casefold() == ((os.path.join(("./logs/" + folder), filename).replace('\\','/')).casefold()): # print('8') # parent_dir = "./logs/" + folder.casefold() # break - #elif (indexes_list[i]) == ((os.path.join(("./logs/" + folder_test), filename).replace('\\','/'))): + # elif (indexes_list[i]) == ((os.path.join(("./logs/" + folder_test), filename).replace('\\','/'))): # parent_dir = "./logs/" + folder_test # print(parent_dir) # break - #elif (indexes_list[i]) == (os.path.join(("./logs/" + folder_test.lower()), filename).replace('\\','/')): + # elif (indexes_list[i]) == (os.path.join(("./logs/" + folder_test.lower()), filename).replace('\\','/')): # parent_dir = "./logs/" + folder_test # print(parent_dir) # break - #else: + # else: # #print('couldnt find index') # continue - - #print('all done') - index_path=os.path.join(parent_dir.replace('\\','/'), filename.replace('\\','/')).replace('\\','/') - #print(index_path) + + # print('all done') + index_path = os.path.join( + parent_dir.replace("\\", "/"), filename.replace("\\", "/") + ).replace("\\", "/") + # print(index_path) return (index_path, index_path) - else: - #print('nothing found') - return ('', '') + # print('nothing found') + return ("", "") + def choveraudio(): - return '' + return "" -def stoptraining(mim): +def stoptraining(mim): if int(mim) == 1: - with open("stop.txt", "w+") as tostops: - - - tostops.writelines('stop') - #p.terminate() - #p.kill() + tostops.writelines("stop") + # p.terminate() + # p.kill() os.kill(PID, signal.SIGTERM) else: pass - + return ( - {"visible": False, "__type__": "update"}, + {"visible": False, "__type__": "update"}, {"visible": True, "__type__": "update"}, ) - -#Default-GUI -with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: + + +# Default-GUI +with gr.Blocks(theme="HaleyCH/HaleyCH_Theme") as app: gr.HTML("

The Mangio-RVC-Fork 💻

") gr.Markdown( value=i18n( @@ -1873,7 +1947,6 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: ) ) with gr.Tabs(): - with gr.TabItem(i18n("模型推理")): # Inference Preset Row # with gr.Row(): @@ -1885,13 +1958,14 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: # Other RVC stuff with gr.Row(): - - #sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value=check_for_name()) - sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value='') - #input_audio_path2 - - - refresh_button = gr.Button(i18n("Refresh voice list, index path and audio files"), variant="primary") + # sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value=check_for_name()) + sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names), value="") + # input_audio_path2 + + refresh_button = gr.Button( + i18n("Refresh voice list, index path and audio files"), + variant="primary", + ) clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary") spk_item = gr.Slider( minimum=0, @@ -1914,21 +1988,38 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0 ) input_audio0 = gr.Textbox( - label=i18n("Add audio's name to the path to the audio file to be processed (default is the correct format example) Remove the path to use an audio from the dropdown list:"), - value=os.path.abspath(os.getcwd()).replace('\\', '/') + "/audios/" + "audio.wav", + label=i18n( + "Add audio's name to the path to the audio file to be processed (default is the correct format example) Remove the path to use an audio from the dropdown list:" + ), + value=os.path.abspath(os.getcwd()).replace("\\", "/") + + "/audios/" + + "audio.wav", ) input_audio1 = gr.Dropdown( - label=i18n("Auto detect audio path and select from the dropdown:"), + label=i18n( + "Auto detect audio path and select from the dropdown:" + ), choices=sorted(audio_paths), value=get_audios(), interactive=True, ) - input_audio1.change(fn=choveraudio,inputs=[],outputs=[input_audio0]) + input_audio1.change( + fn=choveraudio, inputs=[], outputs=[input_audio0] + ) f0method0 = gr.Radio( label=i18n( "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU" ), - choices=["pm", "harvest", "dio", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe"], # Fork Feature. Add Crepe-Tiny + choices=[ + "pm", + "harvest", + "dio", + "crepe", + "crepe-tiny", + "mangio-crepe", + "mangio-crepe-tiny", + "rmvpe", + ], # Fork Feature. Add Crepe-Tiny value="rmvpe", interactive=True, ) @@ -1938,7 +2029,7 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: step=1, label=i18n("crepe_hop_length"), value=120, - interactive=True + interactive=True, ) filter_radius0 = gr.Slider( minimum=0, @@ -1954,22 +2045,21 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: value="", interactive=True, ) - + file_index2 = gr.Dropdown( label="3. Path to your added.index file (if it didn't automatically find it.)", choices=get_indexes(), value=get_index(), interactive=True, allow_custom_value=True, - ) - #sid0.select(fn=match_index, inputs=sid0, outputs=file_index2) - - + ) + # sid0.select(fn=match_index, inputs=sid0, outputs=file_index2) - refresh_button.click( - fn=change_choices, inputs=[], outputs=[sid0, file_index2, input_audio1] - ) + fn=change_choices, + inputs=[], + outputs=[sid0, file_index2, input_audio1], + ) # file_big_npy1 = gr.Textbox( # label=i18n("特征文件路径"), # value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy", @@ -2015,26 +2105,28 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: interactive=True, visible=True, ) - + formant_preset = gr.Dropdown( - value='', + value="", choices=get_fshift_presets(), label="browse presets for formanting", visible=False, ) - formant_refresh_button = gr.Button(value='\U0001f504', visible=False,variant='primary') - #formant_refresh_button = ToolButton( elem_id='1') - #create_refresh_button(formant_preset, lambda: {"choices": formant_preset}, "refresh_list_shiftpresets") - + formant_refresh_button = gr.Button( + value="\U0001f504", visible=False, variant="primary" + ) + # formant_refresh_button = ToolButton( elem_id='1') + # create_refresh_button(formant_preset, lambda: {"choices": formant_preset}, "refresh_list_shiftpresets") + qfrency = gr.Slider( - value=Quefrency, - label="Quefrency for formant shifting", - minimum=-16.0, - maximum=16.0, - step=0.1, - visible=False, - interactive=True, - ) + value=Quefrency, + label="Quefrency for formant shifting", + minimum=-16.0, + maximum=16.0, + step=0.1, + visible=False, + interactive=True, + ) tmbre = gr.Slider( value=Timbre, label="Timbre for formant shifting", @@ -2044,12 +2136,42 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: visible=False, interactive=True, ) - - formant_preset.change(fn=preset_apply, inputs=[formant_preset, qfrency, tmbre], outputs=[qfrency, tmbre]) + + formant_preset.change( + fn=preset_apply, + inputs=[formant_preset, qfrency, tmbre], + outputs=[qfrency, tmbre], + ) frmntbut = gr.Button("Apply", variant="primary", visible=False) - formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button]) - frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre]) - formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre]) + formanting.change( + fn=formant_enabled, + inputs=[ + formanting, + qfrency, + tmbre, + frmntbut, + formant_preset, + formant_refresh_button, + ], + outputs=[ + formanting, + qfrency, + tmbre, + frmntbut, + formant_preset, + formant_refresh_button, + ], + ) + frmntbut.click( + fn=formant_apply, + inputs=[qfrency, tmbre], + outputs=[qfrency, tmbre], + ) + formant_refresh_button.click( + fn=update_fshift_presets, + inputs=[formant_preset, qfrency, tmbre], + outputs=[formant_preset, qfrency, tmbre], + ) ##formant_refresh_button.click(fn=preset_apply, inputs=[formant_preset, qfrency, tmbre], outputs=[formant_preset, qfrency, tmbre]) ##formant_refresh_button.click(fn=update_fshift_presets, inputs=[formant_preset, qfrency, tmbre], outputs=[formant_preset, qfrency, tmbre]) f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调")) @@ -2074,7 +2196,7 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: resample_sr0, rms_mix_rate0, protect0, - crepe_hop_length + crepe_hop_length, ], [vc_output1, vc_output2], ) @@ -2110,13 +2232,17 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: value="", interactive=True, ) - file_index4 = gr.Dropdown( #file index dropdown for batch + file_index4 = gr.Dropdown( # file index dropdown for batch label=i18n("自动检测index路径,下拉式选择(dropdown)"), choices=get_indexes(), value=get_index(), interactive=True, ) - sid0.select(fn=match_index, inputs=[sid0], outputs=[file_index2, file_index4]) + sid0.select( + fn=match_index, + inputs=[sid0], + outputs=[file_index2, file_index4], + ) refresh_button.click( fn=lambda: change_choices()[1], inputs=[], @@ -2163,7 +2289,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: with gr.Column(): dir_input = gr.Textbox( label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"), - value=os.path.abspath(os.getcwd()).replace('\\', '/') + "/audios/", + value=os.path.abspath(os.getcwd()).replace("\\", "/") + + "/audios/", ) inputs = gr.File( file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹") @@ -2226,11 +2353,11 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: with gr.Column(): dir_wav_input = gr.Textbox( label=i18n("输入待处理音频文件夹路径"), - value=((os.getcwd()).replace('\\', '/') + "/audios/") + value=((os.getcwd()).replace("\\", "/") + "/audios/"), ) wav_inputs = gr.File( file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹") - ) ##### + ) ##### with gr.Column(): model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names) agg = gr.Slider( @@ -2312,7 +2439,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: ) with gr.Row(): trainset_dir4 = gr.Textbox( - label=i18n("输入训练文件夹路径"), value=os.path.abspath(os.getcwd()) + "\\datasets\\" + label=i18n("输入训练文件夹路径"), + value=os.path.abspath(os.getcwd()) + "\\datasets\\", ) spk_id5 = gr.Slider( minimum=0, @@ -2342,7 +2470,14 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: label=i18n( "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢" ), - choices=["pm", "harvest", "dio", "crepe", "mangio-crepe", "rmvpe"], # Fork feature: Crepe on f0 extraction for training. + choices=[ + "pm", + "harvest", + "dio", + "crepe", + "mangio-crepe", + "rmvpe", + ], # Fork feature: Crepe on f0 extraction for training. value="rmvpe", interactive=True, ) @@ -2352,13 +2487,21 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: step=1, label=i18n("crepe_hop_length"), value=64, - interactive=True + interactive=True, ) but2 = gr.Button(i18n("特征提取"), variant="primary") info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) but2.click( extract_f0_feature, - [gpus6, np7, f0method8, if_f0_3, exp_dir1, version19, extraction_crepe_hop_length], + [ + gpus6, + np7, + f0method8, + if_f0_3, + exp_dir1, + version19, + extraction_crepe_hop_length, + ], [info2], ) with gr.Group(): @@ -2442,19 +2585,26 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: interactive=True, ) butstop = gr.Button( - "Stop Training", - variant='primary', - visible=False, + "Stop Training", + variant="primary", + visible=False, ) but3 = gr.Button(i18n("训练模型"), variant="primary", visible=True) - but3.click(fn=stoptraining, inputs=[gr.Number(value=0, visible=False)], outputs=[but3, butstop]) - butstop.click(fn=stoptraining, inputs=[gr.Number(value=1, visible=False)], outputs=[butstop, but3]) - - + but3.click( + fn=stoptraining, + inputs=[gr.Number(value=0, visible=False)], + outputs=[but3, butstop], + ) + butstop.click( + fn=stoptraining, + inputs=[gr.Number(value=1, visible=False)], + outputs=[butstop, but3], + ) + but4 = gr.Button(i18n("训练特征索引"), variant="primary") - #but5 = gr.Button(i18n("一键训练"), variant="primary") + # but5 = gr.Button(i18n("一键训练"), variant="primary") info3 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=10) - + but3.click( click_train, [ @@ -2475,12 +2625,10 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: ], [info3], ) - + but4.click(train_index, [exp_dir1, version19], info3) - - - - #but5.click( + + # but5.click( # train1key, # [ # exp_dir1, @@ -2503,8 +2651,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: # extraction_crepe_hop_length # ], # info3, - #) - + # ) + with gr.TabItem(i18n("ckpt处理")): with gr.Group(): gr.Markdown(value=i18n("模型融合, 可用于测试音色融合")) @@ -2601,7 +2749,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: ckpt_path2 = gr.Textbox( lines=3, label=i18n("模型路径"), - value=os.path.abspath(os.getcwd()).replace('\\', '/') + "/logs/[YOUR_MODEL]/G_23333.pth", + value=os.path.abspath(os.getcwd()).replace("\\", "/") + + "/logs/[YOUR_MODEL]/G_23333.pth", interactive=True, ) save_name = gr.Textbox( @@ -2665,8 +2814,7 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: except: gr.Markdown(traceback.format_exc()) - - #region Mangio Preset Handler Region + # region Mangio Preset Handler Region def save_preset( preset_name, sid0, @@ -2682,45 +2830,44 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: resample_sr, rms_mix_rate, protect, - f0_file + f0_file, ): data = None - with open('../inference-presets.json', 'r') as file: + with open("../inference-presets.json", "r") as file: data = json.load(file) preset_json = { - 'name': preset_name, - 'model': sid0, - 'transpose': vc_transform, - 'audio_file': input_audio0, - 'auto_audio_file': input_audio1, - 'f0_method': f0method, - 'crepe_hop_length': crepe_hop_length, - 'median_filtering': filter_radius, - 'feature_path': file_index1, - 'auto_feature_path': file_index2, - 'search_feature_ratio': index_rate, - 'resample': resample_sr, - 'volume_envelope': rms_mix_rate, - 'protect_voiceless': protect, - 'f0_file_path': f0_file + "name": preset_name, + "model": sid0, + "transpose": vc_transform, + "audio_file": input_audio0, + "auto_audio_file": input_audio1, + "f0_method": f0method, + "crepe_hop_length": crepe_hop_length, + "median_filtering": filter_radius, + "feature_path": file_index1, + "auto_feature_path": file_index2, + "search_feature_ratio": index_rate, + "resample": resample_sr, + "volume_envelope": rms_mix_rate, + "protect_voiceless": protect, + "f0_file_path": f0_file, } - data['presets'].append(preset_json) - with open('../inference-presets.json', 'w') as file: + data["presets"].append(preset_json) + with open("../inference-presets.json", "w") as file: json.dump(data, file) file.flush() print("Saved Preset %s into inference-presets.json!" % preset_name) - def on_preset_changed(preset_name): print("Changed Preset to %s!" % preset_name) data = None - with open('../inference-presets.json', 'r') as file: + with open("../inference-presets.json", "r") as file: data = json.load(file) print("Searching for " + preset_name) returning_preset = None - for preset in data['presets']: - if(preset['name'] == preset_name): + for preset in data["presets"]: + if preset["name"] == preset_name: print("Found a preset") returning_preset = preset # return all new input values @@ -2740,11 +2887,11 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: # returning_preset['f0_file_path'] ) - # Preset State Changes - + # Preset State Changes + # This click calls save_preset that saves the preset into inference-presets.json with the preset name # mangio_preset_save_btn.click( - # fn=save_preset, + # fn=save_preset, # inputs=[ # mangio_preset_name_save, # sid0, @@ -2760,16 +2907,16 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: # rms_mix_rate0, # protect0, # f0_file - # ], + # ], # outputs=[] # ) # mangio_preset.change( - # on_preset_changed, + # on_preset_changed, # inputs=[ # # Pass inputs here # mangio_preset - # ], + # ], # outputs=[ # # Pass Outputs here. These refer to the gradio elements that we want to directly change # # sid0, @@ -2787,14 +2934,16 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: # # f0_file # ] # ) - #endregion + # endregion - # with gr.TabItem(i18n("招募音高曲线前端编辑器")): - # gr.Markdown(value=i18n("加开发群联系我xxxxx")) - # with gr.TabItem(i18n("点击查看交流、问题反馈群号")): - # gr.Markdown(value=i18n("xxxxx")) + # with gr.TabItem(i18n("招募音高曲线前端编辑器")): + # gr.Markdown(value=i18n("加开发群联系我xxxxx")) + # with gr.TabItem(i18n("点击查看交流、问题反馈群号")): + # gr.Markdown(value=i18n("xxxxx")) - if config.iscolab or config.paperspace: # Share gradio link for colab and paperspace (FORK FEATURE) + if ( + config.iscolab or config.paperspace + ): # Share gradio link for colab and paperspace (FORK FEATURE) app.queue(concurrency_count=511, max_size=1022).launch(share=True) else: app.queue(concurrency_count=511, max_size=1022).launch( @@ -2804,8 +2953,8 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: quiet=False, ) -#endregion -''' #End of Default-GUI +# endregion +""" #End of Default-GUI with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: gr.HTML("

The Mangio-RVC-Fork 💻

") @@ -3735,4 +3884,4 @@ with gr.Blocks(theme='HaleyCH/HaleyCH_Theme') as app: ) #endregion -''' \ No newline at end of file +""" diff --git a/my_utils.py b/my_utils.py index 1990d82..90eeb52 100644 --- a/my_utils.py +++ b/my_utils.py @@ -1,11 +1,14 @@ import ffmpeg import numpy as np -#import praatio -#import praatio.praat_scripts -import os -#from os.path import join -#praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe") +# import praatio +# import praatio.praat_scripts +import os + +# from os.path import join + +# praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe") + def load_audio(file, sr, DoFormant, Quefrency, Timbre): try: @@ -15,43 +18,47 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre): file = ( file.strip(" ").strip('"').strip("\n").strip('"').strip(" ") ) # 防止小白拷路径头尾带了空格和"和回车 - file_formanted = ( - file.strip(" ").strip('"').strip("\n").strip('"').strip(" ") - ) - with open('formanting.txt', 'r') as fvf: + file_formanted = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + with open("formanting.txt", "r") as fvf: content = fvf.readlines() - if 'True' in content[0].split('\n')[0]: - #print("true") + if "True" in content[0].split("\n")[0]: + # print("true") DoFormant = True - Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0] - + Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0] + else: - #print("not true") + # print("not true") DoFormant = False - + if DoFormant: - #os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}") - #print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted)) + # os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}") + # print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted)) print("formanting...") - - os.system('stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"' % (file, Quefrency, Timbre, file_formanted)) + + os.system( + 'stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"' + % (file, Quefrency, Timbre, file_formanted) + ) print("formanted!") - #filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\') - #file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\') + # filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\') + # file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\') out, _ = ( - ffmpeg.input('%sFORMANTED%s' % (file_formanted, '.wav'), threads=0) + ffmpeg.input("%sFORMANTED%s" % (file_formanted, ".wav"), threads=0) .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr) - .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True) + .run( + cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True + ) ) - - os.remove('%sFORMANTED%s' % (file_formanted, '.wav')) + + os.remove("%sFORMANTED%s" % (file_formanted, ".wav")) else: - out, _ = ( ffmpeg.input(file, threads=0) .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr) - .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True) + .run( + cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True + ) ) except Exception as e: raise RuntimeError(f"Failed to load audio: {e}") diff --git a/train_nsf_sim_cache_sid_load_pretrain.py b/train_nsf_sim_cache_sid_load_pretrain.py index 5bb3526..4ae7308 100644 --- a/train_nsf_sim_cache_sid_load_pretrain.py +++ b/train_nsf_sim_cache_sid_load_pretrain.py @@ -568,10 +568,10 @@ def train_and_evaluate( ), ) ) - + with open("stop.txt", "r+") as tostop: content = tostop.read() - if 'stop' in content: + if "stop" in content: logger.info("Stop Button was pressed. The program is closed.") if hasattr(net_g, "module"): ckpt = net_g.module.state_dict() @@ -581,15 +581,21 @@ def train_and_evaluate( "saving final ckpt:%s" % ( savee( - ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch, hps.version, hps + ckpt, + hps.sample_rate, + hps.if_f0, + hps.name, + epoch, + hps.version, + hps, ) ) ) - + tostop.truncate(0) tostop.writelines("not") os._exit(2333333) - + if rank == 0: logger.info("====> Epoch: {} {}".format(epoch, epoch_recorder.record())) if epoch >= hps.total_epoch and rank == 0: diff --git a/trainset_preprocess_pipeline_print.py b/trainset_preprocess_pipeline_print.py index 185cf29..b0ef248 100644 --- a/trainset_preprocess_pipeline_print.py +++ b/trainset_preprocess_pipeline_print.py @@ -24,9 +24,10 @@ Timbre = 0.0 mutex = multiprocessing.Lock() f = open("%s/preprocess.log" % exp_dir, "a+") -with open('formanting.txt', 'r') as fvf: - content = fvf.readlines() - Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0] +with open("formanting.txt", "r") as fvf: + content = fvf.readlines() + Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0] + def println(strr): mutex.acquire() @@ -104,12 +105,14 @@ class PreProcess: idx1 += 1 break self.norm_write(tmp_audio, idx0, idx1) - #println("%s->Suc." % path) + # println("%s->Suc." % path) except: println("%s->%s" % (path, traceback.format_exc())) def pipeline_mp(self, infos, thread_n): - for path, idx0 in tqdm.tqdm(infos, position=thread_n, leave=True, desc="thread:%s" % thread_n): + for path, idx0 in tqdm.tqdm( + infos, position=thread_n, leave=True, desc="thread:%s" % thread_n + ): self.pipeline(path, idx0) def pipeline_mp_inp_dir(self, inp_root, n_p): diff --git a/vc_infer_pipeline.py b/vc_infer_pipeline.py index 05ce82c..43cd829 100644 --- a/vc_infer_pipeline.py +++ b/vc_infer_pipeline.py @@ -1,7 +1,7 @@ import numpy as np, parselmouth, torch, pdb, sys, os from time import time as ttime import torch.nn.functional as F -import torchcrepe # Fork feature. Use the crepe f0 algorithm. New dependency (pip install torchcrepe) +import torchcrepe # Fork feature. Use the crepe f0 algorithm. New dependency (pip install torchcrepe) from torch import Tensor import scipy.signal as signal import pyworld, os, traceback, faiss, librosa, torchcrepe @@ -15,6 +15,7 @@ bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000) input_audio_path2wav = {} + @lru_cache def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period): audio = input_audio_path2wav[input_audio_path] @@ -74,24 +75,28 @@ class VC(object): def get_optimal_torch_device(self, index: int = 0) -> torch.device: # Get cuda device if torch.cuda.is_available(): - return torch.device(f"cuda:{index % torch.cuda.device_count()}") # Very fast + return torch.device( + f"cuda:{index % torch.cuda.device_count()}" + ) # Very fast elif torch.backends.mps.is_available(): return torch.device("mps") # Insert an else here to grab "xla" devices if available. TO DO later. Requires the torch_xla.core.xla_model library - # Else wise return the "cpu" as a torch device, + # Else wise return the "cpu" as a torch device, return torch.device("cpu") # Fork Feature: Compute f0 with the crepe method def get_f0_crepe_computation( - self, - x, - f0_min, - f0_max, - p_len, - hop_length=160, # 512 before. Hop length changes the speed that the voice jumps to a different dramatic pitch. Lower hop lengths means more pitch accuracy but longer inference time. - model="full", # Either use crepe-tiny "tiny" or crepe "full". Default is full + self, + x, + f0_min, + f0_max, + p_len, + hop_length=160, # 512 before. Hop length changes the speed that the voice jumps to a different dramatic pitch. Lower hop lengths means more pitch accuracy but longer inference time. + model="full", # Either use crepe-tiny "tiny" or crepe "full". Default is full ): - x = x.astype(np.float32) # fixes the F.conv2D exception. We needed to convert double to float. + x = x.astype( + np.float32 + ) # fixes the F.conv2D exception. We needed to convert double to float. x /= np.quantile(np.abs(x), 0.999) torch_device = self.get_optimal_torch_device() audio = torch.from_numpy(x).to(torch_device, copy=True) @@ -109,7 +114,7 @@ class VC(object): model, batch_size=hop_length * 2, device=torch_device, - pad=True + pad=True, ) p_len = p_len or x.shape[0] // hop_length # Resize the pitch for final f0 @@ -118,17 +123,17 @@ class VC(object): target = np.interp( np.arange(0, len(source) * p_len, len(source)) / p_len, np.arange(0, len(source)), - source + source, ) f0 = np.nan_to_num(target) - return f0 # Resized f0 - + return f0 # Resized f0 + def get_f0_official_crepe_computation( - self, - x, - f0_min, - f0_max, - model="full", + self, + x, + f0_min, + f0_max, + model="full", ): # Pick a batch size that doesn't cause memory errors on your gpu batch_size = 512 @@ -153,15 +158,15 @@ class VC(object): # Fork Feature: Compute pYIN f0 method def get_f0_pyin_computation(self, x, f0_min, f0_max): - y, sr = librosa.load('saudio/Sidney.wav', self.sr, mono=True) + y, sr = librosa.load("saudio/Sidney.wav", self.sr, mono=True) f0, _, _ = librosa.pyin(y, sr=self.sr, fmin=f0_min, fmax=f0_max) - f0 = f0[1:] # Get rid of extra first frame + f0 = f0[1:] # Get rid of extra first frame return f0 # Fork Feature: Acquire median hybrid f0 estimation calculation def get_f0_hybrid_computation( - self, - methods_str, + self, + methods_str, input_audio_path, x, f0_min, @@ -173,9 +178,9 @@ class VC(object): ): # Get various f0 methods from input to use in the computation stack s = methods_str - s = s.split('hybrid')[1] - s = s.replace('[', '').replace(']', '') - methods = s.split('+') + s = s.split("hybrid")[1] + s = s.replace("[", "").replace("]", "") + methods = s.split("+") f0_computation_stack = [] print("Calculating f0 pitch estimations for methods: %s" % str(methods)) @@ -202,35 +207,39 @@ class VC(object): ) elif method == "crepe": f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max) - f0 = f0[1:] # Get rid of extra first frame + f0 = f0[1:] # Get rid of extra first frame elif method == "crepe-tiny": f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny") - f0 = f0[1:] # Get rid of extra first frame + f0 = f0[1:] # Get rid of extra first frame elif method == "mangio-crepe": - f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length) + f0 = self.get_f0_crepe_computation( + x, f0_min, f0_max, p_len, crepe_hop_length + ) elif method == "mangio-crepe-tiny": - f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length, "tiny") + f0 = self.get_f0_crepe_computation( + x, f0_min, f0_max, p_len, crepe_hop_length, "tiny" + ) elif method == "harvest": f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10) if filter_radius > 2: f0 = signal.medfilt(f0, 3) - f0 = f0[1:] # Get rid of first frame. - elif method == "dio": # Potentially buggy? + f0 = f0[1:] # Get rid of first frame. + elif method == "dio": # Potentially buggy? f0, t = pyworld.dio( x.astype(np.double), fs=self.sr, f0_ceil=f0_max, f0_floor=f0_min, - frame_period=10 + frame_period=10, ) f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr) f0 = signal.medfilt(f0, 3) f0 = f0[1:] - #elif method == "pyin": Not Working just yet + # elif method == "pyin": Not Working just yet # f0 = self.get_f0_pyin_computation(x, f0_min, f0_max) # Push method to the stack f0_computation_stack.append(f0) - + for fc in f0_computation_stack: print(len(fc)) @@ -280,13 +289,13 @@ class VC(object): f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10) if filter_radius > 2: f0 = signal.medfilt(f0, 3) - elif f0_method == "dio": # Potentially Buggy? + elif f0_method == "dio": # Potentially Buggy? f0, t = pyworld.dio( x.astype(np.double), fs=self.sr, f0_ceil=f0_max, f0_floor=f0_min, - frame_period=10 + frame_period=10, ) f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr) f0 = signal.medfilt(f0, 3) @@ -295,12 +304,17 @@ class VC(object): elif f0_method == "crepe-tiny": f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max, "tiny") elif f0_method == "mangio-crepe": - f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length) + f0 = self.get_f0_crepe_computation( + x, f0_min, f0_max, p_len, crepe_hop_length + ) elif f0_method == "mangio-crepe-tiny": - f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length, "tiny") + f0 = self.get_f0_crepe_computation( + x, f0_min, f0_max, p_len, crepe_hop_length, "tiny" + ) elif f0_method == "rmvpe": if hasattr(self, "model_rmvpe") == False: from rmvpe import RMVPE + print("loading rmvpe model") self.model_rmvpe = RMVPE( "rmvpe.pt", is_half=self.is_half, device=self.device @@ -311,7 +325,7 @@ class VC(object): # Perform hybrid median pitch estimation input_audio_path2wav[input_audio_path] = x.astype(np.double) f0 = self.get_f0_hybrid_computation( - f0_method, + f0_method, input_audio_path, x, f0_min, @@ -319,7 +333,7 @@ class VC(object): p_len, filter_radius, crepe_hop_length, - time_step + time_step, ) f0 *= pow(2, f0_up_key / 12)