diff --git a/.gitignore b/.gitignore index 630c32e..7cde528 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,49 @@ .DS_Store __pycache__ /TEMP +/AUDIOS +/DATASETS +/LOGS +/RUNTIME *.pyd hubert_base.pt /logs .venv +alexforkINSTALL.bat +Changelog_CN.md +Changelog_EN.md +Changelog_KO.md +difdep.py +EasierGUI.py +envfilescheck.bat +export_onnx.py +export_onnx_old.py +ffmpeg.exe +ffprobe.exe +Fixes/Launch_Tensorboard.bat +Fixes/LOCAL_CREPE_FIX.bat +Fixes/local_fixes.py +Fixes/tensor-launch.py +gui.py +infer-web — backup.py +infer-webbackup.py +install_easy_dependencies.py +install_easyGUI.bat +installstft.bat +Launch_Tensorboard.bat +listdepend.bat +LOCAL_CREPE_FIX.bat +local_fixes.py +oldinfer.py +onnx_inference_demo.py +Praat.exe +requirementsNEW.txt +rmvpe.pt +run_easiergui.bat +tensor-launch.py +values1.json +使用需遵守的协议-LICENSE.txt +trainset_preprocess_pipeline_print.py +configs/48k.json +configs/40k.json +configs/32k.json diff --git a/README.md b/README.md index a4fe6a2..33c4f8c 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,10 @@ +# 7/26 Changelog: +- Fixed the cli inferencing traceback. +- SQL Databases implemented for formanting training stop. +- Gradio browser tab renamed to `Mangio-RVC-Web 💻`. +- Rudimentary functions from `infer-web.py` removed. +- Formanting now accepts any audio format, as long as it is supported by FFmpeg. + # 7/25 Changelog: - Better MacOS installation script. For inference, all that needs to be done is running the `run.sh` from the extracted zip folder, where it will install Python 3.8, Homebrew, and other dependencies for you automatically. M1 Macs are natively supported for GPU acceleration, and training should work if you choose to download the pretrained models. diff --git a/stop.txt b/TEMP/db similarity index 100% rename from stop.txt rename to TEMP/db diff --git a/extract_f0_print.py b/extract_f0_print.py index 4be7f5e..98de3dc 100644 --- a/extract_f0_print.py +++ b/extract_f0_print.py @@ -19,10 +19,8 @@ f = open("%s/extract_f0_feature.log" % exp_dir, "a+") DoFormant = False -with open("formanting.txt", "r") as fvf: - content = fvf.readlines() - Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0] - +Quefrency = 0.0 +Timbre = 0.0 def printt(strr): print(strr) diff --git a/formanting.txt b/formanting.txt deleted file mode 100644 index 86ee10e..0000000 --- a/formanting.txt +++ /dev/null @@ -1,3 +0,0 @@ -False -8.0 -1.2 diff --git a/formantshiftcfg/f2m.txt b/formantshiftcfg/f2m.txt index 9b33ee5..a11d276 100644 --- a/formantshiftcfg/f2m.txt +++ b/formantshiftcfg/f2m.txt @@ -1,2 +1,2 @@ 8.0 --1.2 \ No newline at end of file +0.8 \ No newline at end of file diff --git a/formantshiftcfg/random.txt b/formantshiftcfg/random.txt index 3297289..8cfd7b5 100644 --- a/formantshiftcfg/random.txt +++ b/formantshiftcfg/random.txt @@ -1,2 +1,2 @@ -16.0 +32.0 9.8 \ No newline at end of file diff --git a/infer-web.py b/infer-web.py index 6bf28bb..92603a5 100644 --- a/infer-web.py +++ b/infer-web.py @@ -43,8 +43,44 @@ from train.process_ckpt import change_info, extract_small_model, merge, show_inf from vc_infer_pipeline import VC from sklearn.cluster import MiniBatchKMeans +import sqlite3 + +def clear_sql(signal, frame): + cursor.execute("DELETE FROM formant_data") + cursor.execute("DELETE FROM stop_train") + conn.commit() + conn.close() + print("Clearing SQL database...") + sys.exit(0) + +if sys.platform == 'win32': + signal.signal(signal.SIGBREAK, clear_sql) + +signal.signal(signal.SIGINT, clear_sql) +signal.signal(signal.SIGTERM, clear_sql) + + + logging.getLogger("numba").setLevel(logging.WARNING) +conn = sqlite3.connect('TEMP/db:cachedb?mode=memory&cache=shared', check_same_thread=False) +cursor = conn.cursor() + + + +cursor.execute(""" + CREATE TABLE IF NOT EXISTS formant_data ( + Quefrency FLOAT, + Timbre FLOAT, + DoFormant INTEGER + ) +""") + +cursor.execute(""" + CREATE TABLE IF NOT EXISTS stop_train ( + stop BOOL + ) +""") tmp = os.path.join(now_dir, "TEMP") shutil.rmtree(tmp, ignore_errors=True) @@ -52,21 +88,29 @@ shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_erro shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True) os.makedirs(tmp, exist_ok=True) os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True) +os.makedirs(os.path.join(now_dir, "audios"), exist_ok=True) +os.makedirs(os.path.join(now_dir, "datasets"), exist_ok=True) os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True) os.environ["TEMP"] = tmp warnings.filterwarnings("ignore") torch.manual_seed(114514) -DoFormant = False -Quefrency = 8.0 -Timbre = 1.2 +global DoFormant, Quefrency, Timbre -with open('formanting.txt', 'w+') as fsf: - fsf.truncate(0) - fsf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n']) +try: + cursor.execute("SELECT Quefrency, Timbre, DoFormant FROM formant_data") + Quefrency, Timbre, DoFormant = cursor.fetchone() + +except Exception: + Quefrency = 8.0 + Timbre = 1.2 + DoFormant = False + cursor.execute("DELETE FROM formant_data") + cursor.execute("DELETE FROM stop_train") + cursor.execute("INSERT INTO formant_data (Quefrency, Timbre, DoFormant) VALUES (?, ?, ?)", (Quefrency, Timbre, 0)) + conn.commit() - config = Config() i18n = I18nAuto() i18n.print() @@ -124,20 +168,10 @@ else: default_batch_size = 1 gpus = "-".join([i[0] for i in gpu_infos]) - -class ToolButton(gr.Button, gr.components.FormComponent): - """Small button with single emoji as text, fits inside gradio forms""" - - def __init__(self, **kwargs): - super().__init__(variant="tool", **kwargs) - - def get_block_name(self): - return "button" - - hubert_model = None + def load_hubert(): global hubert_model models, _, _ = checkpoint_utils.load_model_ensemble_and_task( @@ -156,10 +190,7 @@ def load_hubert(): weight_root = "weights" weight_uvr5_root = "uvr5_weights" index_root = "./logs/" -global audio_root audio_root = "audios" -global input_audio_path0 -global input_audio_path1 names = [] for name in os.listdir(weight_root): if name.endswith(".pth"): @@ -228,17 +259,6 @@ def get_fshift_presets(): return '' -def get_audios(): - if check_for_name() != '': - audios_path= '"' + os.path.abspath(os.getcwd()) + '/audios/' - if os.path.exists(audios_path): - for file in os.listdir(audios_path): - print(audios_path.join(file) + '"') - return os.path.join(audios_path, file + '"') - return '' - else: - return '' - def vc_single( sid, @@ -627,14 +647,13 @@ def if_done_multi(done, ps): done[0] = True def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button): - if (cbox): DoFormant = True - with open('formanting.txt', 'w') as fxxf: - fxxf.truncate(0) - - fxxf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n']) + cursor.execute("DELETE FROM formant_data") + cursor.execute("INSERT INTO formant_data (Quefrency, Timbre, DoFormant) VALUES (?, ?, ?)", (qfrency, tmbre, 1)) + conn.commit() + #print(f"is checked? - {cbox}\ngot {DoFormant}") return ( @@ -650,10 +669,10 @@ def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_ref else: DoFormant = False - with open('formanting.txt', 'w') as fxf: - fxf.truncate(0) - - fxf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n']) + cursor.execute("DELETE FROM formant_data") + cursor.execute("INSERT INTO formant_data (Quefrency, Timbre, DoFormant) VALUES (?, ?, ?)", (qfrency, tmbre, int(DoFormant))) + conn.commit() + #print(f"is checked? - {cbox}\ngot {DoFormant}") return ( {"value": False, "__type__": "update"}, @@ -670,11 +689,10 @@ def formant_apply(qfrency, tmbre): Quefrency = qfrency Timbre = tmbre DoFormant = True - - with open('formanting.txt', 'w') as fxxxf: - fxxxf.truncate(0) + cursor.execute("DELETE FROM formant_data") + cursor.execute("INSERT INTO formant_data (Quefrency, Timbre, DoFormant) VALUES (?, ?, ?)", (qfrency, tmbre, 1)) + conn.commit() - fxxxf.writelines([str(DoFormant) + '\n', str(Quefrency) + '\n', str(Timbre) + '\n']) return ({"value": Quefrency, "__type__": "update"}, {"value": Timbre, "__type__": "update"}) def update_fshift_presets(preset, qfrency, tmbre): @@ -829,12 +847,12 @@ def change_sr2(sr2, if_f0_3, version19): if not if_pretrained_generator_exist: print( "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), - "not exist, will not use pretrained model", + "doesn't exist, will not use pretrained model", ) if not if_pretrained_discriminator_exist: print( "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), - "not exist, will not use pretrained model", + "doesn't exist, will not use pretrained model", ) return ( "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2) @@ -865,12 +883,12 @@ def change_version19(sr2, if_f0_3, version19): if not if_pretrained_generator_exist: print( "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), - "not exist, will not use pretrained model", + "doesn't exist, will not use pretrained model", ) if not if_pretrained_discriminator_exist: print( "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), - "not exist, will not use pretrained model", + "doesn't exist, will not use pretrained model", ) return ( "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2) @@ -972,6 +990,8 @@ def click_train( if_save_every_weights18, version19, ): + cursor.execute("DELETE FROM stop_train") + conn.commit() # 生成filelist exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) os.makedirs(exp_dir, exist_ok=True) @@ -1547,10 +1567,26 @@ def cli_infer(com): mix = float(com[10]) feature_ratio = float(com[11]) protection_amnt = float(com[12]) - ##### + protect1 = 0.5 + + if com[14] == 'False' or com[14] == 'false': + DoFormant = False + Quefrency = 0.0 + Timbre = 0.0 + cursor.execute("DELETE FROM formant_data") + cursor.execute("INSERT INTO formant_data (Quefrency, Timbre, DoFormant) VALUES (?, ?, ?)", (Quefrency, Timbre, 0)) + conn.commit() + + else: + DoFormant = True + Quefrency = float(com[15]) + Timbre = float(com[16]) + cursor.execute("DELETE FROM formant_data") + cursor.execute("INSERT INTO formant_data (Quefrency, Timbre, DoFormant) VALUES (?, ?, ?)", (Quefrency, Timbre, 1)) + conn.commit() print("Mangio-RVC-Fork Infer-CLI: Starting the inference...") - vc_data = get_vc(model_name) + vc_data = get_vc(model_name, protection_amnt, protect1) print(vc_data) print("Mangio-RVC-Fork Infer-CLI: Performing inference...") conversion_data = vc_single( @@ -1697,7 +1733,6 @@ def preset_apply(preset, qfer, tmbr): with open(str(preset), 'r') as p: content = p.readlines() qfer, tmbr = content[0].split('\n')[0], content[1] - formant_apply(qfer, tmbr) else: pass @@ -1705,70 +1740,86 @@ def preset_apply(preset, qfer, tmbr): def print_page_details(): if cli_current_page == "HOME": - print(" go home : Takes you back to home with a navigation list.") - print(" go infer : Takes you to inference command execution.\n") - print(" go pre-process : Takes you to training step.1) pre-process command execution.") - print(" go extract-feature : Takes you to training step.2) extract-feature command execution.") - print(" go train : Takes you to training step.3) being or continue training command execution.") - print(" go train-feature : Takes you to the train feature index command execution.\n") - print(" go extract-model : Takes you to the extract small model command execution.") + print( + "\n go home : Takes you back to home with a navigation list." + "\n go infer : Takes you to inference command execution." + "\n go pre-process : Takes you to training step.1) pre-process command execution." + "\n go extract-feature : Takes you to training step.2) extract-feature command execution." + "\n go train : Takes you to training step.3) being or continue training command execution." + "\n go train-feature : Takes you to the train feature index command execution." + "\n go extract-model : Takes you to the extract small model command execution." + ) elif cli_current_page == "INFER": - print(" arg 1) model name with .pth in ./weights: mi-test.pth") - print(" arg 2) source audio path: myFolder\\MySource.wav") - print(" arg 3) output file name to be placed in './audio-outputs': MyTest.wav") - print(" arg 4) feature index file path: logs/mi-test/added_IVF3042_Flat_nprobe_1.index") - print(" arg 5) speaker id: 0") - print(" arg 6) transposition: 0") - print(" arg 7) f0 method: harvest (pm, harvest, crepe, crepe-tiny, hybrid[x,x,x,x], mangio-crepe, mangio-crepe-tiny)") - print(" arg 8) crepe hop length: 160") - print(" arg 9) harvest median filter radius: 3 (0-7)") - print(" arg 10) post resample rate: 0") - print(" arg 11) mix volume envelope: 1") - print(" arg 12) feature index ratio: 0.78 (0-1)") - print(" arg 13) Voiceless Consonant Protection (Less Artifact): 0.33 (Smaller number = more protection. 0.50 means Dont Use.) \n") - print("Example: mi-test.pth saudio/Sidney.wav myTest.wav logs/mi-test/added_index.index 0 -2 harvest 160 3 0 1 0.95 0.33") + print( + "\n arg 1) model name with .pth in ./weights: mi-test.pth" + "\n arg 2) source audio path: myFolder\\MySource.wav" + "\n arg 3) output file name to be placed in './audio-outputs': MyTest.wav" + "\n arg 4) feature index file path: logs/mi-test/added_IVF3042_Flat_nprobe_1.index" + "\n arg 5) speaker id: 0" + "\n arg 6) transposition: 0" + "\n arg 7) f0 method: harvest (pm, harvest, crepe, crepe-tiny, hybrid[x,x,x,x], mangio-crepe, mangio-crepe-tiny, rmvpe)" + "\n arg 8) crepe hop length: 160" + "\n arg 9) harvest median filter radius: 3 (0-7)" + "\n arg 10) post resample rate: 0" + "\n arg 11) mix volume envelope: 1" + "\n arg 12) feature index ratio: 0.78 (0-1)" + "\n arg 13) Voiceless Consonant Protection (Less Artifact): 0.33 (Smaller number = more protection. 0.50 means Dont Use.)" + "\n arg 14) Whether to formant shift the inference audio before conversion: False (if set to false, you can ignore setting the quefrency and timbre values for formanting)" + "\n arg 15)* Quefrency for formanting: 8.0 (no need to set if arg14 is False/false)" + "\n arg 16)* Timbre for formanting: 1.2 (no need to set if arg14 is False/false) \n" + "\nExample: mi-test.pth saudio/Sidney.wav myTest.wav logs/mi-test/added_index.index 0 -2 harvest 160 3 0 1 0.95 0.33 0.45 True 8.0 1.2" + ) elif cli_current_page == "PRE-PROCESS": - print(" arg 1) Model folder name in ./logs: mi-test") - print(" arg 2) Trainset directory: mydataset (or) E:\\my-data-set") - print(" arg 3) Sample rate: 40k (32k, 40k, 48k)") - print(" arg 4) Number of CPU threads to use: 8 \n") - print("Example: mi-test mydataset 40k 24") + print( + "\n arg 1) Model folder name in ./logs: mi-test" + "\n arg 2) Trainset directory: mydataset (or) E:\\my-data-set" + "\n arg 3) Sample rate: 40k (32k, 40k, 48k)" + "\n arg 4) Number of CPU threads to use: 8 \n" + "\nExample: mi-test mydataset 40k 24" + ) elif cli_current_page == "EXTRACT-FEATURE": - print(" arg 1) Model folder name in ./logs: mi-test") - print(" arg 2) Gpu card slot: 0 (0-1-2 if using 3 GPUs)") - print(" arg 3) Number of CPU threads to use: 8") - print(" arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)") - print(" arg 5) f0 Method: harvest (pm, harvest, dio, crepe)") - print(" arg 6) Crepe hop length: 128") - print(" arg 7) Version for pre-trained models: v2 (use either v1 or v2)\n") - print("Example: mi-test 0 24 1 harvest 128 v2") + print( + "\n arg 1) Model folder name in ./logs: mi-test" + "\n arg 2) Gpu card slot: 0 (0-1-2 if using 3 GPUs)" + "\n arg 3) Number of CPU threads to use: 8" + "\n arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)" + "\n arg 5) f0 Method: harvest (pm, harvest, dio, crepe)" + "\n arg 6) Crepe hop length: 128" + "\n arg 7) Version for pre-trained models: v2 (use either v1 or v2)\n" + "\nExample: mi-test 0 24 1 harvest 128 v2" + ) elif cli_current_page == "TRAIN": - print(" arg 1) Model folder name in ./logs: mi-test") - print(" arg 2) Sample rate: 40k (32k, 40k, 48k)") - print(" arg 3) Has Pitch Guidance?: 1 (0 for no, 1 for yes)") - print(" arg 4) speaker id: 0") - print(" arg 5) Save epoch iteration: 50") - print(" arg 6) Total epochs: 10000") - print(" arg 7) Batch size: 8") - print(" arg 8) Gpu card slot: 0 (0-1-2 if using 3 GPUs)") - print(" arg 9) Save only the latest checkpoint: 0 (0 for no, 1 for yes)") - print(" arg 10) Whether to cache training set to vram: 0 (0 for no, 1 for yes)") - print(" arg 11) Save extracted small model every generation?: 0 (0 for no, 1 for yes)") - print(" arg 12) Model architecture version: v2 (use either v1 or v2)\n") - print("Example: mi-test 40k 1 0 50 10000 8 0 0 0 0 v2") + print( + "\n arg 1) Model folder name in ./logs: mi-test" + "\n arg 2) Sample rate: 40k (32k, 40k, 48k)" + "\n arg 3) Has Pitch Guidance?: 1 (0 for no, 1 for yes)" + "\n arg 4) speaker id: 0" + "\n arg 5) Save epoch iteration: 50" + "\n arg 6) Total epochs: 10000" + "\n arg 7) Batch size: 8" + "\n arg 8) Gpu card slot: 0 (0-1-2 if using 3 GPUs)" + "\n arg 9) Save only the latest checkpoint: 0 (0 for no, 1 for yes)" + "\n arg 10) Whether to cache training set to vram: 0 (0 for no, 1 for yes)" + "\n arg 11) Save extracted small model every generation?: 0 (0 for no, 1 for yes)" + "\n arg 12) Model architecture version: v2 (use either v1 or v2)\n" + "\nExample: mi-test 40k 1 0 50 10000 8 0 0 0 0 v2" + ) elif cli_current_page == "TRAIN-FEATURE": - print(" arg 1) Model folder name in ./logs: mi-test") - print(" arg 2) Model architecture version: v2 (use either v1 or v2)\n") - print("Example: mi-test v2") + print( + "\n arg 1) Model folder name in ./logs: mi-test" + "\n arg 2) Model architecture version: v2 (use either v1 or v2)\n" + "\nExample: mi-test v2" + ) elif cli_current_page == "EXTRACT-MODEL": - print(" arg 1) Model Path: logs/mi-test/G_168000.pth") - print(" arg 2) Model save name: MyModel") - print(" arg 3) Sample rate: 40k (32k, 40k, 48k)") - print(" arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)") - print(' arg 5) Model information: "My Model"') - print(" arg 6) Model architecture version: v2 (use either v1 or v2)\n") - print('Example: logs/mi-test/G_168000.pth MyModel 40k 1 "Created by Cole Mangio" v2') - print("") + print( + "\n arg 1) Model Path: logs/mi-test/G_168000.pth" + "\n arg 2) Model save name: MyModel" + "\n arg 3) Sample rate: 40k (32k, 40k, 48k)" + "\n arg 4) Has Pitch Guidance?: 1 (0 for no, 1 for yes)" + '\n arg 5) Model information: "My Model"' + "\n arg 6) Model architecture version: v2 (use either v1 or v2)\n" + '\nExample: logs/mi-test/G_168000.pth MyModel 40k 1 "Created by Cole Mangio" v2' + ) def change_page(page): global cli_current_page @@ -1810,7 +1861,7 @@ def execute_command(com): def cli_navigation_loop(): while True: - print("You are currently in '%s':" % cli_current_page) + print("\nYou are currently in '%s':" % cli_current_page) print_page_details() command = input("%s: " % cli_current_page) try: @@ -1855,11 +1906,11 @@ def match_index(sid0): if filename.endswith(".index"): for i in range(len(indexes_list)): if indexes_list[i] == (os.path.join(("./logs/" + folder), filename).replace('\\','/')): - print('regular index found') + #print('regular index found') break else: if indexes_list[i] == (os.path.join(("./logs/" + folder.lower()), filename).replace('\\','/')): - print('lowered index found') + #print('lowered index found') parent_dir = "./logs/" + folder.lower() break #elif (indexes_list[i]).casefold() == ((os.path.join(("./logs/" + folder), filename).replace('\\','/')).casefold()): @@ -1888,17 +1939,11 @@ def match_index(sid0): #print('nothing found') return ('', '') -def choveraudio(): - return '' - - def stoptraining(mim): if int(mim) == 1: - with open("stop.txt", "w+") as tostops: - - - tostops.writelines('stop') + cursor.execute("INSERT INTO stop_train (stop) VALUES (?)", (True,)) + conn.commit() #p.terminate() #p.kill() try: @@ -1921,8 +1966,8 @@ def whethercrepeornah(radio): return ({"visible": mango, "__type__": "update"}) -#Change your Gradio Theme here. 👇 👇 👇 👇 -with gr.Blocks(theme=gr.themes.Soft()) as app: +#Change your Gradio Theme here. 👇 👇 👇 👇 Example: " theme='HaleyCH/HaleyCH_Theme' " +with gr.Blocks(theme=gr.themes.Soft(), title='Mangio-RVC-Web 💻') as app: gr.HTML("

The Mangio-RVC-Fork 💻

") gr.Markdown( value=i18n( @@ -1977,10 +2022,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as app: input_audio1 = gr.Dropdown( label=i18n("Auto detect audio path and select from the dropdown:"), choices=sorted(audio_paths), - value=get_audios(), + value='', interactive=True, ) - input_audio1.change(fn=choveraudio,inputs=[],outputs=[input_audio0]) + input_audio1.change(fn=lambda:'',inputs=[],outputs=[input_audio0]) f0method0 = gr.Radio( label=i18n( "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU" @@ -2068,8 +2113,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as app: interactive=True, ) formanting = gr.Checkbox( - value=False, - label="[EXPERIMENTAL, WAV ONLY] Formant shift inference audio", + value=bool(DoFormant), + label="[EXPERIMENTAL] Formant shift inference audio", info="Used for male to female and vice-versa conversions", interactive=True, visible=True, @@ -2079,33 +2124,39 @@ with gr.Blocks(theme=gr.themes.Soft()) as app: value='', choices=get_fshift_presets(), label="browse presets for formanting", - visible=False, + visible=bool(DoFormant), + ) + + formant_refresh_button = gr.Button( + value='\U0001f504', + visible=bool(DoFormant), + variant='primary', ) - formant_refresh_button = gr.Button(value='\U0001f504', visible=False,variant='primary') - #formant_refresh_button = ToolButton( elem_id='1') - #create_refresh_button(formant_preset, lambda: {"choices": formant_preset}, "refresh_list_shiftpresets") qfrency = gr.Slider( value=Quefrency, + info="Default value is 1.0", label="Quefrency for formant shifting", - minimum=-16.0, + minimum=0.0, maximum=16.0, step=0.1, - visible=False, + visible=bool(DoFormant), interactive=True, - ) + ) + tmbre = gr.Slider( value=Timbre, + info="Default value is 1.0", label="Timbre for formant shifting", - minimum=-16.0, + minimum=0.0, maximum=16.0, step=0.1, - visible=False, + visible=bool(DoFormant), interactive=True, ) formant_preset.change(fn=preset_apply, inputs=[formant_preset, qfrency, tmbre], outputs=[qfrency, tmbre]) - frmntbut = gr.Button("Apply", variant="primary", visible=False) + frmntbut = gr.Button("Apply", variant="primary", visible=bool(DoFormant)) formanting.change(fn=formant_enabled,inputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button],outputs=[formanting,qfrency,tmbre,frmntbut,formant_preset,formant_refresh_button]) frmntbut.click(fn=formant_apply,inputs=[qfrency, tmbre], outputs=[qfrency, tmbre]) formant_refresh_button.click(fn=update_fshift_presets,inputs=[formant_preset, qfrency, tmbre],outputs=[formant_preset, qfrency, tmbre]) @@ -2240,7 +2291,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as app: but1.click( vc_multi, [ - sid0, + spk_item, dir_input, opt_input, inputs, @@ -2418,7 +2469,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as app: f0method8.change(fn=whethercrepeornah, inputs=[f0method8], outputs=[extraction_crepe_hop_length]) but2 = gr.Button(i18n("特征提取"), variant="primary") - info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) + info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8, interactive=False) but2.click( extract_f0_feature, [gpus6, np7, f0method8, if_f0_3, exp_dir1, version19, extraction_crepe_hop_length], @@ -2453,8 +2504,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as app: interactive=True, ) if_save_latest13 = gr.Checkbox( - label="Whether to save only the latest .ckpt file to save hard disk space", - + label="Whether to save only the latest .ckpt file to save hard drive space", value=True, interactive=True, ) diff --git a/my_utils.py b/my_utils.py index 90eeb52..2f534c5 100644 --- a/my_utils.py +++ b/my_utils.py @@ -4,14 +4,20 @@ import numpy as np # import praatio # import praatio.praat_scripts import os +import random + +import sqlite3 + -# from os.path import join # praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe") def load_audio(file, sr, DoFormant, Quefrency, Timbre): + converted = False try: + conn = sqlite3.connect('TEMP/db:cachedb?mode=memory&cache=shared', check_same_thread=False) + cursor = conn.cursor() # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26 # This launches a subprocess to decode audio while down-mixing and resampling as necessary. # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed. @@ -19,39 +25,65 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre): file.strip(" ").strip('"').strip("\n").strip('"').strip(" ") ) # 防止小白拷路径头尾带了空格和"和回车 file_formanted = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ") - with open("formanting.txt", "r") as fvf: - content = fvf.readlines() - if "True" in content[0].split("\n")[0]: - # print("true") - DoFormant = True - Quefrency, Timbre = content[1].split("\n")[0], content[2].split("\n")[0] - - else: - # print("not true") - DoFormant = False - - if DoFormant: + cursor.execute("SELECT Quefrency, Timbre, DoFormant FROM formant_data") + Quefrency, Timbre, DoFormant = cursor.fetchone() + #print(f"dofor={bool(DoFormant)} timbr={Timbre} quef={Quefrency}\n") + if bool(DoFormant): + numerator = round(random.uniform(1,4), 4) # os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}") # print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted)) - print("formanting...") - + + if not file.endswith(".wav"): + + if not os.path.isfile(f"{file_formanted}.wav"): + converted = True + #print(f"\nfile = {file}\n") + #print(f"\nfile_formanted = {file_formanted}\n") + converting = ( + ffmpeg.input(file_formanted, threads = 0) + .output(f"{file_formanted}.wav") + .run( + cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True + ) + ) + else: + pass + + + + file_formanted = f"{file_formanted}.wav" if not file_formanted.endswith(".wav") else file_formanted + + + + print(f" · Formanting {file_formanted}...\n") + + + os.system( - 'stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"' - % (file, Quefrency, Timbre, file_formanted) + 'stftpitchshift.exe -i "%s" -q "%s" -t "%s" -o "%sFORMANTED_%s.wav"' + % (file_formanted, Quefrency, Timbre, file_formanted, str(numerator)) ) - print("formanted!") + + + + print(f" · Formanted {file_formanted}!\n") + + + # filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\') # file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\') - + #print("%sFORMANTED_%s.wav" % (file_formanted, str(numerator))) + out, _ = ( - ffmpeg.input("%sFORMANTED%s" % (file_formanted, ".wav"), threads=0) + ffmpeg.input("%sFORMANTED_%s.wav" % (file_formanted, str(numerator)), threads=0) .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr) .run( cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True ) ) - os.remove("%sFORMANTED%s" % (file_formanted, ".wav")) + + else: out, _ = ( ffmpeg.input(file, threads=0) @@ -62,5 +94,15 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre): ) except Exception as e: raise RuntimeError(f"Failed to load audio: {e}") - + + if DoFormant: + try: os.remove("%sFORMANTED_%s.wav" % (file_formanted, str(numerator))) + except Exception: pass; print("couldn't remove formanted type of file") + + if converted: + try: os.remove(file_formanted) + except Exception: pass; print("couldn't remove converted type of file") + converted = False + + conn.close() return np.frombuffer(out, np.float32).flatten() diff --git a/stftpitchshift.exe b/stftpitchshift.exe new file mode 100644 index 0000000..2c7d3d6 Binary files /dev/null and b/stftpitchshift.exe differ diff --git a/stftpitchshift.lib b/stftpitchshift.lib new file mode 100644 index 0000000..9a07fcc Binary files /dev/null and b/stftpitchshift.lib differ diff --git a/train_nsf_sim_cache_sid_load_pretrain.py b/train_nsf_sim_cache_sid_load_pretrain.py index 4ae7308..53e1776 100644 --- a/train_nsf_sim_cache_sid_load_pretrain.py +++ b/train_nsf_sim_cache_sid_load_pretrain.py @@ -33,6 +33,8 @@ from data_utils import ( DistributedBucketSampler, ) +import sqlite3 + if hps.version == "v1": from lib.infer_pack.models import ( SynthesizerTrnMs256NSFsid as RVC_Model_f0, @@ -254,6 +256,7 @@ def run(rank, n_gpus, hps): def train_and_evaluate( rank, epoch, hps, nets, optims, schedulers, scaler, loaders, logger, writers, cache ): + net_g, net_d = nets optim_g, optim_d = optims train_loader, eval_loader = loaders @@ -350,6 +353,10 @@ def train_and_evaluate( # Run steps epoch_recorder = EpochRecorder() + + conn = sqlite3.connect('TEMP/db:cachedb?mode=memory&cache=shared', check_same_thread=False) + cursor = conn.cursor() + for batch_idx, info in data_iterator: # Data ## Unpack @@ -568,33 +575,33 @@ def train_and_evaluate( ), ) ) - - with open("stop.txt", "r+") as tostop: - content = tostop.read() - if "stop" in content: - logger.info("Stop Button was pressed. The program is closed.") - if hasattr(net_g, "module"): - ckpt = net_g.module.state_dict() - else: - ckpt = net_g.state_dict() - logger.info( - "saving final ckpt:%s" - % ( - savee( - ckpt, - hps.sample_rate, - hps.if_f0, - hps.name, - epoch, - hps.version, - hps, - ) + + cursor.execute("SELECT stop FROM stop_train LIMIT 1") + if bool(cursor.fetchone()) == True: + logger.info("Stop Button was pressed. The program is closed.") + if hasattr(net_g, "module"): + ckpt = net_g.module.state_dict() + else: + ckpt = net_g.state_dict() + logger.info( + "saving final ckpt:%s" + % ( + savee( + ckpt, + hps.sample_rate, + hps.if_f0, + hps.name, + epoch, + hps.version, + hps, ) ) - - tostop.truncate(0) - tostop.writelines("not") - os._exit(2333333) + ) + sleep(1) + cursor.execute("DELETE FROM stop_train") + conn.commit() + conn.close() + os._exit(2333333) if rank == 0: logger.info("====> Epoch: {} {}".format(epoch, epoch_recorder.record())) @@ -614,7 +621,11 @@ def train_and_evaluate( ) ) sleep(1) + cursor.execute("DELETE FROM stop_train") + conn.commit() + conn.close() os._exit(2333333) + if __name__ == "__main__":