stftpitchshift overhaul + fixes

Fixed CLI traceback, implemented formant shifting, and fixed batch conversion.
2026-05-18 05:04:44 +02:00 · 2023-07-26 06:24:23 +07:00
parent 1b76386205
commit f68dbbf0ff
4 changed files with 50 additions and 19 deletions
--- a/formantshiftcfg/f2m.txt
+++ b/formantshiftcfg/f2m.txt
@@ -1,2 +1,2 @@
 8.0
-1.2
+0.8
--- a/formantshiftcfg/random.txt
+++ b/formantshiftcfg/random.txt
@@ -1,2 +1,2 @@
-16.0
+32.0
 9.8
--- a/infer-web.py
+++ b/infer-web.py
@@ -388,6 +388,7 @@ def vc_multi(
            info, opt = vc_single(
                sid,
                path,
+                None,
                f0_up_key,
                None,
                f0_method,
@@ -2134,8 +2135,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title='Mangio-RVC-Web 💻') as app:
                        
                        qfrency = gr.Slider(
                                value=Quefrency,
+                                info="Default value is 1.0",
                                label="Quefrency for formant shifting",
-                                minimum=-16.0,
+                                minimum=0.0,
                                maximum=16.0,
                                step=0.1,
                                visible=bool(DoFormant),
@@ -2144,8 +2146,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title='Mangio-RVC-Web 💻') as app:
                            
                        tmbre = gr.Slider(
                            value=Timbre,
+                            info="Default value is 1.0",
                            label="Timbre for formant shifting",
-                            minimum=-16.0,
+                            minimum=0.0,
                            maximum=16.0,
                            step=0.1,
                            visible=bool(DoFormant),
--- a/my_utils.py
+++ b/my_utils.py
@@ -4,6 +4,7 @@ import numpy as np
 # import praatio
 # import praatio.praat_scripts
 import os
+import random

 import sqlite3

@@ -25,39 +26,61 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre):
        file_formanted = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        cursor.execute("SELECT Quefrency, Timbre, DoFormant FROM formant_data")
        Quefrency, Timbre, DoFormant = cursor.fetchone()
-        print(f"dofor={bool(DoFormant)} timbr={Timbre} quef={Quefrency}\n")
+        #print(f"dofor={bool(DoFormant)} timbr={Timbre} quef={Quefrency}\n")
        if bool(DoFormant):
+            numerator = round(random.uniform(1,4), 4)
            # os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}")
            # print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted))
            
            if not file.endswith(".wav"):
-                print(f"\nfile = {file}\n")
-                converting = (
-                    ffmpeg.input(file, threads = 0)
-                    .output(f"{file_formanted}.wav")
-                    .run(
-                        cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
+                
+                if not os.path.isfile(f"{file_formanted}.wav"):
+                    #print(f"\nfile = {file}\n")
+                    #print(f"\nfile_formanted = {file_formanted}\n")
+                    converting = (
+                        ffmpeg.input(file_formanted, threads = 0)
+                        .output(f"{file_formanted}.wav")
+                        .run(
+                            cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
+                        )
                    )
-                )
-            print("formanting...")
+                else:
+                    pass
+            
+            
+            
+            file_formanted = f"{file_formanted}.wav" if not file_formanted.endswith(".wav") else file_formanted
+            
+            
+            
+            print(f" · Formanting {file_formanted}...\n")
+            
+            
+            
            os.system(
-                'stftpitchshift.exe -i "%s" -q %s -t %s -o "%sFORMANTED"'
-                % (file_formanted, Quefrency, Timbre, file_formanted)
+                'stftpitchshift.exe -i "%s" -q "%s" -t "%s" -o "%sFORMANTED_%s.wav"'
+                % (file_formanted, Quefrency, Timbre, file_formanted, str(numerator))
            )
-            print("formanted!")
+            
+            
+            
+            print(f" · Formanted {file_formanted}!\n")
+            
+            
+            
            # filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\')
            # file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\')
+            #print("%sFORMANTED_%s.wav" % (file_formanted, str(numerator)))
            
            out, _ = (
-                ffmpeg.input("%sFORMANTED%s" % (file_formanted, ".wav"), threads=0)
+                ffmpeg.input("%sFORMANTED_%s.wav" % (file_formanted, str(numerator)), threads=0)
                .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
                .run(
                    cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True
                )
            )

-            os.remove("%sFORMANTED%s" % (file_formanted, ".wav"))
-            os.remove(f"{file_formanted}.wav")
+            
            
        else:
            out, _ = (
@@ -70,5 +93,10 @@ def load_audio(file, sr, DoFormant, Quefrency, Timbre):
    except Exception as e:
        raise RuntimeError(f"Failed to load audio: {e}")
    
+    try: os.remove("%sFORMANTED_%s.wav" % (file_formanted, str(numerator)))
+    except OSError: pass; print("couldn't remove formanted type of file")
+    try: os.remove(file_formanted)
+    except OSError: pass; print("couldn't remove converted type of file")
+    
    conn.close()
    return np.frombuffer(out, np.float32).flatten()