# my_utils.py

import ffmpeg
import numpy as np

# import praatio
# import praatio.praat_scripts
import os
import random

import sqlite3


# praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")


def _clean_path(path):
    """Strip the spaces, double quotes and newlines surrounding a pasted path."""
    # Guards against users copy-pasting a path together with leading/trailing
    # spaces, quotes and line breaks. A single strip over the whole character
    # set also handles mixed sequences such as `"a.wav" \n`.
    return path.strip(' "\n')


def _decode_to_pcm(path, sr):
    """Decode *path* with the ffmpeg CLI and return raw mono f32le PCM bytes.

    `sr` is passed to ffmpeg as the output audio rate (`ar`).
    """
    # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
    # This launches a subprocess to decode audio while down-mixing and
    # resampling as necessary.
    # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
    out, _ = (
        ffmpeg.input(path, threads=0)
        .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
        .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
    )
    return out


def _remove_temp_file(path, description):
    """Best-effort removal of a temporary file; never raises."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # The file was never produced (e.g. the step that creates it failed).
        pass
    except OSError:
        print(f"couldn't remove {description} type of file")


def load_audio(file, sr, DoFormant, Quefrency, Timbre):
    """Load an audio file as a 1-D float32 numpy array, optionally formant-shifted.

    The formant settings are read from the `formant_data` table of the sqlite
    cache database; the row found there replaces the `DoFormant`, `Quefrency`
    and `Timbre` arguments, which are kept in the signature for callers.

    When formant shifting is enabled, the input is converted to WAV with ffmpeg
    if it is not already one, processed by the external `stftpitchshift.exe`
    tool (`-q` Quefrency, `-t` Timbre), and the processed file is decoded.
    Temporary files created along the way are removed before returning or
    raising.

    Args:
        file: Path of the audio file; surrounding spaces, double quotes and
            newlines are stripped.
        sr: Output sample rate handed to ffmpeg.
        DoFormant: Overridden by the database value.
        Quefrency: Overridden by the database value.
        Timbre: Overridden by the database value.

    Returns:
        numpy.ndarray: flattened float32 samples decoded from mono f32le PCM.

    Raises:
        RuntimeError: If the database lookup, the conversion, the formant tool
            or the decoding fails. The original exception is chained as the
            cause.
    """
    # Local import so this block does not depend on the module's import list.
    import subprocess

    conn = None
    formanted_output = None  # file written by stftpitchshift, if any
    converted_wav = None  # intermediate WAV created by this call, if any
    try:
        # NOTE(review): the name looks like URI-style parameters, but `uri=True`
        # is not passed, so sqlite presumably treats it as a plain file name.
        # Left unchanged because other modules may open the same database.
        conn = sqlite3.connect(
            'TEMP/db:cachedb?mode=memory&cache=shared', check_same_thread=False
        )
        cursor = conn.cursor()

        file = _clean_path(file)
        file_formanted = file

        cursor.execute("SELECT Quefrency, Timbre, DoFormant FROM formant_data")
        row = cursor.fetchone()
        if row is None:
            raise ValueError("formant_data table has no rows")
        Quefrency, Timbre, DoFormant = row

        if not DoFormant:
            out = _decode_to_pcm(file, sr)
        else:
            # Random suffix keeps the processed file name distinct per call.
            numerator = round(random.uniform(1, 4), 4)

            if not file.endswith(".wav"):
                wav_path = f"{file_formanted}.wav"
                # Reuse an existing converted file; only delete what we create.
                if not os.path.isfile(wav_path):
                    converted_wav = wav_path
                    (
                        ffmpeg.input(file_formanted, threads=0)
                        .output(wav_path)
                        .run(
                            cmd=["ffmpeg", "-nostdin"],
                            capture_stdout=True,
                            capture_stderr=True,
                        )
                    )
                file_formanted = wav_path

            formanted_output = f"{file_formanted}FORMANTED_{numerator}.wav"

            print(f" · Formanting {file_formanted}...\n")

            # Argument list instead of a shell string: paths containing quotes
            # or shell metacharacters are passed through verbatim, and a
            # non-zero exit status is reported instead of being ignored.
            subprocess.run(
                [
                    "stftpitchshift.exe",
                    "-i", file_formanted,
                    "-q", str(Quefrency),
                    "-t", str(Timbre),
                    "-o", formanted_output,
                ],
                check=True,
            )

            print(f" · Formanted {file_formanted}!\n")

            out = _decode_to_pcm(formanted_output, sr)
    except Exception as e:
        raise RuntimeError(f"Failed to load audio: {e}") from e
    finally:
        # Runs on success and on failure, so neither temp files nor the
        # database connection are leaked.
        if formanted_output is not None:
            _remove_temp_file(formanted_output, "formanted")
        if converted_wav is not None:
            _remove_temp_file(converted_wav, "converted")
        if conn is not None:
            conn.close()

    return np.frombuffer(out, np.float32).flatten()
删除无用文件，增加--colab启动选项 2023-04-01 15:02:53 +08:00			`import ffmpeg`
			`import numpy as np`
Apply Code Formatter Change 2023-07-23 03:47:53 +00:00
			`# import praatio`
			`# import praatio.praat_scripts`
Tweaked files upload and replace 2023-07-21 20:36:54 +07:00			`import os`
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00			`import random`
Reformat and rewrite _get_name_params (#57) * Reformat * rewrite _get_name_params * Add workflow for automatic formatting * Revert "Add workflow for automatic formatting" This reverts commit 9111c5dbc1830248305fb075587a88be07ad3115. * revert Retrieval_based_Voice_Conversion_WebUI.ipynb --------- Co-authored-by: 源文雨 <41315874+fumiama@users.noreply.github.com> 2023-04-15 20:44:24 +09:00
data storage overhaul switched from storing data in txt to sqlite3. no more formant.txt and stop.txt! all of the data from sql database is stored in memory of PC. 2023-07-25 05:53:47 +07:00			`import sqlite3`


Apply Code Formatter Change 2023-07-23 03:47:53 +00:00
			`# praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")`

Reformat and rewrite _get_name_params (#57) * Reformat * rewrite _get_name_params * Add workflow for automatic formatting * Revert "Add workflow for automatic formatting" This reverts commit 9111c5dbc1830248305fb075587a88be07ad3115. * revert Retrieval_based_Voice_Conversion_WebUI.ipynb --------- Co-authored-by: 源文雨 <41315874+fumiama@users.noreply.github.com> 2023-04-15 20:44:24 +09:00
Tweaked files upload and replace 2023-07-21 20:36:54 +07:00			`def load_audio(file, sr, DoFormant, Quefrency, Timbre):`
fixed non-formant inference 2023-07-26 23:01:10 +07:00			`converted = False`
Add files via upload 2023-03-31 17:54:38 +08:00			`try:`
data storage overhaul switched from storing data in txt to sqlite3. no more formant.txt and stop.txt! all of the data from sql database is stored in memory of PC. 2023-07-25 05:53:47 +07:00			`conn = sqlite3.connect('TEMP/db:cachedb?mode=memory&cache=shared', check_same_thread=False)`
			`cursor = conn.cursor()`
Add files via upload 2023-03-31 17:54:38 +08:00			`# https://github.com/openai/whisper/blob/main/whisper/audio.py#L26`
			`# This launches a subprocess to decode audio while down-mixing and resampling as necessary.`
			# Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
Reformat and rewrite _get_name_params (#57) * Reformat * rewrite _get_name_params * Add workflow for automatic formatting * Revert "Add workflow for automatic formatting" This reverts commit 9111c5dbc1830248305fb075587a88be07ad3115. * revert Retrieval_based_Voice_Conversion_WebUI.ipynb --------- Co-authored-by: 源文雨 <41315874+fumiama@users.noreply.github.com> 2023-04-15 20:44:24 +09:00			`file = (`
			`file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")`
			`) # 防止小白拷路径头尾带了空格和"和回车`
Apply Code Formatter Change 2023-07-23 03:47:53 +00:00			`file_formanted = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")`
data storage overhaul switched from storing data in txt to sqlite3. no more formant.txt and stop.txt! all of the data from sql database is stored in memory of PC. 2023-07-25 05:53:47 +07:00			`cursor.execute("SELECT Quefrency, Timbre, DoFormant FROM formant_data")`
			`Quefrency, Timbre, DoFormant = cursor.fetchone()`
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00			`#print(f"dofor={bool(DoFormant)} timbr={Timbre} quef={Quefrency}\n")`
data storage overhaul switched from storing data in txt to sqlite3. no more formant.txt and stop.txt! all of the data from sql database is stored in memory of PC. 2023-07-25 05:53:47 +07:00			`if bool(DoFormant):`
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00			`numerator = round(random.uniform(1,4), 4)`
Apply Code Formatter Change 2023-07-23 03:47:53 +00:00			`# os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}")`
			`# print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted))`
formanting fix now formanting accepts any audio, not just wavs. done with ffmpeg conversion. 2023-07-25 00:23:56 +07:00
			`if not file.endswith(".wav"):`
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00
			`if not os.path.isfile(f"{file_formanted}.wav"):`
fixed non-formant inference 2023-07-26 23:01:10 +07:00			`converted = True`
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00			`#print(f"\nfile = {file}\n")`
			`#print(f"\nfile_formanted = {file_formanted}\n")`
			`converting = (`
			`ffmpeg.input(file_formanted, threads = 0)`
			`.output(f"{file_formanted}.wav")`
			`.run(`
			`cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True`
			`)`
formanting fix now formanting accepts any audio, not just wavs. done with ffmpeg conversion. 2023-07-25 00:23:56 +07:00			`)`
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00			`else:`
			`pass`



			`file_formanted = f"{file_formanted}.wav" if not file_formanted.endswith(".wav") else file_formanted`



			`print(f" · Formanting {file_formanted}...\n")`



Apply Code Formatter Change 2023-07-23 03:47:53 +00:00			`os.system(`
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00			`'stftpitchshift.exe -i "%s" -q "%s" -t "%s" -o "%sFORMANTED_%s.wav"'`
			`% (file_formanted, Quefrency, Timbre, file_formanted, str(numerator))`
Apply Code Formatter Change 2023-07-23 03:47:53 +00:00			`)`
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00


			`print(f" · Formanted {file_formanted}!\n")`



Apply Code Formatter Change 2023-07-23 03:47:53 +00:00			`# filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\')`
			`# file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\')`
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00			`#print("%sFORMANTED_%s.wav" % (file_formanted, str(numerator)))`
patch for new fixes i made before remade system of reading the formant settings on startup (still janky as hell, going to remake with sql) fixed when value is true in the txt but the checkbox isn't checked. therefore, also fixed when other parts of formanting in the ui doesn't appear. my_utils.py now utilizes the stftpitchshift from local environment instead of globally installed one. 2023-07-24 23:59:28 +07:00
Tweaked files upload and replace 2023-07-21 20:36:54 +07:00			`out, _ = (`
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00			`ffmpeg.input("%sFORMANTED_%s.wav" % (file_formanted, str(numerator)), threads=0)`
Tweaked files upload and replace 2023-07-21 20:36:54 +07:00			`.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)`
Apply Code Formatter Change 2023-07-23 03:47:53 +00:00			`.run(`
			`cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True`
			`)`
Tweaked files upload and replace 2023-07-21 20:36:54 +07:00			`)`
Apply Code Formatter Change 2023-07-23 03:47:53 +00:00
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00
data storage overhaul switched from storing data in txt to sqlite3. no more formant.txt and stop.txt! all of the data from sql database is stored in memory of PC. 2023-07-25 05:53:47 +07:00
Tweaked files upload and replace 2023-07-21 20:36:54 +07:00			`else:`
			`out, _ = (`
			`ffmpeg.input(file, threads=0)`
			`.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)`
Apply Code Formatter Change 2023-07-23 03:47:53 +00:00			`.run(`
			`cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True`
			`)`
Tweaked files upload and replace 2023-07-21 20:36:54 +07:00			`)`
fix: train step2a & add arg --port --pycmd --noparallel 2023-04-01 16:42:19 +08:00			`except Exception as e:`
			`raise RuntimeError(f"Failed to load audio: {e}")`
data storage overhaul switched from storing data in txt to sqlite3. no more formant.txt and stop.txt! all of the data from sql database is stored in memory of PC. 2023-07-25 05:53:47 +07:00
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00			`try: os.remove("%sFORMANTED_%s.wav" % (file_formanted, str(numerator)))`
fixed non-formant inference 2023-07-26 23:01:10 +07:00			`except Exception: pass; print("couldn't remove formanted type of file")`

			`if converted:`
			`try: os.remove(file_formanted)`
			`except Exception: pass; print("couldn't remove converted type of file")`
			`converted = False`
stftpshift overhaul + fixes fixed cli traceback + implemented formant shifting fixed batch conversion 2023-07-26 06:24:23 +07:00
data storage overhaul switched from storing data in txt to sqlite3. no more formant.txt and stop.txt! all of the data from sql database is stored in memory of PC. 2023-07-25 05:53:47 +07:00			`conn.close()`
Format code (#142) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-04-24 20:35:56 +08:00			`return np.frombuffer(out, np.float32).flatten()`