# my_utils.py

import ffmpeg
import numpy as np
#import praatio
#import praatio.praat_scripts
import os
#from os.path import join

#praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")

def load_audio(file, sr, DoFormant, Quefrency, Timbre):
    """Decode an audio file into a mono float32 sample array using ffmpeg.

    If formant shifting is enabled, the input is first processed by the
    ``stftpitchshift`` command-line tool into a temporary
    ``<file>FORMANTED.wav`` file, which is decoded and then removed.

    Formant settings are read from ``formanting.txt`` in the current working
    directory when that file exists: line 1 enables shifting when it contains
    ``True``, and lines 2 and 3 supply the quefrency and timbre values. The
    file takes precedence over the ``DoFormant``/``Quefrency``/``Timbre``
    arguments; the arguments are used only when the file is missing.

    Args:
        file: Path to the input audio file. Surrounding spaces, double quotes
            and newlines (common when a path is pasted) are stripped.
        sr: Target sample rate passed to ffmpeg (``ar``).
        DoFormant: Whether to apply formant shifting (fallback value).
        Quefrency: Value for the ``-q`` option of ``stftpitchshift``
            (fallback value).
        Timbre: Value for the ``-t`` option of ``stftpitchshift``
            (fallback value).

    Returns:
        A one-dimensional ``numpy.float32`` array holding the decoded,
        down-mixed (single channel) and resampled audio.

    Raises:
        RuntimeError: If reading the settings, running ``stftpitchshift`` or
            decoding with ffmpeg fails. The original exception is chained.
    """
    # Imported locally so this function does not depend on a file-level
    # import that the rest of the module does not otherwise need.
    import subprocess

    try:
        # https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
        # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
        # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
        # Guard against pasted paths that carry leading/trailing spaces,
        # double quotes or newlines.
        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")

        try:
            with open("formanting.txt", "r") as fvf:
                content = fvf.readlines()
        except FileNotFoundError:
            # No settings file: keep the values supplied by the caller.
            pass
        else:
            if "True" in content[0].split("\n")[0]:
                DoFormant = True
                Quefrency = content[1].split("\n")[0]
                Timbre = content[2].split("\n")[0]
            else:
                DoFormant = False

        if DoFormant:
            # The tool is given the path without an extension; the decoded
            # file is expected at that path with ".wav" appended.
            formanted_stem = f"{file}FORMANTED"
            formanted_wav = f"{formanted_stem}.wav"

            print("formanting...")
            try:
                # An argument list (no shell) keeps paths with quotes, spaces
                # or shell metacharacters from being re-interpreted.
                # Values are stripped to mirror the whitespace tokenisation
                # the previous shell invocation performed.
                subprocess.run(
                    [
                        "stftpitchshift",
                        "-i", file,
                        "-q", str(Quefrency).strip(),
                        "-t", str(Timbre).strip(),
                        "-o", formanted_stem,
                    ],
                    check=True,
                )
                print("formanted!")

                out, _ = (
                    ffmpeg.input(formanted_wav, threads=0)
                    .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
                    .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
                )
            finally:
                # Always clean up the intermediate file, even when the shift
                # or the decode step failed part-way.
                try:
                    os.remove(formanted_wav)
                except FileNotFoundError:
                    pass
        else:
            out, _ = (
                ffmpeg.input(file, threads=0)
                .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
                .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
            )
    except Exception as e:
        raise RuntimeError(f"Failed to load audio: {e}") from e

    return np.frombuffer(out, np.float32).flatten()
删除无用文件，增加--colab启动选项 2023-04-01 15:02:53 +08:00			`import ffmpeg`
			`import numpy as np`
Tweaked files upload and replace 2023-07-21 20:36:54 +07:00			`#import praatio`
			`#import praatio.praat_scripts`
			`import os`
			`#from os.path import join`
Reformat and rewrite _get_name_params (#57) * Reformat * rewrite _get_name_params * Add workflow for automatic formatting * Revert "Add workflow for automatic formatting" This reverts commit 9111c5dbc1830248305fb075587a88be07ad3115. * revert Retrieval_based_Voice_Conversion_WebUI.ipynb --------- Co-authored-by: 源文雨 <41315874+fumiama@users.noreply.github.com> 2023-04-15 20:44:24 +09:00
Tweaked files upload and replace 2023-07-21 20:36:54 +07:00			`#praatEXE = join('.',os.path.abspath(os.getcwd()) + r"\Praat.exe")`
Reformat and rewrite _get_name_params (#57) * Reformat * rewrite _get_name_params * Add workflow for automatic formatting * Revert "Add workflow for automatic formatting" This reverts commit 9111c5dbc1830248305fb075587a88be07ad3115. * revert Retrieval_based_Voice_Conversion_WebUI.ipynb --------- Co-authored-by: 源文雨 <41315874+fumiama@users.noreply.github.com> 2023-04-15 20:44:24 +09:00
Tweaked files upload and replace 2023-07-21 20:36:54 +07:00			`def load_audio(file, sr, DoFormant, Quefrency, Timbre):`
Add files via upload 2023-03-31 17:54:38 +08:00			`try:`
			`# https://github.com/openai/whisper/blob/main/whisper/audio.py#L26`
			`# This launches a subprocess to decode audio while down-mixing and resampling as necessary.`
			# Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
Reformat and rewrite _get_name_params (#57) * Reformat * rewrite _get_name_params * Add workflow for automatic formatting * Revert "Add workflow for automatic formatting" This reverts commit 9111c5dbc1830248305fb075587a88be07ad3115. * revert Retrieval_based_Voice_Conversion_WebUI.ipynb --------- Co-authored-by: 源文雨 <41315874+fumiama@users.noreply.github.com> 2023-04-15 20:44:24 +09:00			`file = (`
			`file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")`
			`) # 防止小白拷路径头尾带了空格和"和回车`
Tweaked files upload and replace 2023-07-21 20:36:54 +07:00			`file_formanted = (`
			`file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")`
Add files via upload 2023-03-31 17:54:38 +08:00			`)`
Tweaked files upload and replace 2023-07-21 20:36:54 +07:00			`with open('formanting.txt', 'r') as fvf:`
			`content = fvf.readlines()`
			`if 'True' in content[0].split('\n')[0]:`
			`#print("true")`
			`DoFormant = True`
			`Quefrency, Timbre = content[1].split('\n')[0], content[2].split('\n')[0]`

			`else:`
			`#print("not true")`
			`DoFormant = False`

			`if DoFormant:`
			`#os.system(f"stftpitchshift -i {file} -q {Quefrency} -t {Timbre} -o {file_formanted}")`
			`#print('stftpitchshift -i "%s" -p 1.0 --rms -w 128 -v 8 -q %s -t %s -o "%s"' % (file, Quefrency, Timbre, file_formanted))`
			`print("formanting...")`

			`os.system('stftpitchshift -i "%s" -q %s -t %s -o "%sFORMANTED"' % (file, Quefrency, Timbre, file_formanted))`
			`print("formanted!")`
			`#filepraat = (os.path.abspath(os.getcwd()) + '\\' + file).replace('/','\\')`
			`#file_formantedpraat = ('"' + os.path.abspath(os.getcwd()) + '/' + 'formanted'.join(file_formanted) + '"').replace('/','\\')`

			`out, _ = (`
			`ffmpeg.input('%sFORMANTED%s' % (file_formanted, '.wav'), threads=0)`
			`.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)`
			`.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)`
			`)`

			`os.remove('%sFORMANTED%s' % (file_formanted, '.wav'))`
			`else:`

			`out, _ = (`
			`ffmpeg.input(file, threads=0)`
			`.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)`
			`.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)`
			`)`
fix: train step2a & add arg --port --pycmd --noparallel 2023-04-01 16:42:19 +08:00			`except Exception as e:`
			`raise RuntimeError(f"Failed to load audio: {e}")`
Add files via upload 2023-03-31 17:54:38 +08:00
Format code (#142) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> 2023-04-24 20:35:56 +08:00			`return np.frombuffer(out, np.float32).flatten()`