import os from ffmpeg import audio from pathlib import Path import numpy as np import parselmouth from synthesizer.inference import Synthesizer from synthesizer.hparams import hparams import soundfile as sf from parselmouth.praat import run_file high_lim_speed_factor = 1.0 low_lim_speed_factor = 0.6 def AudioAnalysis(dir, file): sound = os.path.join(dir, file) dir_path = os.path.dirname(os.path.realpath(__file__)) # current dir source_run = os.path.join(dir_path, "myspsolution.praat") try: objects = run_file(source_run, -20, 2, 0.27, "yes",sound, dir, 80, 400, 0.01, capture_output=True, return_variables = True) # 第四个参数为原praat脚本中的 Minimum_pause_duration(若有bug可适当调小) totDur = objects[2]['originaldur'] nPause = objects[2]['npause'] arDur = objects[2]['speakingtot'] nSyl = objects[2]['voicedcount'] arRate = objects[2]['articulationrate'] except: totDur = 0 nPause = 0 arDur = 0 nSyl = 0 arRate = 0 print("Try again the sound of the audio was not clear") return round(totDur, 2), int(nPause), round(arDur, 2), int(nSyl), round(arRate, 2) def FixSpeed(totDur_ori: float, nPause_ori: int, arDur_ori: float, nSyl_ori: int, arRate_ori: float, audio_syn): speed_factor = 0 path_syn, filename_syn = os.path.split(audio_syn) name_syn, suffix_syn = os.path.splitext(filename_syn) totDur_syn, nPause_syn, arDur_syn, nSyl_syn, arRate_syn = AudioAnalysis(path_syn, filename_syn) print(f"for original audio:\n\ttotDur = {totDur_ori}s\n\tnPause = {nPause_ori}\n\tarDur = {arDur_ori}s\n\tnSyl = {nSyl_ori}\n\tarRate = {arRate_ori} per second\n-----") print(f"for synthesized audio:\n\ttotDur = {totDur_syn}s\n\tnPause = {nPause_syn}\n\tarDur = {arDur_syn}s\n\tnSyl = {nSyl_syn}\n\tarRate = {arRate_syn} per second\n-----") if arRate_syn == 0: print("exception!\n The speed factor is abnormal") return audio_syn speed_factor = round(arRate_ori/arRate_syn, 2) print(f"speed_factor = {speed_factor}") if speed_factor > high_lim_speed_factor or\ speed_factor < low_lim_speed_factor: print("exception!\n The speed factor is abnormal") return audio_syn else: out_file = os.path.join(path_syn, name_syn + "_{}".format(speed_factor) + suffix_syn) audio.a_speed(audio_syn, speed_factor, out_file) os.remove(audio_syn) # remove intermediate wav files print(f"Finished!\nThe path of out_file is {out_file}") return out_file def TransFormat(fullpath, out_suffix): is_wav_file = False # 原始音频的后缀是否为.wav path_, name = os.path.split(fullpath) name, suffix = os.path.splitext(name) wav = Synthesizer.load_preprocess_wav(fullpath) if suffix == ".wav": # 如果原始音频的后缀为.wav,则不用进行格式转换 is_wav_file = True return is_wav_file, wav, str(fullpath) else: # 如果原始音频的后缀不是.wav,则需要进行格式转换 out_file = os.path.join(path_, name + "." + str(out_suffix)) sf.write(out_file, wav.astype(np.float32), hparams.sample_rate) return is_wav_file, wav, str(out_file) def DelFile(rootDir, matchText: str): fileList = os.listdir(rootDir) for file in fileList: if matchText in file: delFile = os.path.join(rootDir, file) os.remove(delFile) print("Deleted:", delFile) def work(totDur_ori: float, nPause_ori: int, arDur_ori: float, nSyl_ori: int, arRate_ori: float, audio_syn): fix_file = FixSpeed(totDur_ori, nPause_ori, arDur_ori, nSyl_ori, arRate_ori, audio_syn) # DelFile(in_path, '.TextGrid') out_path, _ = os.path.split(audio_syn) DelFile(out_path, '.TextGrid') return fix_file