Files
voice-cloning-collab/fixSpeed.py

107 lines
4.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
from ffmpeg import audio
from pathlib import Path
import numpy as np
import parselmouth
from synthesizer.inference import Synthesizer
from synthesizer.hparams import hparams
import soundfile as sf
from parselmouth.praat import run_file
# Upper bound of the accepted speed factor: FixSpeed leaves the audio
# untouched when the computed factor is greater than this value.
high_lim_speed_factor = 1.0
# Lower bound of the accepted speed factor: FixSpeed leaves the audio
# untouched when the computed factor is smaller than this value.
low_lim_speed_factor = 0.6
def AudioAnalysis(dir, file):
    """Run the ``myspsolution.praat`` script on one audio file and collect its statistics.

    The Praat script is looked up in the directory that contains this module.

    Args:
        dir: Directory that holds the audio file; it is also handed to the
            Praat script as one of its arguments.
        file: Name of the audio file inside ``dir``.

    Returns:
        A tuple ``(totDur, nPause, arDur, nSyl, arRate)`` built from the
        script variables ``originaldur``, ``npause``, ``speakingtot``,
        ``voicedcount`` and ``articulationrate``. The float-like entries are
        rounded to two decimals and the counts are converted to ``int``.
        If the analysis fails for any reason, every entry is ``0``.
    """
    sound = os.path.join(dir, file)
    dir_path = os.path.dirname(os.path.realpath(__file__))  # directory of this module
    source_run = os.path.join(dir_path, "myspsolution.praat")
    try:
        # The fourth argument (0.27) is Minimum_pause_duration in the original
        # Praat script; it can be lowered a little if the analysis misbehaves.
        objects = run_file(source_run, -20, 2, 0.27, "yes", sound, dir, 80, 400, 0.01,
                           capture_output=True, return_variables=True)
        variables = objects[2]
        totDur = variables['originaldur']
        nPause = variables['npause']
        arDur = variables['speakingtot']
        nSyl = variables['voicedcount']
        arRate = variables['articulationrate']
    except Exception as exc:
        # Catch Exception instead of using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate to the caller.
        totDur = nPause = arDur = nSyl = arRate = 0
        print("Try again the sound of the audio was not clear")
        # Surface the real cause (missing script, unreadable file, ...) so a
        # configuration problem is not mistaken for bad audio.
        print(f"AudioAnalysis failed for {sound}: {exc!r}")
    return round(totDur, 2), int(nPause), round(arDur, 2), int(nSyl), round(arRate, 2)
def FixSpeed(totDur_ori: float,
             nPause_ori: int,
             arDur_ori: float,
             nSyl_ori: int,
             arRate_ori: float,
             audio_syn):
    """Re-time a synthesized audio file so its articulation rate matches the original.

    The synthesized file is analysed with ``AudioAnalysis``; the speed factor
    is ``arRate_ori / arRate_syn`` rounded to two decimals. When the factor
    lies within ``[low_lim_speed_factor, high_lim_speed_factor]`` a new file
    named ``<name>_<factor><suffix>`` is produced next to the input and the
    input file is removed.

    Args:
        totDur_ori: Total duration of the original audio (only printed).
        nPause_ori: Pause count of the original audio (only printed).
        arDur_ori: Speaking duration of the original audio (only printed).
        nSyl_ori: Syllable count of the original audio (only printed).
        arRate_ori: Articulation rate of the original audio.
        audio_syn: Path of the synthesized audio file to adjust.

    Returns:
        The path of the speed-adjusted file on success. ``audio_syn`` is
        returned unchanged when the synthesized audio could not be analysed,
        when the speed factor is outside the accepted range, or when the
        adjusted file was not created.
    """
    path_syn, filename_syn = os.path.split(audio_syn)
    name_syn, suffix_syn = os.path.splitext(filename_syn)
    totDur_syn, nPause_syn, arDur_syn, nSyl_syn, arRate_syn = AudioAnalysis(path_syn, filename_syn)
    print(f"for original audio:\n\ttotDur = {totDur_ori}s\n\tnPause = {nPause_ori}\n\tarDur = {arDur_ori}s\n\tnSyl = {nSyl_ori}\n\tarRate = {arRate_ori} per second\n-----")
    print(f"for synthesized audio:\n\ttotDur = {totDur_syn}s\n\tnPause = {nPause_syn}\n\tarDur = {arDur_syn}s\n\tnSyl = {nSyl_syn}\n\tarRate = {arRate_syn} per second\n-----")

    # A zero rate means the analysis failed; dividing by it is impossible.
    if arRate_syn == 0:
        print("exception!\n The speed factor is abnormal")
        return audio_syn

    speed_factor = round(arRate_ori / arRate_syn, 2)
    print(f"speed_factor = {speed_factor}")
    if speed_factor > high_lim_speed_factor or speed_factor < low_lim_speed_factor:
        print("exception!\n The speed factor is abnormal")
        return audio_syn

    out_file = os.path.join(path_syn, name_syn + "_{}".format(speed_factor) + suffix_syn)
    audio.a_speed(audio_syn, speed_factor, out_file)

    # Only discard the source once the converted file really exists; otherwise
    # a failed conversion would delete the only copy and return a dead path.
    if not os.path.isfile(out_file):
        print("exception!\n The speed-adjusted file was not created; keeping the original")
        return audio_syn

    os.remove(audio_syn)  # remove intermediate wav files
    print(f"Finished!\nThe path of out_file is {out_file}")
    return out_file
def TransFormat(fullpath, out_suffix):
    """Load an audio file and, unless it is already ``.wav``, re-save it.

    The waveform is always loaded through ``Synthesizer.load_preprocess_wav``.
    When the input suffix is not exactly ``.wav`` the waveform is written next
    to the input as ``<name>.<out_suffix>`` at ``hparams.sample_rate``.

    Args:
        fullpath: Path of the input audio file.
        out_suffix: Extension (without the leading dot) for the converted file.

    Returns:
        A tuple ``(is_wav_file, wav, path)``: whether the input already had a
        ``.wav`` suffix, the loaded waveform, and the path (as ``str``) of the
        file to use - the input itself or the newly written file.
    """
    folder, filename = os.path.split(fullpath)
    stem, ext = os.path.splitext(filename)
    wav = Synthesizer.load_preprocess_wav(fullpath)

    # Input already carries the .wav suffix: no format conversion needed.
    already_wav = ext == ".wav"
    if already_wav:
        return already_wav, wav, str(fullpath)

    # Any other suffix: write the waveform out under the requested extension.
    target = os.path.join(folder, ".".join((stem, str(out_suffix))))
    sf.write(target, wav.astype(np.float32), hparams.sample_rate)
    return already_wav, wav, str(target)
def DelFile(rootDir, matchText: str):
    """Delete every regular file directly inside ``rootDir`` whose name contains ``matchText``.

    Sub-directories are never removed, even when their name matches, so a
    matching directory no longer aborts the cleanup with an exception.

    Args:
        rootDir: Directory to clean (not searched recursively).
        matchText: Substring a file name must contain to be deleted.
    """
    for file in os.listdir(rootDir):
        if matchText not in file:
            continue
        delFile = os.path.join(rootDir, file)
        # os.remove cannot delete directories; skip anything that is not a file.
        if not os.path.isfile(delFile):
            continue
        os.remove(delFile)
        print("Deleted", delFile)
def work(totDur_ori: float,
         nPause_ori: int,
         arDur_ori: float,
         nSyl_ori: int,
         arRate_ori: float,
         audio_syn):
    """Speed-correct a synthesized audio file, then clean up TextGrid files.

    Forwards all arguments to ``FixSpeed`` and afterwards calls ``DelFile`` on
    the directory of ``audio_syn`` to delete entries whose name contains
    ``.TextGrid``.

    Returns:
        Whatever ``FixSpeed`` returned (the path of the resulting audio file).
    """
    result_path = FixSpeed(
        totDur_ori, nPause_ori, arDur_ori, nSyl_ori, arRate_ori, audio_syn
    )
    # The cleanup happens in the directory that holds the synthesized audio.
    synth_dir = os.path.dirname(audio_syn)
    DelFile(synth_dir, '.TextGrid')
    return result_path