add speed changer to the toolbox

This commit is contained in:
liuhaozhe6788
2023-06-26 10:00:37 +08:00
parent b2df6fef8e
commit 9ef191d6df
2 changed files with 28 additions and 5 deletions

View File

@@ -60,7 +60,7 @@ def FixSpeed(totDur_ori: float,
audio.a_speed(audio_syn, speed_factor, out_file)
os.remove(audio_syn) # remove intermediate wav files
print(f"Finished!\nThe path of out_file is {out_file}")
return out_file
return out_file, speed_factor
def TransFormat(fullpath, out_suffix):
@@ -92,7 +92,7 @@ def work(totDur_ori: float,
nSyl_ori: int,
arRate_ori: float,
audio_syn):
fix_file = FixSpeed(totDur_ori,
fix_file, speed_factor = FixSpeed(totDur_ori,
nPause_ori,
arDur_ori,
nSyl_ori,
@@ -101,6 +101,5 @@ def work(totDur_ori: float,
# DelFile(in_path, '.TextGrid')
out_path, _ = os.path.split(audio_syn)
DelFile(out_path, '.TextGrid')
return fix_file
return fix_file, speed_factor

View File

@@ -7,6 +7,7 @@ import re
import numpy as np
import torch
import soundfile as sf
import librosa
import spacy
import encoder
@@ -17,6 +18,7 @@ from synthesizer.hparams import syn_hparams
from toolbox.ui import UI
from toolbox.utterance import Utterance
from vocoder import inference as vocoder
from speed_changer.fixSpeed import *
import time
@@ -183,6 +185,16 @@ class Toolbox:
spec = Synthesizer_infer.make_spectrogram(wav)
self.ui.draw_spec(spec, "current")
path_ori = os.getcwd()
file_ori = 'temp.wav'
fpath = os.path.join(path_ori, file_ori)
sf.write(fpath, wav, samplerate=encoder.params_data.sampling_rate)
# adjust the speed
self.wav_ori_info = AudioAnalysis(path_ori, file_ori)
DelFile(path_ori, '.TextGrid')
os.remove(fpath)
# Compute the embedding
if not encoder_infer.is_loaded():
self.init_encoder()
@@ -288,6 +300,18 @@ class Toolbox:
if self.ui.trim_silences_checkbox.isChecked():
wav = encoder_infer.preprocess_wav(wav)
path_ori = os.getcwd()
file_ori = 'temp.wav'
filename = os.path.join(path_ori, file_ori)
sf.write(filename, wav.astype(np.float32), syn_hparams.sample_rate)
self.ui.log("\nSaved output (haven't change speed) as %s\n\n" % filename)
# Fix Speed(generate new audio)
fix_file, speed_factor = work(*self.wav_ori_info, filename)
self.ui.log(f"\nSaved output (fixed speed) as {fix_file}\n\n")
wav, _ = librosa.load(fix_file, syn_hparams.sample_rate)
os.remove(fix_file)
# Play it
wav = wav / np.abs(wav).max() * 4
self.ui.play(wav, Synthesizer_infer.sample_rate)
@@ -324,7 +348,7 @@ class Toolbox:
embed, partial_embeds, _ = encoder_infer.embed_utterance(encoder_wav, return_partials=True)
# Add the utterance
name = speaker_name + "_gen_%05d" % np.random.randint(100000)
name = speaker_name + "_gen_%05d_" % np.random.randint(100000) + str(speed_factor)
utterance = Utterance(name, speaker_name, wav, spec, embed, partial_embeds, True)
self.utterances.add(utterance)