mirror of
https://github.com/liuhaozhe6788/voice-cloning-collab.git
synced 2026-05-18 05:04:51 +02:00
add speed changer to the toolbox
This commit is contained in:
@@ -60,7 +60,7 @@ def FixSpeed(totDur_ori: float,
|
||||
audio.a_speed(audio_syn, speed_factor, out_file)
|
||||
os.remove(audio_syn) # remove intermediate wav files
|
||||
print(f"Finished!\nThe path of out_file is {out_file}")
|
||||
return out_file
|
||||
return out_file, speed_factor
|
||||
|
||||
|
||||
def TransFormat(fullpath, out_suffix):
|
||||
@@ -92,7 +92,7 @@ def work(totDur_ori: float,
|
||||
nSyl_ori: int,
|
||||
arRate_ori: float,
|
||||
audio_syn):
|
||||
fix_file = FixSpeed(totDur_ori,
|
||||
fix_file, speed_factor = FixSpeed(totDur_ori,
|
||||
nPause_ori,
|
||||
arDur_ori,
|
||||
nSyl_ori,
|
||||
@@ -101,6 +101,5 @@ def work(totDur_ori: float,
|
||||
# DelFile(in_path, '.TextGrid')
|
||||
out_path, _ = os.path.split(audio_syn)
|
||||
DelFile(out_path, '.TextGrid')
|
||||
return fix_file
|
||||
|
||||
return fix_file, speed_factor
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import re
|
||||
import numpy as np
|
||||
import torch
|
||||
import soundfile as sf
|
||||
import librosa
|
||||
import spacy
|
||||
|
||||
import encoder
|
||||
@@ -17,6 +18,7 @@ from synthesizer.hparams import syn_hparams
|
||||
from toolbox.ui import UI
|
||||
from toolbox.utterance import Utterance
|
||||
from vocoder import inference as vocoder
|
||||
from speed_changer.fixSpeed import *
|
||||
import time
|
||||
|
||||
|
||||
@@ -183,6 +185,16 @@ class Toolbox:
|
||||
spec = Synthesizer_infer.make_spectrogram(wav)
|
||||
self.ui.draw_spec(spec, "current")
|
||||
|
||||
path_ori = os.getcwd()
|
||||
file_ori = 'temp.wav'
|
||||
fpath = os.path.join(path_ori, file_ori)
|
||||
sf.write(fpath, wav, samplerate=encoder.params_data.sampling_rate)
|
||||
|
||||
# adjust the speed
|
||||
self.wav_ori_info = AudioAnalysis(path_ori, file_ori)
|
||||
DelFile(path_ori, '.TextGrid')
|
||||
os.remove(fpath)
|
||||
|
||||
# Compute the embedding
|
||||
if not encoder_infer.is_loaded():
|
||||
self.init_encoder()
|
||||
@@ -288,6 +300,18 @@ class Toolbox:
|
||||
if self.ui.trim_silences_checkbox.isChecked():
|
||||
wav = encoder_infer.preprocess_wav(wav)
|
||||
|
||||
path_ori = os.getcwd()
|
||||
file_ori = 'temp.wav'
|
||||
filename = os.path.join(path_ori, file_ori)
|
||||
sf.write(filename, wav.astype(np.float32), syn_hparams.sample_rate)
|
||||
self.ui.log("\nSaved output (haven't change speed) as %s\n\n" % filename)
|
||||
|
||||
# Fix Speed(generate new audio)
|
||||
fix_file, speed_factor = work(*self.wav_ori_info, filename)
|
||||
self.ui.log(f"\nSaved output (fixed speed) as {fix_file}\n\n")
|
||||
wav, _ = librosa.load(fix_file, syn_hparams.sample_rate)
|
||||
os.remove(fix_file)
|
||||
|
||||
# Play it
|
||||
wav = wav / np.abs(wav).max() * 4
|
||||
self.ui.play(wav, Synthesizer_infer.sample_rate)
|
||||
@@ -324,7 +348,7 @@ class Toolbox:
|
||||
embed, partial_embeds, _ = encoder_infer.embed_utterance(encoder_wav, return_partials=True)
|
||||
|
||||
# Add the utterance
|
||||
name = speaker_name + "_gen_%05d" % np.random.randint(100000)
|
||||
name = speaker_name + "_gen_%05d_" % np.random.randint(100000) + str(speed_factor)
|
||||
utterance = Utterance(name, speaker_name, wav, spec, embed, partial_embeds, True)
|
||||
self.utterances.add(utterance)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user