mirror of
https://github.com/liuhaozhe6788/voice-cloning-collab.git
synced 2025-12-23 23:20:15 +01:00
new commits
This commit is contained in:
@@ -311,7 +311,7 @@ if __name__ == '__main__':
|
||||
# Synthesizing the waveform is fairly straightforward. Remember that the longer the
|
||||
# spectrogram, the more time-efficient the vocoder.
|
||||
if not args.griffin_lim:
|
||||
wav = vocoder.infer_waveform(spec, target=4000, overlap=400)
|
||||
wav = vocoder.infer_waveform(spec, target=vocoder.hp.voc_target, overlap=vocoder.hp.voc_overlap, crossfade=vocoder.hp.is_crossfade)
|
||||
else:
|
||||
wav = Synthesizer.griffin_lim(spec)
|
||||
|
||||
|
||||
@@ -40,8 +40,9 @@ voc_seq_len = hop_length * 5 # must be a multiple of hop_length
|
||||
|
||||
# Generating / Synthesizing
|
||||
voc_gen_batched = True # very fast (realtime+) single utterance batched generation
|
||||
voc_target = 8000 # target number of samples to be generated in each batch entry
|
||||
voc_target = 4000 # target number of samples to be generated in each batch entry
|
||||
voc_overlap = 400 # number of samples for crossfading between batches
|
||||
is_crossfade = True # whether to apply crossfading between batches
|
||||
|
||||
# Output noise reduction
|
||||
prop_decrease_low_freq = 0.6 # prop decrease for low dominant frequency
|
||||
|
||||
@@ -50,7 +50,7 @@ def is_loaded():
|
||||
|
||||
|
||||
def infer_waveform(mel, normalize=True, batched=True, target=8000, overlap=800,
|
||||
progress_callback=None):
|
||||
progress_callback=None, crossfade=True):
|
||||
"""
|
||||
Infers the waveform of a mel spectrogram output by the synthesizer (the format must match
|
||||
that of the synthesizer!)
|
||||
@@ -67,7 +67,7 @@ def infer_waveform(mel, normalize=True, batched=True, target=8000, overlap=800,
|
||||
if normalize:
|
||||
mel = mel / hp.mel_max_abs_value
|
||||
mel = torch.from_numpy(mel[None, ...])
|
||||
wav = _model.generate(mel, batched, target, overlap, hp.mu_law, progress_callback)
|
||||
wav = _model.generate(mel, batched, target, overlap, hp.mu_law, progress_callback, crossfade=crossfade)
|
||||
wav = waveform_denoising(wav)
|
||||
return wav
|
||||
|
||||
|
||||
@@ -150,7 +150,7 @@ class WaveRNN(nn.Module):
|
||||
x = F.relu(self.fc2(x))
|
||||
return self.fc3(x)
|
||||
|
||||
def generate(self, mels, batched, target, overlap, mu_law, progress_callback=None):
|
||||
def generate(self, mels, batched, target, overlap, mu_law, progress_callback=None,crossfade=True):
|
||||
mu_law = mu_law if self.mode == 'RAW' else False
|
||||
progress_callback = progress_callback or self.gen_display
|
||||
|
||||
@@ -240,7 +240,7 @@ class WaveRNN(nn.Module):
|
||||
output = output.astype(np.float64)
|
||||
|
||||
if batched:
|
||||
output = self.xfade_and_unfold(output, target, overlap)
|
||||
output = self.xfade_and_unfold(output, target, overlap, crossfade=crossfade)
|
||||
else:
|
||||
output = output[0]
|
||||
|
||||
@@ -340,7 +340,7 @@ class WaveRNN(nn.Module):
|
||||
|
||||
return folded
|
||||
|
||||
def xfade_and_unfold(self, y, target, overlap):
|
||||
def xfade_and_unfold(self, y, target, overlap, crossfade=True):
|
||||
|
||||
''' Applies a crossfade and unfolds into a 1d array.
|
||||
|
||||
@@ -382,9 +382,12 @@ class WaveRNN(nn.Module):
|
||||
silence = np.zeros((silence_len), dtype=np.float64)
|
||||
|
||||
# Equal power crossfade
|
||||
t = np.linspace(-1, 1, fade_len, dtype=np.float64)
|
||||
fade_in = np.sqrt(0.5 * (1 + t))
|
||||
fade_out = np.sqrt(0.5 * (1 - t))
|
||||
if crossfade:
|
||||
t = np.linspace(-1, 1, fade_len, dtype=np.float64)
|
||||
fade_in = np.sqrt(0.5 * (1 + t))
|
||||
fade_out = np.sqrt(0.5 * (1 - t))
|
||||
else:
|
||||
fade_in = fade_out = np.ones((fade_len), dtype=np.float64)
|
||||
|
||||
# Concat the silence to the fades
|
||||
fade_in = np.concatenate([silence, fade_in])
|
||||
|
||||
Reference in New Issue
Block a user