new commits

This commit is contained in:
liuhaozhe6788
2023-06-19 19:44:56 +08:00
parent f3d34866a8
commit 000c3ad71f
4 changed files with 14 additions and 10 deletions

View File

@@ -311,7 +311,7 @@ if __name__ == '__main__':
# Synthesizing the waveform is fairly straightforward. Remember that the longer the
# spectrogram, the more time-efficient the vocoder.
if not args.griffin_lim:
wav = vocoder.infer_waveform(spec, target=4000, overlap=400)
wav = vocoder.infer_waveform(spec, target=vocoder.hp.voc_target, overlap=vocoder.hp.voc_overlap, crossfade=vocoder.hp.is_crossfade)
else:
wav = Synthesizer.griffin_lim(spec)

View File

@@ -40,8 +40,9 @@ voc_seq_len = hop_length * 5 # must be a multiple of hop_length
# Generating / Synthesizing
voc_gen_batched = True # very fast (realtime+) single utterance batched generation
voc_target = 8000 # target number of samples to be generated in each batch entry
voc_target = 4000 # target number of samples to be generated in each batch entry
voc_overlap = 400 # number of samples for crossfading between batches
is_crossfade = True # whether to apply an equal-power crossfade between batches (False = unity-gain fades)
# Output Noise Reduce
prop_decrease_low_freq = 0.6 # prop decrease for low dominant frequency

View File

@@ -50,7 +50,7 @@ def is_loaded():
def infer_waveform(mel, normalize=True, batched=True, target=8000, overlap=800,
progress_callback=None):
progress_callback=None, crossfade=True):
"""
Infers the waveform of a mel spectrogram output by the synthesizer (the format must match
that of the synthesizer!)
@@ -67,7 +67,7 @@ def infer_waveform(mel, normalize=True, batched=True, target=8000, overlap=800,
if normalize:
mel = mel / hp.mel_max_abs_value
mel = torch.from_numpy(mel[None, ...])
wav = _model.generate(mel, batched, target, overlap, hp.mu_law, progress_callback)
wav = _model.generate(mel, batched, target, overlap, hp.mu_law, progress_callback, crossfade=crossfade)
wav = waveform_denoising(wav)
return wav

View File

@@ -150,7 +150,7 @@ class WaveRNN(nn.Module):
x = F.relu(self.fc2(x))
return self.fc3(x)
def generate(self, mels, batched, target, overlap, mu_law, progress_callback=None):
def generate(self, mels, batched, target, overlap, mu_law, progress_callback=None,crossfade=True):
mu_law = mu_law if self.mode == 'RAW' else False
progress_callback = progress_callback or self.gen_display
@@ -240,7 +240,7 @@ class WaveRNN(nn.Module):
output = output.astype(np.float64)
if batched:
output = self.xfade_and_unfold(output, target, overlap)
output = self.xfade_and_unfold(output, target, overlap, crossfade=crossfade)
else:
output = output[0]
@@ -340,7 +340,7 @@ class WaveRNN(nn.Module):
return folded
def xfade_and_unfold(self, y, target, overlap):
def xfade_and_unfold(self, y, target, overlap, crossfade=True):
''' Applies a crossfade and unfolds into a 1d array.
@@ -382,9 +382,12 @@ class WaveRNN(nn.Module):
silence = np.zeros((silence_len), dtype=np.float64)
# Equal power crossfade, or unity gain for both fades when crossfade is disabled
t = np.linspace(-1, 1, fade_len, dtype=np.float64)
fade_in = np.sqrt(0.5 * (1 + t))
fade_out = np.sqrt(0.5 * (1 - t))
if crossfade:
t = np.linspace(-1, 1, fade_len, dtype=np.float64)
fade_in = np.sqrt(0.5 * (1 + t))
fade_out = np.sqrt(0.5 * (1 - t))
else:
fade_in = fade_out = np.ones((fade_len), dtype=np.float64)
# Concat the silence to the fades
fade_in = np.concatenate([silence, fade_in])