mirror of
https://github.com/liuhaozhe6788/voice-cloning-collab.git
synced 2026-05-18 05:04:51 +02:00
new commits
This commit is contained in:
@@ -43,8 +43,8 @@ hparams = HParams(
|
||||
tts_num_highways = 4,
|
||||
tts_dropout = 0.5,
|
||||
tts_cleaner_names = ["english_cleaners"],
|
||||
tts_start_threshold = -2.0,
|
||||
tts_stop_threshold = -1.8, # Value below which audio generation ends.
|
||||
tts_start_threshold = -1.2,
|
||||
tts_stop_threshold = -1.2, # Value below which audio generation ends.
|
||||
# For example, for a range of [-4, 4], this
|
||||
# will terminate the sequence at the first
|
||||
# frame that has all values < -1.2 (the tts_stop_threshold set above)
|
||||
|
||||
@@ -458,7 +458,7 @@ class Tacotron(nn.Module):
|
||||
if t == 0:
|
||||
first_stop_token = stop_tokens[0]
|
||||
# Stop the loop once every stop token in the batch exceeds the first stop token (recorded at t == 0) scaled by a fixed factor, and more than 20 * r steps have been generated
|
||||
if (stop_tokens > first_stop_token * 1e4).all() and t > (20 * self.r): break
|
||||
if (stop_tokens > first_stop_token * 4e3).all() and t > (20 * self.r): break
|
||||
# if (stop_tokens > 0.5).all() and t > (20 * self.r): break
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
@@ -193,6 +193,8 @@ def add_breaks(text):
|
||||
text = text.replace(',', '. ')
|
||||
text = text.replace(';', '. ')
|
||||
text = text.replace(':', '. ')
|
||||
text = text.replace('!', '. ')
|
||||
text = text.replace('?', '. ')
|
||||
return text
|
||||
|
||||
|
||||
|
||||
@@ -46,5 +46,5 @@ voc_overlap = 400 # number of samples for crossfading between
|
||||
# Output Noise Reduce
|
||||
prop_decrease_low_freq = 0.6 # prop decrease for low dominant frequency
|
||||
prop_decrease_high_freq = 0.9 # prop decrease for high dominant frequency
|
||||
|
||||
dry=1 # dry ratio for facebook denoiser
|
||||
sex = -1
|
||||
@@ -79,7 +79,9 @@ def waveform_denoising(wav):
|
||||
_device = torch.device('cpu')
|
||||
model = master64().to(_device)
|
||||
noisy=torch.from_numpy(np.array([wav])).to(_device).float()
|
||||
estimate = model(noisy)[0].cpu().detach().numpy()
|
||||
estimate = model(noisy)
|
||||
estimate = estimate * (1-hp.dry) + noisy * hp.dry
|
||||
estimate = estimate[0].cpu().detach().numpy()
|
||||
return nr.reduce_noise(np.squeeze(estimate), hp.sample_rate, prop_decrease=prop_decrease)
|
||||
|
||||
def get_dominant_freq(wav, name="fft"):
|
||||
|
||||
@@ -251,7 +251,7 @@ class WaveRNN(nn.Module):
|
||||
|
||||
# Fade-out at the end to avoid signal cutting out suddenly
|
||||
fade_out_len = min(wave_len, 20 * self.hop_length)
|
||||
fade_out = np.linspace(1, 0, fade_out_len)
|
||||
fade_out = np.linspace(1, 0.5, fade_out_len)
|
||||
output = output[:wave_len]
|
||||
output[-fade_out_len:] *= fade_out
|
||||
|
||||
|
||||
Reference in New Issue
Block a user