new commits

2026-05-18 05:04:51 +02:00 · 2023-06-13 14:53:03 +08:00
parent 7a9ce1d39e
commit a77ef24e44
6 changed files with 10 additions and 6 deletions
--- a/synthesizer/hparams.py
+++ b/synthesizer/hparams.py
@@ -43,8 +43,8 @@ hparams = HParams(
        tts_num_highways = 4,
        tts_dropout = 0.5,
        tts_cleaner_names = ["english_cleaners"],
-        tts_start_threshold = -2.0, 
-        tts_stop_threshold = -1.8,                  # Value below which audio generation ends.
+        tts_start_threshold = -1.2, 
+        tts_stop_threshold = -1.2,                     # Value below which audio generation ends.
                                                    # For example, for a range of [-4, 4], this
                                                    # will terminate the sequence at the first
                                                    # frame that has all values < -3.4
--- a/synthesizer/models/tacotron.py
+++ b/synthesizer/models/tacotron.py
@@ -458,7 +458,7 @@ class Tacotron(nn.Module):
            if t == 0:
                first_stop_token = stop_tokens[0]      
            # Stop the loop when all stop tokens in batch exceed threshold compared with the 1st token and the sequence's length exceeds threshold
-            if (stop_tokens > first_stop_token * 1e4).all() and t > (20 * self.r): break
+            if (stop_tokens > first_stop_token * 4e3).all() and t > (20 * self.r): break
            # if (stop_tokens > 0.5).all() and t > (20 * self.r): break
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
--- a/synthesizer/utils/cleaners.py
+++ b/synthesizer/utils/cleaners.py
@@ -193,6 +193,8 @@ def add_breaks(text):
    text = text.replace(',', '. ')
    text = text.replace(';', '. ')
    text = text.replace(':', '. ')
+    text = text.replace('!', '. ')
+    text = text.replace('?', '. ')
    return text


--- a/vocoder/hparams.py
+++ b/vocoder/hparams.py
@@ -46,5 +46,5 @@ voc_overlap = 400                   # number of samples for crossfading between
 # Output Noise Reduce
 prop_decrease_low_freq = 0.6        # prop decrease for low dominant frequency
 prop_decrease_high_freq = 0.9        # prop decrease for high dominant frequency
-
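+dry=1                              # dry/wet mix for the facebook denoiser: weight of the unprocessed input in the blend (0 = denoiser output only, 1 = input only, i.e. denoiser output is discarded)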
 sex = -1                        
--- a/vocoder/inference.py
+++ b/vocoder/inference.py
@@ -79,7 +79,9 @@ def waveform_denoising(wav):
        _device = torch.device('cpu')
    model = master64().to(_device)
    noisy=torch.from_numpy(np.array([wav])).to(_device).float()
-    estimate = model(noisy)[0].cpu().detach().numpy()
+    estimate = model(noisy)
+    estimate = estimate * (1-hp.dry) + noisy * hp.dry
+    estimate = estimate[0].cpu().detach().numpy()
    return  nr.reduce_noise(np.squeeze(estimate), hp.sample_rate, prop_decrease=prop_decrease) 

 def get_dominant_freq(wav, name="fft"):
--- a/vocoder/models/fatchord_version.py
+++ b/vocoder/models/fatchord_version.py
@@ -251,7 +251,7 @@ class WaveRNN(nn.Module):

        # Fade-out at the end to avoid signal cutting out suddenly
        fade_out_len = min(wave_len, 20 * self.hop_length)
-        fade_out = np.linspace(1, 0, fade_out_len)
+        fade_out = np.linspace(1, 0.5, fade_out_len)
        output = output[:wave_len]
        output[-fade_out_len:] *= fade_out