Merge branch 'develop'

2026-05-18 05:04:51 +02:00 · 2023-06-09 17:52:43 +08:00
parent 00be83f97e 11a4045b06
commit 6a6d1a8df9
5 changed files with 18 additions and 13 deletions
--- a/encoder/params_data.py
+++ b/encoder/params_data.py
@@ -30,5 +30,5 @@ audio_norm_target_dBFS = -30
 # 判断用户输入语音为男声或女声的分界频率
 split_freq = 170  
 # embed去噪置零的阈值
-set_zero_thres=0.06
+set_zero_thres=0.04

--- a/requirements.txt
+++ b/requirements.txt
--- a/synthesizer/models/tacotron.py
+++ b/synthesizer/models/tacotron.py
@@ -458,7 +458,7 @@ class Tacotron(nn.Module):
            if t == 0:
                first_stop_token = stop_tokens[0]      
            # Stop the loop when all stop tokens in batch exceed threshold compared with the 1st token and the sequence's length exceeds threshold
-            if (stop_tokens > first_stop_token * 2e3).all() and t > (20 * self.r): break
+            if (stop_tokens > first_stop_token * 1e4).all() and t > (20 * self.r): break
            # if (stop_tokens > 0.5).all() and t > (20 * self.r): break
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
--- a/vocoder/inference.py
+++ b/vocoder/inference.py
@@ -1,8 +1,12 @@
 from vocoder.models.fatchord_version import WaveRNN
 from vocoder import hparams as hp
 from scipy.fft import rfft, rfftfreq
-import torch
+from scipy import signal
+from denoiser.pretrained import master64
+import librosa
 import numpy as np
+import torch
+import torchaudio
 import noisereduce as nr    


@@ -68,15 +72,15 @@ def infer_waveform(mel, normalize=True,  batched=True, target=8000, overlap=800,
    return wav

 def waveform_denoising(wav):
-    fft_max_freq = get_dominant_freq(wav)
    prop_decrease = hp.prop_decrease_low_freq if hp.sex else hp.prop_decrease_high_freq
-    # prop_decrease = 0.6 for low freq audio
-    # prop_decrease = 0.9 for high freq audio
-    print(f"\nthe dominant frequency of output audio is {fft_max_freq}Hz")
-
-    wav = nr.reduce_noise(wav, hp.sample_rate, prop_decrease=prop_decrease)
-
-    return wav
+    if torch.cuda.is_available():
+        _device = torch.device('cuda')
+    else:
+        _device = torch.device('cpu')
+    model = master64().to(_device)
+    noisy=torch.from_numpy(np.array([wav])).to(_device).float()
+    estimate = model(noisy)[0].cpu().detach().numpy()
+    return  nr.reduce_noise(np.squeeze(estimate), hp.sample_rate, prop_decrease=prop_decrease) 

 def get_dominant_freq(wav, name="fft"):
    import matplotlib.pyplot as plt
--- a/vocoder/models/fatchord_version.py
+++ b/vocoder/models/fatchord_version.py
@@ -250,9 +250,10 @@ class WaveRNN(nn.Module):
            output = de_emphasis(output)

        # Fade-out at the end to avoid signal cutting out suddenly
-        fade_out = np.linspace(1, 0, 20 * self.hop_length)
+        fade_out_len = min(wave_len, 20 * self.hop_length)
+        fade_out = np.linspace(1, 0, fade_out_len)
        output = output[:wave_len]
-        output[-20 * self.hop_length:] *= fade_out
+        output[-fade_out_len:] *= fade_out
        
        self.train()