Merge branch 'develop'

This commit is contained in:
liuhaozhe6788
2023-06-09 17:52:43 +08:00
5 changed files with 18 additions and 13 deletions

View File

@@ -30,5 +30,5 @@ audio_norm_target_dBFS = -30
# Boundary frequency for deciding whether the user's input voice is male or female
split_freq = 170
# Threshold for zeroing embed values when denoising
set_zero_thres=0.06
set_zero_thres=0.04

Binary file not shown.

View File

@@ -458,7 +458,7 @@ class Tacotron(nn.Module):
if t == 0:
first_stop_token = stop_tokens[0]
# Stop the loop once all stop tokens in the batch exceed first_stop_token (recorded at t == 0) times a large factor and t is greater than 20 * self.r
if (stop_tokens > first_stop_token * 2e3).all() and t > (20 * self.r): break
if (stop_tokens > first_stop_token * 1e4).all() and t > (20 * self.r): break
# if (stop_tokens > 0.5).all() and t > (20 * self.r): break
if torch.cuda.is_available():
torch.cuda.empty_cache()

View File

@@ -1,8 +1,12 @@
from vocoder.models.fatchord_version import WaveRNN
from vocoder import hparams as hp
from scipy.fft import rfft, rfftfreq
import torch
from scipy import signal
from denoiser.pretrained import master64
import librosa
import numpy as np
import torch
import torchaudio
import noisereduce as nr
@@ -68,15 +72,15 @@ def infer_waveform(mel, normalize=True, batched=True, target=8000, overlap=800,
return wav
def waveform_denoising(wav):
fft_max_freq = get_dominant_freq(wav)
prop_decrease = hp.prop_decrease_low_freq if hp.sex else hp.prop_decrease_high_freq
# prop_decrease = 0.6 for low freq audio
# prop_decrease = 0.9 for high freq audio
print(f"\nthe dominant frequency of output audio is {fft_max_freq}Hz")
wav = nr.reduce_noise(wav, hp.sample_rate, prop_decrease=prop_decrease)
return wav
if torch.cuda.is_available():
_device = torch.device('cuda')
else:
_device = torch.device('cpu')
model = master64().to(_device)
noisy=torch.from_numpy(np.array([wav])).to(_device).float()
estimate = model(noisy)[0].cpu().detach().numpy()
return nr.reduce_noise(np.squeeze(estimate), hp.sample_rate, prop_decrease=prop_decrease)
def get_dominant_freq(wav, name="fft"):
import matplotlib.pyplot as plt

View File

@@ -250,9 +250,10 @@ class WaveRNN(nn.Module):
output = de_emphasis(output)
# Fade-out at the end to avoid signal cutting out suddenly
fade_out = np.linspace(1, 0, 20 * self.hop_length)
fade_out_len = min(wave_len, 20 * self.hop_length)
fade_out = np.linspace(1, 0, fade_out_len)
output = output[:wave_len]
output[-20 * self.hop_length:] *= fade_out
output[-fade_out_len:] *= fade_out
self.train()