denoise using fb denoiser

This commit is contained in:
liuhaozhe6788
2023-06-09 17:49:19 +08:00
parent ba7b119073
commit 11a4045b06
6 changed files with 20 additions and 15 deletions

View File

@@ -236,7 +236,7 @@ if __name__ == '__main__':
start_syn = time.time()
# Generating the spectrogram
# text = input("Write a sentence to be synthesized:\n")
text = "The North Wind and the Sun were disputing which was the stronger, when a traveler came along wrapped in a warm cloak. They agreed that the one who first succeeded in making the traveler take his cloak off should be considered stronger than the other. Then the North Wind blew as hard as he could, but the more he blew the more closely did the traveler fold his cloak around him; and at last the North Wind gave up the attempt. Then the Sun shined out warmly, and immediately the traveler took off his cloak.And so the North Wind was obliged to confess that the Sun was the stronger of the two."
text = "Mechanics is an essential branch of physics that provides a framework for understanding the behavior of physical bodies under the influence of various forces. The principles of mechanics are based on the laws of motion, which form the foundation of the field. Mechanics has many practical applications in engineering and technology, from aerospace and automotive engineering to robotics and manufacturing. As science and technology continue to evolve, the principles of mechanics will remain an important part of our understanding of the physical world."
# If seed is specified, reset torch seed and force synthesizer reload
if args.seed is not None:
@@ -307,7 +307,7 @@ if __name__ == '__main__':
wav = np.concatenate([i for w, b in zip(wavs, breaks) for i in (w, b)])
# Trim excess silences to compensate for gaps in spectrograms (issue #53)
generated_wav = encoder.inference.preprocess_wav(wav)
# generated_wav = encoder.inference.preprocess_wav(wav)
wav = wav / np.abs(wav).max() * 4
# Save it on the disk

View File

@@ -30,5 +30,5 @@ audio_norm_target_dBFS = -30
# Cutoff frequency used to decide whether the user's input voice is male or female
split_freq = 170
# Threshold for zeroing out values when denoising the embed
set_zero_thres=0.06
set_zero_thres=0.04

Binary file not shown.

View File

@@ -458,7 +458,7 @@ class Tacotron(nn.Module):
if t == 0:
first_stop_token = stop_tokens[0]
# Stop the loop once every stop token in the batch exceeds a fixed multiple of the first stop token and the step count t is greater than 20 * self.r
if (stop_tokens > first_stop_token * 2e3).all() and t > (20 * self.r): break
if (stop_tokens > first_stop_token * 1e4).all() and t > (20 * self.r): break
# if (stop_tokens > 0.5).all() and t > (20 * self.r): break
if torch.cuda.is_available():
torch.cuda.empty_cache()

View File

@@ -1,8 +1,12 @@
from vocoder.models.fatchord_version import WaveRNN
from vocoder import hparams as hp
from scipy.fft import rfft, rfftfreq
import torch
from scipy import signal
from denoiser.pretrained import master64
import librosa
import numpy as np
import torch
import torchaudio
import noisereduce as nr
@@ -68,15 +72,15 @@ def infer_waveform(mel, normalize=True, batched=True, target=8000, overlap=800,
return wav
def waveform_denoising(wav):
fft_max_freq = get_dominant_freq(wav)
prop_decrease = hp.prop_decrease_low_freq if hp.sex else hp.prop_decrease_high_freq
# prop_decrease = 0.6 for low freq audio
# prop_decrease = 0.9 for high freq audio
print(f"\nthe dominant frequency of output audio is {fft_max_freq}Hz")
wav = nr.reduce_noise(wav, hp.sample_rate, prop_decrease=prop_decrease)
return wav
if torch.cuda.is_available():
_device = torch.device('cuda')
else:
_device = torch.device('cpu')
model = master64().to(_device)
noisy=torch.from_numpy(np.array([wav])).to(_device).float()
estimate = model(noisy)[0].cpu().detach().numpy()
return nr.reduce_noise(np.squeeze(estimate), hp.sample_rate, prop_decrease=prop_decrease)
def get_dominant_freq(wav, name="fft"):
import matplotlib.pyplot as plt

View File

@@ -250,9 +250,10 @@ class WaveRNN(nn.Module):
output = de_emphasis(output)
# Fade-out at the end to avoid signal cutting out suddenly
fade_out = np.linspace(1, 0, 20 * self.hop_length)
fade_out_len = min(wave_len, 20 * self.hop_length)
fade_out = np.linspace(1, 0, fade_out_len)
output = output[:wave_len]
output[-20 * self.hop_length:] *= fade_out
output[-fade_out_len:] *= fade_out
self.train()