mirror of
https://github.com/liuhaozhe6788/voice-cloning-collab.git
synced 2026-05-18 13:15:06 +02:00
denoise using fb denoiser
This commit is contained in:
@@ -236,7 +236,7 @@ if __name__ == '__main__':
|
||||
start_syn = time.time()
|
||||
# Generating the spectrogram
|
||||
# text = input("Write a sentence to be synthesized:\n")
|
||||
text = "The North Wind and the Sun were disputing which was the stronger, when a traveler came along wrapped in a warm cloak. They agreed that the one who first succeeded in making the traveler take his cloak off should be considered stronger than the other. Then the North Wind blew as hard as he could, but the more he blew the more closely did the traveler fold his cloak around him; and at last the North Wind gave up the attempt. Then the Sun shined out warmly, and immediately the traveler took off his cloak.And so the North Wind was obliged to confess that the Sun was the stronger of the two."
|
||||
text = "Mechanics is an essential branch of physics that provides a framework for understanding the behavior of physical bodies under the influence of various forces. The principles of mechanics are based on the laws of motion, which form the foundation of the field. Mechanics has many practical applications in engineering and technology, from aerospace and automotive engineering to robotics and manufacturing. As science and technology continue to evolve, the principles of mechanics will remain an important part of our understanding of the physical world."
|
||||
|
||||
# If seed is specified, reset torch seed and force synthesizer reload
|
||||
if args.seed is not None:
|
||||
@@ -307,7 +307,7 @@ if __name__ == '__main__':
|
||||
wav = np.concatenate([i for w, b in zip(wavs, breaks) for i in (w, b)])
|
||||
|
||||
# Trim excess silences to compensate for gaps in spectrograms (issue #53)
|
||||
generated_wav = encoder.inference.preprocess_wav(wav)
|
||||
# generated_wav = encoder.inference.preprocess_wav(wav)
|
||||
wav = wav / np.abs(wav).max() * 4
|
||||
|
||||
# Save it on the disk
|
||||
|
||||
@@ -30,5 +30,5 @@ audio_norm_target_dBFS = -30
|
||||
# Boundary frequency (Hz) used to decide whether the user's input voice is male or female
|
||||
split_freq = 170
|
||||
# Threshold used when denoising the embedding: values below it are set to zero
|
||||
set_zero_thres=0.06
|
||||
set_zero_thres=0.04
|
||||
|
||||
|
||||
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
@@ -458,7 +458,7 @@ class Tacotron(nn.Module):
|
||||
if t == 0:
|
||||
first_stop_token = stop_tokens[0]
|
||||
# Stop the loop once every stop token in the batch exceeds a fixed multiple of the first step's stop token and the step count t exceeds 20 * self.r
|
||||
if (stop_tokens > first_stop_token * 2e3).all() and t > (20 * self.r): break
|
||||
if (stop_tokens > first_stop_token * 1e4).all() and t > (20 * self.r): break
|
||||
# if (stop_tokens > 0.5).all() and t > (20 * self.r): break
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
from vocoder.models.fatchord_version import WaveRNN
|
||||
from vocoder import hparams as hp
|
||||
from scipy.fft import rfft, rfftfreq
|
||||
import torch
|
||||
from scipy import signal
|
||||
from denoiser.pretrained import master64
|
||||
import librosa
|
||||
import numpy as np
|
||||
import torch
|
||||
import torchaudio
|
||||
import noisereduce as nr
|
||||
|
||||
|
||||
@@ -68,15 +72,15 @@ def infer_waveform(mel, normalize=True, batched=True, target=8000, overlap=800,
|
||||
return wav
|
||||
|
||||
def waveform_denoising(wav):
|
||||
fft_max_freq = get_dominant_freq(wav)
|
||||
prop_decrease = hp.prop_decrease_low_freq if hp.sex else hp.prop_decrease_high_freq
|
||||
# prop_decrease = 0.6 for low freq audio
|
||||
# prop_decrease = 0.9 for high freq audio
|
||||
print(f"\nthe dominant frequency of output audio is {fft_max_freq}Hz")
|
||||
|
||||
wav = nr.reduce_noise(wav, hp.sample_rate, prop_decrease=prop_decrease)
|
||||
|
||||
return wav
|
||||
if torch.cuda.is_available():
|
||||
_device = torch.device('cuda')
|
||||
else:
|
||||
_device = torch.device('cpu')
|
||||
model = master64().to(_device)
|
||||
noisy=torch.from_numpy(np.array([wav])).to(_device).float()
|
||||
estimate = model(noisy)[0].cpu().detach().numpy()
|
||||
return nr.reduce_noise(np.squeeze(estimate), hp.sample_rate, prop_decrease=prop_decrease)
|
||||
|
||||
def get_dominant_freq(wav, name="fft"):
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
@@ -250,9 +250,10 @@ class WaveRNN(nn.Module):
|
||||
output = de_emphasis(output)
|
||||
|
||||
# Fade-out at the end to avoid signal cutting out suddenly
|
||||
fade_out = np.linspace(1, 0, 20 * self.hop_length)
|
||||
fade_out_len = min(wave_len, 20 * self.hop_length)
|
||||
fade_out = np.linspace(1, 0, fade_out_len)
|
||||
output = output[:wave_len]
|
||||
output[-20 * self.hop_length:] *= fade_out
|
||||
output[-fade_out_len:] *= fade_out
|
||||
|
||||
self.train()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user