mirror of
https://github.com/liuhaozhe6788/voice-cloning-collab.git
synced 2026-05-18 05:04:51 +02:00
Merge branch 'develop'
This commit is contained in:
@@ -30,5 +30,5 @@ audio_norm_target_dBFS = -30
|
||||
# Boundary frequency for classifying the user's input voice as male or female
|
||||
split_freq = 170
|
||||
# Threshold for zeroing embed values during denoising
|
||||
set_zero_thres=0.06
|
||||
set_zero_thres=0.04
|
||||
|
||||
|
||||
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
@@ -458,7 +458,7 @@ class Tacotron(nn.Module):
|
||||
if t == 0:
|
||||
first_stop_token = stop_tokens[0]
|
||||
# Stop the loop when all stop tokens in batch exceed threshold compared with the 1st token and the sequence's length exceeds threshold
|
||||
if (stop_tokens > first_stop_token * 2e3).all() and t > (20 * self.r): break
|
||||
if (stop_tokens > first_stop_token * 1e4).all() and t > (20 * self.r): break
|
||||
# if (stop_tokens > 0.5).all() and t > (20 * self.r): break
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
from vocoder.models.fatchord_version import WaveRNN
|
||||
from vocoder import hparams as hp
|
||||
from scipy.fft import rfft, rfftfreq
|
||||
import torch
|
||||
from scipy import signal
|
||||
from denoiser.pretrained import master64
|
||||
import librosa
|
||||
import numpy as np
|
||||
import torch
|
||||
import torchaudio
|
||||
import noisereduce as nr
|
||||
|
||||
|
||||
@@ -68,15 +72,15 @@ def infer_waveform(mel, normalize=True, batched=True, target=8000, overlap=800,
|
||||
return wav
|
||||
|
||||
def waveform_denoising(wav):
|
||||
fft_max_freq = get_dominant_freq(wav)
|
||||
prop_decrease = hp.prop_decrease_low_freq if hp.sex else hp.prop_decrease_high_freq
|
||||
# prop_decrease = 0.6 for low freq audio
|
||||
# prop_decrease = 0.9 for high freq audio
|
||||
print(f"\nthe dominant frequency of output audio is {fft_max_freq}Hz")
|
||||
|
||||
wav = nr.reduce_noise(wav, hp.sample_rate, prop_decrease=prop_decrease)
|
||||
|
||||
return wav
|
||||
if torch.cuda.is_available():
|
||||
_device = torch.device('cuda')
|
||||
else:
|
||||
_device = torch.device('cpu')
|
||||
model = master64().to(_device)
|
||||
noisy=torch.from_numpy(np.array([wav])).to(_device).float()
|
||||
estimate = model(noisy)[0].cpu().detach().numpy()
|
||||
return nr.reduce_noise(np.squeeze(estimate), hp.sample_rate, prop_decrease=prop_decrease)
|
||||
|
||||
def get_dominant_freq(wav, name="fft"):
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
@@ -250,9 +250,10 @@ class WaveRNN(nn.Module):
|
||||
output = de_emphasis(output)
|
||||
|
||||
# Fade-out at the end to avoid signal cutting out suddenly
|
||||
fade_out = np.linspace(1, 0, 20 * self.hop_length)
|
||||
fade_out_len = min(wave_len, 20 * self.hop_length)
|
||||
fade_out = np.linspace(1, 0, fade_out_len)
|
||||
output = output[:wave_len]
|
||||
output[-20 * self.hop_length:] *= fade_out
|
||||
output[-fade_out_len:] *= fade_out
|
||||
|
||||
self.train()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user