mirror of
https://github.com/liuhaozhe6788/voice-cloning-collab.git
synced 2026-05-18 05:04:51 +02:00
new commits
This commit is contained in:
@@ -71,7 +71,7 @@ python synthesizer_train.py <model_id> <datasets_root>/SV2TTS/synthesizer --use_
|
||||
```
|
||||
If you want to monitor the training progress, run:
|
||||
```
|
||||
tensorboard --logdir log/synthesizer --host localhost --port 8088
|
||||
tensorboard --logdir log/vc/synthesizer --host localhost --port 8088
|
||||
```
|
||||
### Vocoder
|
||||
|
||||
@@ -90,7 +90,7 @@ python vocoder_train.py <model_id> <datasets_root> --use_tb
|
||||
```
|
||||
If you want to monitor the training progress, run:
|
||||
```
|
||||
tensorboard --logdir log/vocoder --host localhost --port 8080
|
||||
tensorboard --logdir log/vc/vocoder --host localhost --port 8080
|
||||
```
|
||||
**Note:**
|
||||
|
||||
|
||||
31
demo_cli.py
31
demo_cli.py
@@ -11,7 +11,7 @@ if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter
|
||||
)
|
||||
parser.add_argument("--run_id", type=str, default="20230609", help= \
|
||||
parser.add_argument("--run_id", type=str, default="default", help= \
|
||||
"Name for this model. By default, training outputs will be stored to saved_models/<run_id>/. If a model state "
|
||||
"from the same run ID was previously saved, the training will restart from there. Pass -f to overwrite saved "
|
||||
"states and restart from scratch.")
|
||||
@@ -138,10 +138,7 @@ if __name__ == '__main__':
|
||||
while True:
|
||||
# try:
|
||||
# Get the reference audio filepath
|
||||
# enter the number of reference audios
|
||||
message1 = "Please enter the number of reference audios:\n"
|
||||
num_of_input_audio = int(input(message1))
|
||||
# num_of_input_audio = 1
|
||||
num_of_input_audio = 1
|
||||
|
||||
for i in range(num_of_input_audio):
|
||||
# Computing the embedding
|
||||
@@ -219,7 +216,8 @@ if __name__ == '__main__':
|
||||
|
||||
start_syn = time.time()
|
||||
# Generating the spectrogram
|
||||
text = input("Write a sentence to be synthesized:\n")
|
||||
# text = input("Write a sentence to be synthesized:\n")
|
||||
text = "Mechanics is a branch of physics that deals with the behavior of physical bodies under the influence of various forces. The study of mechanics is important in understanding the behavior of machines, the motion of objects, and the principles of engineering. Mechanics has been an essential part of physics since ancient times and has continued to evolve with advancements in science and technology. This paper will discuss the principles of mechanics, the laws of motion, and the applications of mechanics in engineering and technology."
|
||||
|
||||
# If seed is specified, reset torch seed and force synthesizer reload
|
||||
if args.seed is not None:
|
||||
@@ -236,19 +234,30 @@ if __name__ == '__main__':
|
||||
texts = preprocess_text(text)
|
||||
print(f"the list of inputs texts:\n{texts}")
|
||||
|
||||
embeds = [embed] * len(texts)
|
||||
specs, alignments, stop_tokens = synthesizer.synthesize_spectrograms(texts, embeds, require_visualization=True)
|
||||
# embeds = [embed] * len(texts)
|
||||
|
||||
specs = []
|
||||
alignments = []
|
||||
stop_tokens = []
|
||||
|
||||
for text in texts:
|
||||
spec, align, stop_token = synthesizer.synthesize_spectrograms([text], [embed], require_visualization=True)
|
||||
specs.append(spec[0])
|
||||
alignments.append(align[0])
|
||||
stop_tokens.append(stop_token[0])
|
||||
|
||||
breaks = [spec.shape[1] for spec in specs]
|
||||
spec = np.concatenate(specs, axis=1)
|
||||
alignments = np.array(alignments)
|
||||
stop_tokens = np.array(stop_tokens)
|
||||
|
||||
|
||||
## Save synthesizer visualization results
|
||||
if not os.path.exists("syn_results"):
|
||||
os.mkdir("syn_results")
|
||||
# save_attention_multiple(alignments, "syn_results/attention")
|
||||
# save_stop_tokens(stop_tokens, "syn_results/stop_tokens")
|
||||
# save_spectrogram(spec, "syn_results/mel")
|
||||
save_attention_multiple(alignments, "syn_results/attention")
|
||||
save_stop_tokens(stop_tokens, "syn_results/stop_tokens")
|
||||
save_spectrogram(spec, "syn_results/mel")
|
||||
print("Created the mel spectrogram")
|
||||
|
||||
end_syn = time.time()
|
||||
|
||||
Binary file not shown.
BIN
demo_results/text1/260-123286-0000_syn.wav
Normal file
BIN
demo_results/text1/260-123286-0000_syn.wav
Normal file
Binary file not shown.
Binary file not shown.
BIN
demo_results/text1/4294-9934-0000_syn.wav
Normal file
BIN
demo_results/text1/4294-9934-0000_syn.wav
Normal file
Binary file not shown.
Binary file not shown.
BIN
demo_results/text1/7176-88083-0000_syn.wav
Normal file
BIN
demo_results/text1/7176-88083-0000_syn.wav
Normal file
Binary file not shown.
Binary file not shown.
BIN
demo_results/text2/1688-142285-0000_syn.wav
Normal file
BIN
demo_results/text2/1688-142285-0000_syn.wav
Normal file
Binary file not shown.
Binary file not shown.
BIN
demo_results/text2/260-123286-0000_syn.wav
Normal file
BIN
demo_results/text2/260-123286-0000_syn.wav
Normal file
Binary file not shown.
Binary file not shown.
BIN
demo_results/text2/4294-9934-0000_syn.wav
Normal file
BIN
demo_results/text2/4294-9934-0000_syn.wav
Normal file
Binary file not shown.
Binary file not shown.
BIN
demo_results/text2/7176-88083-0000_syn.wav
Normal file
BIN
demo_results/text2/7176-88083-0000_syn.wav
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
demo_results/text3/260-123286-0000_syn.wav
Normal file
BIN
demo_results/text3/260-123286-0000_syn.wav
Normal file
Binary file not shown.
Binary file not shown.
BIN
demo_results/text3/4294-9934-0000_syn.wav
Normal file
BIN
demo_results/text3/4294-9934-0000_syn.wav
Normal file
Binary file not shown.
Binary file not shown.
BIN
demo_results/text3/7176-88083-0000_syn.wav
Normal file
BIN
demo_results/text3/7176-88083-0000_syn.wav
Normal file
Binary file not shown.
Binary file not shown.
@@ -30,5 +30,5 @@ audio_norm_target_dBFS = -30
|
||||
# Cutoff frequency used to decide whether the user's input voice is male or female
|
||||
split_freq = 170
|
||||
# Threshold for zeroing out embedding values when denoising the embed
|
||||
set_zero_thres=0.04
|
||||
set_zero_thres=0.08
|
||||
|
||||
|
||||
40
index.html
40
index.html
@@ -65,24 +65,24 @@
|
||||
</td>
|
||||
<td>Life was like a box of chocolates, you never know what you're gonna get.</td>
|
||||
<td align = "center">
|
||||
<audio controls src="demo_results/text1/260-123286-0000_syn_1.0.wav"></audio>
|
||||
<a href="demo_results/text1/260-123286-0000_syn_1.0.wav">
|
||||
<audio controls src="demo_results/text1/260-123286-0000_syn.wav"></audio>
|
||||
<a href="demo_results/text1/260-123286-0000_syn.wav">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>In 2014, P&G recorded $83.1 billion in sales. On August 1, 2014, P&G announced it was streamlining the company, dropping and selling off around 100 brands from its product portfolio in order to focus on the remaining 65 brands, which produced 95% of the company's profits.</td>
|
||||
<td align = "center">
|
||||
<audio controls src="demo_results/text2/260-123286-0000_syn_1.0.wav"></audio>
|
||||
<a href="demo_results/text2/260-123286-0000_syn_1.0.wav">
|
||||
<audio controls src="demo_results/text2/260-123286-0000_syn.wav"></audio>
|
||||
<a href="demo_results/text2/260-123286-0000_syn.wav">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mechanics is a branch of physics that deals with the behavior of physical bodies under the influence of various forces. The study of mechanics is important in understanding the behavior of machines, the motion of objects, and the principles of engineering. Mechanics has been an essential part of physics since ancient times and has continued to evolve with advancements in science and technology. This paper will discuss the principles of mechanics, the laws of motion, and the applications of mechanics in engineering and technology.</td>
|
||||
<td align = "center">
|
||||
<audio controls src="demo_results/text3/260-123286-0000_syn_0.97.wav"></audio>
|
||||
<a href="demo_results/text3/260-123286-0000_syn_0.97.wav">
|
||||
<audio controls src="demo_results/text3/260-123286-0000_syn.wav"></audio>
|
||||
<a href="demo_results/text3/260-123286-0000_syn.wav">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -102,8 +102,8 @@
|
||||
<tr>
|
||||
<td>In 2014, P&G recorded $83.1 billion in sales. On August 1, 2014, P&G announced it was streamlining the company, dropping and selling off around 100 brands from its product portfolio in order to focus on the remaining 65 brands, which produced 95% of the company's profits.</td>
|
||||
<td align = "center">
|
||||
<audio controls src="demo_results/text2/1688-142285-0000_syn_0.77.wav"></audio>
|
||||
<a href="demo_results/text2/1688-142285-0000_syn_0.77.wav">
|
||||
<audio controls src="demo_results/text2/1688-142285-0000_syn.wav"></audio>
|
||||
<a href="demo_results/text2/1688-142285-0000_syn.wav">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -123,24 +123,24 @@
|
||||
</td>
|
||||
<td>Life was like a box of chocolates, you never know what you're gonna get.</td>
|
||||
<td align = "center">
|
||||
<audio controls src="demo_results/text1/4294-9934-0000_syn_0.98.wav"></audio>
|
||||
<a href="demo_results/text1/4294-9934-0000_syn_0.98.wav">
|
||||
<audio controls src="demo_results/text1/4294-9934-0000_syn.wav"></audio>
|
||||
<a href="demo_results/text1/4294-9934-0000_syn.wav">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>In 2014, P&G recorded $83.1 billion in sales. On August 1, 2014, P&G announced it was streamlining the company, dropping and selling off around 100 brands from its product portfolio in order to focus on the remaining 65 brands, which produced 95% of the company's profits.</td>
|
||||
<td align = "center">
|
||||
<audio controls src="demo_results/text2/4294-9934-0000_syn_0.78.wav"></audio>
|
||||
<a href="demo_results/text2/4294-9934-0000_syn_0.78.wav">
|
||||
<audio controls src="demo_results/text2/4294-9934-0000_syn.wav"></audio>
|
||||
<a href="demo_results/text2/4294-9934-0000_syn.wav">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mechanics is a branch of physics that deals with the behavior of physical bodies under the influence of various forces. The study of mechanics is important in understanding the behavior of machines, the motion of objects, and the principles of engineering. Mechanics has been an essential part of physics since ancient times and has continued to evolve with advancements in science and technology. This paper will discuss the principles of mechanics, the laws of motion, and the applications of mechanics in engineering and technology.</td>
|
||||
<td align = "center">
|
||||
<audio controls src="demo_results/text3/4294-9934-0000_syn_0.76.wav"></audio>
|
||||
<a href="demo_results/text3/4294-9934-0000_syn_0.76.wav">
|
||||
<audio controls src="demo_results/text3/4294-9934-0000_syn.wav"></audio>
|
||||
<a href="demo_results/text3/4294-9934-0000_syn.wav">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -152,24 +152,24 @@
|
||||
</td>
|
||||
<td>Life was like a box of chocolates, you never know what you're gonna get.</td>
|
||||
<td align = "center">
|
||||
<audio controls src="demo_results/text1/7176-88083-0000_syn_1.13.wav"></audio>
|
||||
<a href="demo_results/text1/7176-88083-0000_syn_1.13.wav">
|
||||
<audio controls src="demo_results/text1/7176-88083-0000_syn.wav"></audio>
|
||||
<a href="demo_results/text1/7176-88083-0000_syn.wav">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>In 2014, P&G recorded $83.1 billion in sales. On August 1, 2014, P&G announced it was streamlining the company, dropping and selling off around 100 brands from its product portfolio in order to focus on the remaining 65 brands, which produced 95% of the company's profits.</td>
|
||||
<td align = "center">
|
||||
<audio controls src="demo_results/text2/7176-88083-0000_syn_0.76.wav"></audio>
|
||||
<a href="demo_results/text2/7176-88083-0000_syn_0.76.wav">
|
||||
<audio controls src="demo_results/text2/7176-88083-0000_syn.wav"></audio>
|
||||
<a href="demo_results/text2/7176-88083-0000_syn.wav">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mechanics is a branch of physics that deals with the behavior of physical bodies under the influence of various forces. The study of mechanics is important in understanding the behavior of machines, the motion of objects, and the principles of engineering. Mechanics has been an essential part of physics since ancient times and has continued to evolve with advancements in science and technology. This paper will discuss the principles of mechanics, the laws of motion, and the applications of mechanics in engineering and technology.</td>
|
||||
<td align = "center">
|
||||
<audio controls src="demo_results/text3/7176-88083-0000_syn_0.8.wav"></audio>
|
||||
<a href="demo_results/text3/7176-88083-0000_syn_0.8.wav">
|
||||
<audio controls src="demo_results/text3/7176-88083-0000_syn.wav"></audio>
|
||||
<a href="demo_results/text3/7176-88083-0000_syn.wav">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
@@ -53,9 +53,12 @@ syn_hparams = HParams(
|
||||
tts_schedule = [(2, 1e-3, 40_000, 12), # Progressive training schedule
|
||||
(2, 5e-4, 80_000, 12), # (r, lr, step, batch_size)
|
||||
(2, 2e-4, 160_000, 12), #
|
||||
(2, 1e-4, 320_000, 12), # r = reduction factor (# of mel frames
|
||||
(2, 3e-5, 1280_000, 12), # synthesized for each decoder iteration)
|
||||
(2, 1e-5, 10_240_000, 32)], # lr = learning rate
|
||||
(2, 1e-4, 320_000, 64), # r = reduction factor (# of mel frames
|
||||
(2, 3e-5, 640_000, 64), # synthesized for each decoder iteration)
|
||||
(2, 1e-5, 1280_000, 64),
|
||||
(2, 5e-6, 2560_000, 64),
|
||||
(2, 1e-6, 5120_000, 64)],
|
||||
# lr = learning rate
|
||||
|
||||
tts_clip_grad_norm = 1.0, # clips the gradient norm to prevent explosion - set to None if not needed
|
||||
tts_eval_interval = 100, # Number of steps between model evaluation (sample generation)
|
||||
|
||||
@@ -445,7 +445,6 @@ class Tacotron(nn.Module):
|
||||
# Need a couple of lists for outputs
|
||||
mel_outputs, attn_scores, stop_outputs = [], [], []
|
||||
|
||||
first_stop_token = 0
|
||||
# Run the decoder loop
|
||||
for t in range(0, steps, self.r):
|
||||
prenet_in = mel_outputs[-1][:, :, -1] if t > 0 else go_frame
|
||||
@@ -456,10 +455,11 @@ class Tacotron(nn.Module):
|
||||
attn_scores.append(scores)
|
||||
stop_outputs.extend([stop_tokens] * self.r)
|
||||
if t == 0:
|
||||
first_stop_token = stop_tokens[0]
|
||||
first_stop_token = stop_tokens
|
||||
# Stop the loop when all stop tokens in batch exceed threshold compared with the 1st token and the sequence's length exceeds threshold
|
||||
if (stop_tokens > first_stop_token * 8e3).all() and t > (20 * self.r): break
|
||||
# if (stop_tokens > 0.5).all() and t > (20 * self.r): break
|
||||
# if torch.gt(stop_tokens, first_stop_token*10).all() and t > (1 * self.r):
|
||||
# break
|
||||
if (stop_tokens > 0.01).all() and t > (20 * self.r): break
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from datetime import datetime
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from os.path import exists
|
||||
import os
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
@@ -38,7 +39,7 @@ def train(run_id: str, syn_dir: Path, models_dir: Path, save_every: int, backup
|
||||
import tensorflow as tf
|
||||
import datetime
|
||||
# Hide GPU from visible devices
|
||||
log_dir = f"log/synthesizer/tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||
log_dir = f"log/vc/synthesizer/tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||
train_summary_writer = tf.summary.create_file_writer(log_dir)
|
||||
models_dir.mkdir(exist_ok=True)
|
||||
|
||||
@@ -135,8 +136,11 @@ def train(run_id: str, syn_dir: Path, models_dir: Path, save_every: int, backup
|
||||
train_dataset = SynthesizerDataset(train_metadata_fpath, train_mel_dir, train_embed_dir, hparams)
|
||||
dev_dataset = SynthesizerDataset(dev_metadata_fpath, dev_mel_dir, dev_embed_dir, hparams)
|
||||
|
||||
# best_loss_file_path = "synthesizer_loss/best_loss.npy"
|
||||
# best_loss = np.load(best_loss_file_path)[0] if exists(best_loss_file_path) else 1000
|
||||
best_loss_file_path = "synthesizer_loss/best_loss.npy"
|
||||
best_loss = np.load(best_loss_file_path)[0] if exists(best_loss_file_path) else 1000
|
||||
|
||||
if not exists("synthesizer_loss"):
|
||||
os.makedirs("synthesizer_loss")
|
||||
|
||||
# profiler = Profiler(summarize_every=10, disabled=False)
|
||||
for i, session in enumerate(hparams.tts_schedule):
|
||||
@@ -263,9 +267,10 @@ def train(run_id: str, syn_dir: Path, models_dir: Path, save_every: int, backup
|
||||
|
||||
# Must save latest optimizer state to ensure that resuming training
|
||||
# doesn't produce artifacts
|
||||
# best_loss = dev_loss
|
||||
# np.save(best_loss_file_path, np.array([best_loss]))
|
||||
model.save(weights_fpath, optimizer)
|
||||
if dev_loss < best_loss:
|
||||
best_loss = dev_loss
|
||||
np.save(best_loss_file_path, np.array([best_loss]))
|
||||
model.save(weights_fpath, optimizer)
|
||||
|
||||
# Evaluate model to generate dev samples
|
||||
# epoch_eval = hparams.tts_eval_interval == -1 and i == steps_per_epoch # If epoch is done
|
||||
|
||||
@@ -47,5 +47,5 @@ is_crossfade = True # crossfading or not
|
||||
# Output Noise Reduce
|
||||
prop_decrease_low_freq = 0.6 # prop decrease for low dominant frequency
|
||||
prop_decrease_high_freq = 0.9 # prop decrease for high dominant frequency
|
||||
dry=0.1 # dry ratio for facebook denoiser
|
||||
dry = 0.1 # dry ratio for facebook denoiser
|
||||
sex = -1
|
||||
@@ -25,7 +25,7 @@ def train(run_id: str, syn_dir: Path, voc_dir: Path, models_dir: Path, ground_tr
|
||||
import tensorflow as tf
|
||||
import datetime
|
||||
# Hide GPU from visible devices
|
||||
log_dir = f"log/vocoder/tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||
log_dir = f"log/vc/vocoder/tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||
train_summary_writer = tf.summary.create_file_writer(log_dir)
|
||||
# Check to make sure the hop length is correctly factorised
|
||||
train_syn_dir = syn_dir.joinpath("train")
|
||||
|
||||
Reference in New Issue
Block a user