new commits

This commit is contained in:
liuhaozhe6788
2023-09-09 13:01:13 +08:00
parent eefa3da3ca
commit 949ba9fa74
31 changed files with 66 additions and 49 deletions

View File

@@ -71,7 +71,7 @@ python synthesizer_train.py <model_id> <datasets_root>/SV2TTS/synthesizer --use_
```
To monitor the training progress, run:
```
tensorboard --logdir log/synthesizer --host localhost --port 8088
tensorboard --logdir log/vc/synthesizer --host localhost --port 8088
```
### Vocoder
@@ -90,7 +90,7 @@ python vocoder_train.py <model_id> <datasets_root> --use_tb
```
To monitor the training progress, run:
```
tensorboard --logdir log/vocoder --host localhost --port 8080
tensorboard --logdir log/vc/vocoder --host localhost --port 8080
```
**Note:**

View File

@@ -11,7 +11,7 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument("--run_id", type=str, default="20230609", help= \
parser.add_argument("--run_id", type=str, default="default", help= \
"Name for this model. By default, training outputs will be stored to saved_models/<run_id>/. If a model state "
"from the same run ID was previously saved, the training will restart from there. Pass -f to overwrite saved "
"states and restart from scratch.")
@@ -138,10 +138,7 @@ if __name__ == '__main__':
while True:
# try:
# Get the reference audio filepath
# enter the number of reference audios
message1 = "Please enter the number of reference audios:\n"
num_of_input_audio = int(input(message1))
# num_of_input_audio = 1
num_of_input_audio = 1
for i in range(num_of_input_audio):
# Computing the embedding
@@ -219,7 +216,8 @@ if __name__ == '__main__':
start_syn = time.time()
# Generating the spectrogram
text = input("Write a sentence to be synthesized:\n")
# text = input("Write a sentence to be synthesized:\n")
text = "Mechanics is a branch of physics that deals with the behavior of physical bodies under the influence of various forces. The study of mechanics is important in understanding the behavior of machines, the motion of objects, and the principles of engineering. Mechanics has been an essential part of physics since ancient times and has continued to evolve with advancements in science and technology. This paper will discuss the principles of mechanics, the laws of motion, and the applications of mechanics in engineering and technology."
# If seed is specified, reset torch seed and force synthesizer reload
if args.seed is not None:
@@ -236,19 +234,30 @@ if __name__ == '__main__':
texts = preprocess_text(text)
print(f"the list of inputs texts:\n{texts}")
embeds = [embed] * len(texts)
specs, alignments, stop_tokens = synthesizer.synthesize_spectrograms(texts, embeds, require_visualization=True)
# embeds = [embed] * len(texts)
specs = []
alignments = []
stop_tokens = []
for text in texts:
spec, align, stop_token = synthesizer.synthesize_spectrograms([text], [embed], require_visualization=True)
specs.append(spec[0])
alignments.append(align[0])
stop_tokens.append(stop_token[0])
breaks = [spec.shape[1] for spec in specs]
spec = np.concatenate(specs, axis=1)
alignments = np.array(alignments)
stop_tokens = np.array(stop_tokens)
## Save synthesizer visualization results
if not os.path.exists("syn_results"):
os.mkdir("syn_results")
# save_attention_multiple(alignments, "syn_results/attention")
# save_stop_tokens(stop_tokens, "syn_results/stop_tokens")
# save_spectrogram(spec, "syn_results/mel")
save_attention_multiple(alignments, "syn_results/attention")
save_stop_tokens(stop_tokens, "syn_results/stop_tokens")
save_spectrogram(spec, "syn_results/mel")
print("Created the mel spectrogram")
end_syn = time.time()

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -30,5 +30,5 @@ audio_norm_target_dBFS = -30
# Cut-off frequency used to decide whether the user's input speech is a male or a female voice
split_freq = 170
# Threshold used for zeroing out values when denoising the embed
set_zero_thres=0.04
set_zero_thres=0.08

View File

@@ -65,24 +65,24 @@
</td>
<td>Life was like a box of chocolates, you never know what you're gonna get.</td>
<td align = "center">
<audio controls src="demo_results/text1/260-123286-0000_syn_1.0.wav"></audio>
<a href="demo_results/text1/260-123286-0000_syn_1.0.wav">
<audio controls src="demo_results/text1/260-123286-0000_syn.wav"></audio>
<a href="demo_results/text1/260-123286-0000_syn.wav">
</a>
</td>
</tr>
<tr>
<td>In 2014, P&G recorded $83.1 billion in sales. On August 1, 2014, P&G announced it was streamlining the company, dropping and selling off around 100 brands from its product portfolio in order to focus on the remaining 65 brands, which produced 95% of the company's profits.</td>
<td align = "center">
<audio controls src="demo_results/text2/260-123286-0000_syn_1.0.wav"></audio>
<a href="demo_results/text2/260-123286-0000_syn_1.0.wav">
<audio controls src="demo_results/text2/260-123286-0000_syn.wav"></audio>
<a href="demo_results/text2/260-123286-0000_syn.wav">
</a>
</td>
</tr>
<tr>
<td>Mechanics is a branch of physics that deals with the behavior of physical bodies under the influence of various forces. The study of mechanics is important in understanding the behavior of machines, the motion of objects, and the principles of engineering. Mechanics has been an essential part of physics since ancient times and has continued to evolve with advancements in science and technology. This paper will discuss the principles of mechanics, the laws of motion, and the applications of mechanics in engineering and technology.</td>
<td align = "center">
<audio controls src="demo_results/text3/260-123286-0000_syn_0.97.wav"></audio>
<a href="demo_results/text3/260-123286-0000_syn_0.97.wav">
<audio controls src="demo_results/text3/260-123286-0000_syn.wav"></audio>
<a href="demo_results/text3/260-123286-0000_syn.wav">
</a>
</td>
</tr>
@@ -102,8 +102,8 @@
<tr>
<td>In 2014, P&G recorded $83.1 billion in sales. On August 1, 2014, P&G announced it was streamlining the company, dropping and selling off around 100 brands from its product portfolio in order to focus on the remaining 65 brands, which produced 95% of the company's profits.</td>
<td align = "center">
<audio controls src="demo_results/text2/1688-142285-0000_syn_0.77.wav"></audio>
<a href="demo_results/text2/1688-142285-0000_syn_0.77.wav">
<audio controls src="demo_results/text2/1688-142285-0000_syn.wav"></audio>
<a href="demo_results/text2/1688-142285-0000_syn.wav">
</a>
</td>
</tr>
@@ -123,24 +123,24 @@
</td>
<td>Life was like a box of chocolates, you never know what you're gonna get.</td>
<td align = "center">
<audio controls src="demo_results/text1/4294-9934-0000_syn_0.98.wav"></audio>
<a href="demo_results/text1/4294-9934-0000_syn_0.98.wav">
<audio controls src="demo_results/text1/4294-9934-0000_syn.wav"></audio>
<a href="demo_results/text1/4294-9934-0000_syn.wav">
</a>
</td>
</tr>
<tr>
<td>In 2014, P&G recorded $83.1 billion in sales. On August 1, 2014, P&G announced it was streamlining the company, dropping and selling off around 100 brands from its product portfolio in order to focus on the remaining 65 brands, which produced 95% of the company's profits.</td>
<td align = "center">
<audio controls src="demo_results/text2/4294-9934-0000_syn_0.78.wav"></audio>
<a href="demo_results/text2/4294-9934-0000_syn_0.78.wav">
<audio controls src="demo_results/text2/4294-9934-0000_syn.wav"></audio>
<a href="demo_results/text2/4294-9934-0000_syn.wav">
</a>
</td>
</tr>
<tr>
<td>Mechanics is a branch of physics that deals with the behavior of physical bodies under the influence of various forces. The study of mechanics is important in understanding the behavior of machines, the motion of objects, and the principles of engineering. Mechanics has been an essential part of physics since ancient times and has continued to evolve with advancements in science and technology. This paper will discuss the principles of mechanics, the laws of motion, and the applications of mechanics in engineering and technology.</td>
<td align = "center">
<audio controls src="demo_results/text3/4294-9934-0000_syn_0.76.wav"></audio>
<a href="demo_results/text3/4294-9934-0000_syn_0.76.wav">
<audio controls src="demo_results/text3/4294-9934-0000_syn.wav"></audio>
<a href="demo_results/text3/4294-9934-0000_syn.wav">
</a>
</td>
</tr>
@@ -152,24 +152,24 @@
</td>
<td>Life was like a box of chocolates, you never know what you're gonna get.</td>
<td align = "center">
<audio controls src="demo_results/text1/7176-88083-0000_syn_1.13.wav"></audio>
<a href="demo_results/text1/7176-88083-0000_syn_1.13.wav">
<audio controls src="demo_results/text1/7176-88083-0000_syn.wav"></audio>
<a href="demo_results/text1/7176-88083-0000_syn.wav">
</a>
</td>
</tr>
<tr>
<td>In 2014, P&G recorded $83.1 billion in sales. On August 1, 2014, P&G announced it was streamlining the company, dropping and selling off around 100 brands from its product portfolio in order to focus on the remaining 65 brands, which produced 95% of the company's profits.</td>
<td align = "center">
<audio controls src="demo_results/text2/7176-88083-0000_syn_0.76.wav"></audio>
<a href="demo_results/text2/7176-88083-0000_syn_0.76.wav">
<audio controls src="demo_results/text2/7176-88083-0000_syn.wav"></audio>
<a href="demo_results/text2/7176-88083-0000_syn.wav">
</a>
</td>
</tr>
<tr>
<td>Mechanics is a branch of physics that deals with the behavior of physical bodies under the influence of various forces. The study of mechanics is important in understanding the behavior of machines, the motion of objects, and the principles of engineering. Mechanics has been an essential part of physics since ancient times and has continued to evolve with advancements in science and technology. This paper will discuss the principles of mechanics, the laws of motion, and the applications of mechanics in engineering and technology.</td>
<td align = "center">
<audio controls src="demo_results/text3/7176-88083-0000_syn_0.8.wav"></audio>
<a href="demo_results/text3/7176-88083-0000_syn_0.8.wav">
<audio controls src="demo_results/text3/7176-88083-0000_syn.wav"></audio>
<a href="demo_results/text3/7176-88083-0000_syn.wav">
</a>
</td>
</tr>

View File

@@ -53,9 +53,12 @@ syn_hparams = HParams(
tts_schedule = [(2, 1e-3, 40_000, 12), # Progressive training schedule
(2, 5e-4, 80_000, 12), # (r, lr, step, batch_size)
(2, 2e-4, 160_000, 12), #
(2, 1e-4, 320_000, 12), # r = reduction factor (# of mel frames
(2, 3e-5, 1280_000, 12), # synthesized for each decoder iteration)
(2, 1e-5, 10_240_000, 32)], # lr = learning rate
(2, 1e-4, 320_000, 64), # r = reduction factor (# of mel frames
(2, 3e-5, 640_000, 64), # synthesized for each decoder iteration)
(2, 1e-5, 1280_000, 64),
(2, 5e-6, 2560_000, 64),
(2, 1e-6, 5120_000, 64)],
# lr = learning rate
tts_clip_grad_norm = 1.0, # clips the gradient norm to prevent explosion - set to None if not needed
tts_eval_interval = 100, # Number of steps between model evaluation (sample generation)

View File

@@ -445,7 +445,6 @@ class Tacotron(nn.Module):
# Need a couple of lists for outputs
mel_outputs, attn_scores, stop_outputs = [], [], []
first_stop_token = 0
# Run the decoder loop
for t in range(0, steps, self.r):
prenet_in = mel_outputs[-1][:, :, -1] if t > 0 else go_frame
@@ -456,10 +455,11 @@ class Tacotron(nn.Module):
attn_scores.append(scores)
stop_outputs.extend([stop_tokens] * self.r)
if t == 0:
first_stop_token = stop_tokens[0]
first_stop_token = stop_tokens
# Stop the loop when all stop tokens in batch exceed threshold compared with the 1st token and the sequence's length exceeds threshold
if (stop_tokens > first_stop_token * 8e3).all() and t > (20 * self.r): break
# if (stop_tokens > 0.5).all() and t > (20 * self.r): break
# if torch.gt(stop_tokens, first_stop_token*10).all() and t > (1 * self.r):
# break
if (stop_tokens > 0.01).all() and t > (20 * self.r): break
if torch.cuda.is_available():
torch.cuda.empty_cache()

View File

@@ -2,6 +2,7 @@ from datetime import datetime
from functools import partial
from pathlib import Path
from os.path import exists
import os
import torch
import torch.nn.functional as F
@@ -38,7 +39,7 @@ def train(run_id: str, syn_dir: Path, models_dir: Path, save_every: int, backup
import tensorflow as tf
import datetime
# Hide GPU from visible devices
log_dir = f"log/synthesizer/tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = f"log/vc/synthesizer/tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_summary_writer = tf.summary.create_file_writer(log_dir)
models_dir.mkdir(exist_ok=True)
@@ -135,8 +136,11 @@ def train(run_id: str, syn_dir: Path, models_dir: Path, save_every: int, backup
train_dataset = SynthesizerDataset(train_metadata_fpath, train_mel_dir, train_embed_dir, hparams)
dev_dataset = SynthesizerDataset(dev_metadata_fpath, dev_mel_dir, dev_embed_dir, hparams)
# best_loss_file_path = "synthesizer_loss/best_loss.npy"
# best_loss = np.load(best_loss_file_path)[0] if exists(best_loss_file_path) else 1000
best_loss_file_path = "synthesizer_loss/best_loss.npy"
best_loss = np.load(best_loss_file_path)[0] if exists(best_loss_file_path) else 1000
if not exists("synthesizer_loss"):
os.makedirs("synthesizer_loss")
# profiler = Profiler(summarize_every=10, disabled=False)
for i, session in enumerate(hparams.tts_schedule):
@@ -263,9 +267,10 @@ def train(run_id: str, syn_dir: Path, models_dir: Path, save_every: int, backup
# Must save latest optimizer state to ensure that resuming training
# doesn't produce artifacts
# best_loss = dev_loss
# np.save(best_loss_file_path, np.array([best_loss]))
model.save(weights_fpath, optimizer)
if dev_loss < best_loss:
best_loss = dev_loss
np.save(best_loss_file_path, np.array([best_loss]))
model.save(weights_fpath, optimizer)
# Evaluate model to generate dev samples
# epoch_eval = hparams.tts_eval_interval == -1 and i == steps_per_epoch # If epoch is done

View File

@@ -47,5 +47,5 @@ is_crossfade = True # crossfading or not
# Output Noise Reduce
prop_decrease_low_freq = 0.6 # prop decrease for low dominant frequency
prop_decrease_high_freq = 0.9 # prop decrease for high dominant frequency
dry=0.1 # dry ratio for facebook denoiser
dry = 0.1 # dry ratio for facebook denoiser
sex = -1

View File

@@ -25,7 +25,7 @@ def train(run_id: str, syn_dir: Path, voc_dir: Path, models_dir: Path, ground_tr
import tensorflow as tf
import datetime
# Hide GPU from visible devices
log_dir = f"log/vocoder/tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = f"log/vc/vocoder/tensorboard/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_summary_writer = tf.summary.create_file_writer(log_dir)
# Check to make sure the hop length is correctly factorised
train_syn_dir = syn_dir.joinpath("train")