new commits

2026-05-18 05:04:51 +02:00 · 2023-06-15 10:36:53 +08:00
parent 4989ddde52
commit e05b534129
4 changed files with 24 additions and 7 deletions
--- a/demo_cli.py
+++ b/demo_cli.py
@@ -45,9 +45,9 @@ if __name__ == '__main__':
    import encoder.inference
    import encoder.params_data 
    from synthesizer.inference import Synthesizer
-    from synthesizer.utils.cleaners import add_breaks, english_cleaners
+    from synthesizer.utils.cleaners import add_breaks, english_cleaners_predict
    from vocoder import inference as vocoder
-    from vocoder.display import save_attention, save_spectrogram, save_stop_tokens
+    from vocoder.display import save_attention_multiple, save_spectrogram, save_stop_tokens
    from utils.argutils import print_args
    from utils.default_models import ensure_default_models
    from speed_changer.fixSpeed import *
@@ -228,7 +228,7 @@ if __name__ == '__main__':
    # The synthesizer works in batch, so you need to put your data in a list or numpy array
    def preprocess_text(text):
        text = add_breaks(text) 
-        text = english_cleaners(text)
+        text = english_cleaners_predict(text)
        texts = [i.text.strip() for i in nlp(text).sents]  # split the cleaned paragraph into stripped sentences via nlp(text).sents
        return texts

--- a/synthesizer/train.py
+++ b/synthesizer/train.py
@@ -329,7 +329,7 @@ def eval_model(attention, mel_prediction, target_spectrogram, input_seq, step,
               plot_dir, mel_output_dir, wav_dir, sample_num, loss, hparams, if_dev = None):
    # Save some results for evaluation
    attention_path = str(plot_dir.joinpath("{}_attention_step_{}_sample_{}".format(if_dev, step, sample_num)))
-    save_attention(attention, attention_path)
+    save_attention_multiple(attention, attention_path)

    # save predicted mel spectrogram to disk (debug)
    mel_output_fpath = mel_output_dir.joinpath("{}-mel-prediction-step-{}_sample_{}.npy".format(if_dev, step, sample_num))
--- a/synthesizer/utils/cleaners.py
+++ b/synthesizer/utils/cleaners.py
@@ -213,8 +213,9 @@ def transliteration_cleaners(text):
    return text


-def english_cleaners(text):
-    """Pipeline for English text, including number and abbreviation expansion."""
+def english_cleaners_predict(text):
+    """Pipeline for English text, including number and abbreviation expansion for prediction."""
+    text = convert_to_ascii(text)
    text = replace_special_char(text)
    text = expand_abbreviations(text)
    text = letter2pronunciation(text)
@@ -223,3 +224,12 @@ def english_cleaners(text):
    # text = split_conj(text) 
    text = collapse_whitespace(text)
    return text
+
+def english_cleaners(text):
+    """Pipeline for English text used during training preprocessing, including number and abbreviation expansion."""
+    text = convert_to_ascii(text)
+    text = lowercase(text)
+    text = expand_numbers(text)
+    text = expand_abbreviations(text)
+    text = collapse_whitespace(text)
+    return text
--- a/vocoder/display.py
+++ b/vocoder/display.py
@@ -82,10 +82,17 @@ def time_since(started) :
    else :
        return f'{m}m {s}s'

-
 def save_attention(attn, path):  # draw `attn.T` as one image and write it to '<path>.png'
    import matplotlib.pyplot as plt  # imported inside the function rather than at module level

+    fig = plt.figure(figsize=(12, 6))
+    plt.imshow(attn.T, interpolation='nearest', aspect='auto')  # attn is transposed before plotting
+    fig.savefig(f'{path}.png', bbox_inches='tight')  # the '.png' suffix is appended to `path` here
+    plt.close(fig)  # close the figure once it has been saved
+
+def save_attention_multiple(attn, path):
+    import matplotlib.pyplot as plt
+
    num_plots = len(attn)
    fig = plt.figure(figsize=(12, 6 * num_plots))
    for i, a in enumerate(attn):