Save toolbox results (embedding and UMAP figures, attention and stop-token plots) to toolbox_results/

2026-05-18 05:04:51 +02:00 · 2023-06-26 16:10:33 +08:00
parent 995d0e6624
commit 5347c8c849
2 changed files with 24 additions and 10 deletions
--- a/toolbox/init.py
+++ b/toolbox/init.py
@@ -14,6 +14,7 @@ import encoder
 from encoder import inference as encoder_infer
 from synthesizer.inference import Synthesizer_infer
 from synthesizer.utils.cleaners import add_breaks, english_cleaners_predict
+from vocoder.display import save_attention_multiple, save_spectrogram, save_stop_tokens
 from synthesizer.hparams import syn_hparams
 from toolbox.ui import UI
 from toolbox.utterance import Utterance
@@ -65,6 +66,9 @@ class Toolbox:
        self.start_generate_time = None
        self.nlp = spacy.load('en_core_web_sm')

+        if not os.path.exists("toolbox_results"):
+            os.mkdir("toolbox_results")
+
        # Check for webrtcvad (enables removal of silences in vocoder output)
        try:
            import webrtcvad
@@ -210,6 +214,9 @@ class Toolbox:
        # Plot it
        self.ui.draw_embed(embed, name, "current")
        self.ui.draw_umap_projections(self.utterances)
+        self.ui.wav_ori_fig.savefig(f"toolbox_results/{name}_info.png", dpi=500)
+        if len(self.utterances) >= self.ui.min_umap_points:
+            self.ui.umap_fig.savefig(f"toolbox_results/umap_{len(self.utterances)}.png", dpi=500)

    def clear_utterances(self):
        self.utterances.clear()
@@ -251,6 +258,9 @@ class Toolbox:
        breaks = [spec.shape[1] for spec in specs]
        spec = np.concatenate(specs, axis=1)

+        save_attention_multiple(alignments, "toolbox_results/attention")
+        save_stop_tokens(stop_tokens, "toolbox_results/stop_tokens")
+
        self.ui.draw_spec(spec, "generated")
        self.current_generated = (self.ui.selected_utterance.speaker_name, spec, breaks, None)
        self.ui.set_loading(0)
@@ -355,6 +365,9 @@ class Toolbox:
        # Plot it
        self.ui.draw_embed(embed, name, "generated")
        self.ui.draw_umap_projections(self.utterances)
+        self.ui.wav_gen_fig.savefig(f"toolbox_results/{name}_info.png", dpi=500)
+        if len(self.utterances) >= self.ui.min_umap_points:
+            self.ui.umap_fig.savefig(f"toolbox_results/umap_{len(self.utterances)}.png", dpi=500)

    def init_encoder(self):
        model_fpath = self.ui.current_encoder_fpath
--- a/toolbox/ui.py
+++ b/toolbox/ui.py
@@ -449,14 +449,15 @@ class UI(QDialog):

        ## Projections
        # UMap
-        fig, self.umap_ax = plt.subplots(figsize=(3, 3), facecolor="#F0F0F0")
-        fig.subplots_adjust(left=0.02, bottom=0.02, right=0.98, top=0.98)
-        self.projections_layout.addWidget(FigureCanvas(fig))
+        self.umap_fig, self.umap_ax = plt.subplots(1, 1, figsize=(3, 3), facecolor="#F0F0F0")
+        self.umap_fig.subplots_adjust(left=0.02, bottom=0.02, right=0.98, top=0.9)
+        self.projections_layout.addWidget(FigureCanvas(self.umap_fig))
        self.umap_hot = False
        self.clear_button = QPushButton("Clear")
        self.projections_layout.addWidget(self.clear_button)


+
        ## Browser
        # Dataset, speaker and utterance selection
        i = 0
@@ -539,15 +540,15 @@ class UI(QDialog):
        vis_layout.addStretch()

        gridspec_kw = {"width_ratios": [1, 4]}
-        fig, self.current_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
+        self.wav_ori_fig, self.current_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
                                            gridspec_kw=gridspec_kw)
-        fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
-        vis_layout.addWidget(FigureCanvas(fig))
+        self.wav_ori_fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
+        vis_layout.addWidget(FigureCanvas(self.wav_ori_fig))

-        fig, self.gen_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
+        self.wav_gen_fig, self.gen_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
                                        gridspec_kw=gridspec_kw)
-        fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
-        vis_layout.addWidget(FigureCanvas(fig))
+        self.wav_gen_fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
+        vis_layout.addWidget(FigureCanvas(self.wav_gen_fig))

        for ax in self.current_ax.tolist() + self.gen_ax.tolist():
            ax.set_facecolor("#F0F0F0")
@@ -599,7 +600,7 @@ class UI(QDialog):


        ## Set the size of the window and of the elements
-        max_size = QDesktopWidget().availableGeometry(self).size() * 0.8
+        max_size = QDesktopWidget().availableGeometry(self).size() 
        self.resize(max_size)

        ## Finalize the display