Save toolbox results (utterance and UMAP figures, attention and stop-token outputs) to toolbox_results/

This commit is contained in:
liuhaozhe6788
2023-06-26 16:10:33 +08:00
parent 995d0e6624
commit 5347c8c849
2 changed files with 24 additions and 10 deletions

View File

@@ -14,6 +14,7 @@ import encoder
from encoder import inference as encoder_infer
from synthesizer.inference import Synthesizer_infer
from synthesizer.utils.cleaners import add_breaks, english_cleaners_predict
from vocoder.display import save_attention_multiple, save_spectrogram, save_stop_tokens
from synthesizer.hparams import syn_hparams
from toolbox.ui import UI
from toolbox.utterance import Utterance
@@ -65,6 +66,9 @@ class Toolbox:
self.start_generate_time = None
self.nlp = spacy.load('en_core_web_sm')
if not os.path.exists("toolbox_results"):
os.mkdir("toolbox_results")
# Check for webrtcvad (enables removal of silences in vocoder output)
try:
import webrtcvad
@@ -210,6 +214,9 @@ class Toolbox:
# Plot it
self.ui.draw_embed(embed, name, "current")
self.ui.draw_umap_projections(self.utterances)
self.ui.wav_ori_fig.savefig(f"toolbox_results/{name}_info.png", dpi=500)
if len(self.utterances) >= self.ui.min_umap_points:
self.ui.umap_fig.savefig(f"toolbox_results/umap_{len(self.utterances)}.png", dpi=500)
def clear_utterances(self):
self.utterances.clear()
@@ -251,6 +258,9 @@ class Toolbox:
breaks = [spec.shape[1] for spec in specs]
spec = np.concatenate(specs, axis=1)
save_attention_multiple(alignments, "toolbox_results/attention")
save_stop_tokens(stop_tokens, "toolbox_results/stop_tokens")
self.ui.draw_spec(spec, "generated")
self.current_generated = (self.ui.selected_utterance.speaker_name, spec, breaks, None)
self.ui.set_loading(0)
@@ -355,6 +365,9 @@ class Toolbox:
# Plot it
self.ui.draw_embed(embed, name, "generated")
self.ui.draw_umap_projections(self.utterances)
self.ui.wav_gen_fig.savefig(f"toolbox_results/{name}_info.png", dpi=500)
if len(self.utterances) >= self.ui.min_umap_points:
self.ui.umap_fig.savefig(f"toolbox_results/umap_{len(self.utterances)}.png", dpi=500)
def init_encoder(self):
model_fpath = self.ui.current_encoder_fpath

View File

@@ -449,14 +449,15 @@ class UI(QDialog):
## Projections
# UMap
fig, self.umap_ax = plt.subplots(figsize=(3, 3), facecolor="#F0F0F0")
fig.subplots_adjust(left=0.02, bottom=0.02, right=0.98, top=0.98)
self.projections_layout.addWidget(FigureCanvas(fig))
self.umap_fig, self.umap_ax = plt.subplots(1, 1, figsize=(3, 3), facecolor="#F0F0F0")
self.umap_fig.subplots_adjust(left=0.02, bottom=0.02, right=0.98, top=0.9)
self.projections_layout.addWidget(FigureCanvas(self.umap_fig))
self.umap_hot = False
self.clear_button = QPushButton("Clear")
self.projections_layout.addWidget(self.clear_button)
## Browser
# Dataset, speaker and utterance selection
i = 0
@@ -539,15 +540,15 @@ class UI(QDialog):
vis_layout.addStretch()
gridspec_kw = {"width_ratios": [1, 4]}
fig, self.current_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
self.wav_ori_fig, self.current_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
gridspec_kw=gridspec_kw)
fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
vis_layout.addWidget(FigureCanvas(fig))
self.wav_ori_fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
vis_layout.addWidget(FigureCanvas(self.wav_ori_fig))
fig, self.gen_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
self.wav_gen_fig, self.gen_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
gridspec_kw=gridspec_kw)
fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
vis_layout.addWidget(FigureCanvas(fig))
self.wav_gen_fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
vis_layout.addWidget(FigureCanvas(self.wav_gen_fig))
for ax in self.current_ax.tolist() + self.gen_ax.tolist():
ax.set_facecolor("#F0F0F0")
@@ -599,7 +600,7 @@ class UI(QDialog):
## Set the size of the window and of the elements
max_size = QDesktopWidget().availableGeometry(self).size() * 0.8
max_size = QDesktopWidget().availableGeometry(self).size()
self.resize(max_size)
## Finalize the display