mirror of
https://github.com/liuhaozhe6788/voice-cloning-collab.git
synced 2026-05-18 05:04:51 +02:00
save results from toolbox
This commit is contained in:
@@ -14,6 +14,7 @@ import encoder
|
||||
from encoder import inference as encoder_infer
|
||||
from synthesizer.inference import Synthesizer_infer
|
||||
from synthesizer.utils.cleaners import add_breaks, english_cleaners_predict
|
||||
from vocoder.display import save_attention_multiple, save_spectrogram, save_stop_tokens
|
||||
from synthesizer.hparams import syn_hparams
|
||||
from toolbox.ui import UI
|
||||
from toolbox.utterance import Utterance
|
||||
@@ -65,6 +66,9 @@ class Toolbox:
|
||||
self.start_generate_time = None
|
||||
self.nlp = spacy.load('en_core_web_sm')
|
||||
|
||||
if not os.path.exists("toolbox_results"):
|
||||
os.mkdir("toolbox_results")
|
||||
|
||||
# Check for webrtcvad (enables removal of silences in vocoder output)
|
||||
try:
|
||||
import webrtcvad
|
||||
@@ -210,6 +214,9 @@ class Toolbox:
|
||||
# Plot it
|
||||
self.ui.draw_embed(embed, name, "current")
|
||||
self.ui.draw_umap_projections(self.utterances)
|
||||
self.ui.wav_ori_fig.savefig(f"toolbox_results/{name}_info.png", dpi=500)
|
||||
if len(self.utterances) >= self.ui.min_umap_points:
|
||||
self.ui.umap_fig.savefig(f"toolbox_results/umap_{len(self.utterances)}.png", dpi=500)
|
||||
|
||||
def clear_utterances(self):
|
||||
self.utterances.clear()
|
||||
@@ -251,6 +258,9 @@ class Toolbox:
|
||||
breaks = [spec.shape[1] for spec in specs]
|
||||
spec = np.concatenate(specs, axis=1)
|
||||
|
||||
save_attention_multiple(alignments, "toolbox_results/attention")
|
||||
save_stop_tokens(stop_tokens, "toolbox_results/stop_tokens")
|
||||
|
||||
self.ui.draw_spec(spec, "generated")
|
||||
self.current_generated = (self.ui.selected_utterance.speaker_name, spec, breaks, None)
|
||||
self.ui.set_loading(0)
|
||||
@@ -355,6 +365,9 @@ class Toolbox:
|
||||
# Plot it
|
||||
self.ui.draw_embed(embed, name, "generated")
|
||||
self.ui.draw_umap_projections(self.utterances)
|
||||
self.ui.wav_gen_fig.savefig(f"toolbox_results/{name}_info.png", dpi=500)
|
||||
if len(self.utterances) >= self.ui.min_umap_points:
|
||||
self.ui.umap_fig.savefig(f"toolbox_results/umap_{len(self.utterances)}.png", dpi=500)
|
||||
|
||||
def init_encoder(self):
|
||||
model_fpath = self.ui.current_encoder_fpath
|
||||
|
||||
@@ -449,14 +449,15 @@ class UI(QDialog):
|
||||
|
||||
## Projections
|
||||
# UMAP
|
||||
fig, self.umap_ax = plt.subplots(figsize=(3, 3), facecolor="#F0F0F0")
|
||||
fig.subplots_adjust(left=0.02, bottom=0.02, right=0.98, top=0.98)
|
||||
self.projections_layout.addWidget(FigureCanvas(fig))
|
||||
self.umap_fig, self.umap_ax = plt.subplots(1, 1, figsize=(3, 3), facecolor="#F0F0F0")
|
||||
self.umap_fig.subplots_adjust(left=0.02, bottom=0.02, right=0.98, top=0.9)
|
||||
self.projections_layout.addWidget(FigureCanvas(self.umap_fig))
|
||||
self.umap_hot = False
|
||||
self.clear_button = QPushButton("Clear")
|
||||
self.projections_layout.addWidget(self.clear_button)
|
||||
|
||||
|
||||
|
||||
## Browser
|
||||
# Dataset, speaker and utterance selection
|
||||
i = 0
|
||||
@@ -539,15 +540,15 @@ class UI(QDialog):
|
||||
vis_layout.addStretch()
|
||||
|
||||
gridspec_kw = {"width_ratios": [1, 4]}
|
||||
fig, self.current_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
|
||||
self.wav_ori_fig, self.current_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
|
||||
gridspec_kw=gridspec_kw)
|
||||
fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
|
||||
vis_layout.addWidget(FigureCanvas(fig))
|
||||
self.wav_ori_fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
|
||||
vis_layout.addWidget(FigureCanvas(self.wav_ori_fig))
|
||||
|
||||
fig, self.gen_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
|
||||
self.wav_gen_fig, self.gen_ax = plt.subplots(1, 2, figsize=(10, 2.25), facecolor="#F0F0F0",
|
||||
gridspec_kw=gridspec_kw)
|
||||
fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
|
||||
vis_layout.addWidget(FigureCanvas(fig))
|
||||
self.wav_gen_fig.subplots_adjust(left=0, bottom=0.1, right=1, top=0.8)
|
||||
vis_layout.addWidget(FigureCanvas(self.wav_gen_fig))
|
||||
|
||||
for ax in self.current_ax.tolist() + self.gen_ax.tolist():
|
||||
ax.set_facecolor("#F0F0F0")
|
||||
@@ -599,7 +600,7 @@ class UI(QDialog):
|
||||
|
||||
|
||||
## Set the size of the window and of the elements
|
||||
max_size = QDesktopWidget().availableGeometry(self).size() * 0.8
|
||||
max_size = QDesktopWidget().availableGeometry(self).size()
|
||||
self.resize(max_size)
|
||||
|
||||
## Finalize the display
|
||||
|
||||
Reference in New Issue
Block a user