Files
AudioGPT/NeuralSeq/usr/__pycache__/diffspeech_task.cpython-37.pyc

37 lines
4.2 KiB
Plaintext
Raw Normal View History

2023-03-20 15:43:44 +08:00
B
2023-03-24 17:19:37 +08:00
<00>Xd<><00>@s<>ddlZddlZddlmZddlmZddlmZddlm Z ddl
2023-03-20 15:43:44 +08:00
m Z m Z ddl mZdd lmZd
d d <0C>iZGd d<0E>de <09>ZdS)<0F>N)<01>hparams<6D>)<01>DiffNet)<01>GaussianDiffusion)<01>
2023-03-24 17:19:37 +08:00
DiffFsTask)<02>get_vocoder_cls<6C> BaseVocoder)<01> denorm_f0)<01>FastSpeechDataset<65>wavenetcCs t|d<00>S)N<>audio_num_mel_bins)r)<01>hp<68>r<00>V/mnt/sdc/hongzhiqing/code/audio_chatgpt/text_to_sing/DiffSinger/usr/diffspeech_task.py<70><lambda> <00>rcsHeZdZ<02>fdd<02>Zdd<04>Zdd<06>Zddd <09>Zd
2023-03-20 15:43:44 +08:00
d <0B>Zdd d<0E>Z<08>Z S)<11>DiffSpeechTaskcs$tt|<00><02><02>t|_tt<06><01>|_dS)N)<08>superr<00>__init__r
<00> dataset_clsrr<00>vocoder)<01>self)<01> __class__rrrszDiffSpeechTask.__init__c
Cs<>td}t|j|ttdt<00>tdtdtdtdtdd<08>|_td d
krjtj|jjtd d d d <0A>x&|jj<07><08>D]\}}d|krxd|_ qxWdS)Nr <00>diff_decoder_type<70> timesteps<70>K_step<65>diff_loss_type<70>spec_min<69>spec_max)<08> phone_encoder<65>out_dims<6D>
denoise_fnrr<00> loss_typerr<00>fs2_ckpt<70><00>modelT)<01>strictZ predictorF)
rrr<00> DIFF_DECODERSr%<00>utils<6C> load_ckpt<70>fs2<73>named_parameters<72> requires_grad)r<00>mel_bins<6E>k<>vrrr<00>build_tts_models zDiffSpeechTask.build_tts_modelcCs@tjjtdd<02>|<01><04><00>tdtdtdftdd<07>|_}|S)NcSs|jS)N)r,)<01>prrrr*rz0DiffSpeechTask.build_optimizer.<locals>.<lambda><3E>lrZoptimizer_adam_beta1Zoptimizer_adam_beta2<61> weight_decay)r2<00>betasr3)<07>torch<63>optim<69>AdamW<6D>filter<65>
parametersr<00> optimizer)rr%r:rrr<00>build_optimizer(s zDiffSpeechTask.build_optimizerFc
Cs|d}|d}|d}|d}|d} |d}
tdsB|<02>d<08>n|<02>d <09>} td
d kr<>|d } |d } |d}|<01>| | ||<07>|d<}|||| ||| |
|d<10>}i}d|kr<>|d|d<|j|d|||d<14>tdr<>|<00>|||<10>tdr<>|<00>|d|
|<10>|<03>s|S||fSdS)N<>
txt_tokens<EFBFBD>mels<6C>mel2ph<70>f0<66>uv<75>energy<67>
use_spk_id<EFBFBD> spk_embed<65>spk_ids<64>
pitch_type<EFBFBD>cwt<77>cwt_spec<65>f0_mean<61>f0_std<74>f0_cwt)r>rC<00>ref_melsr?r@rA<00>infer<65> diff_loss<73>mel<65>dur)<01>losses<65>use_pitch_embed<65>use_energy_embed<65> energy_pred)r<00>get<65> cwt2f0_norm<72> add_dur_loss<73>add_pitch_loss<73>add_energy_loss)rr%<00>sample<6C> return_outputrLr<<00>targetr>r?r@rArCrGrHrI<00>outputrPrrr<00> run_model0s2  zDiffSpeechTask.run_modelc
Csi}|d}|d}tds&|<01>d<04>n|<01>d<05>}|d}|d}|d} i|d <|j|j|d
d d <0C>\|d <}
t|d <00><05><00>|d <|d|d<t<06>|<03>}|tdk<00>r |j||||| |dd
d<10>}
t|d|dt<00>} |j ||d|
dd
| |
<EFBFBD>d<13>d<14>|<00>
||d|
d<00>|S)Nr<rArBrCrDr>r?r@rPTF)rZrL<00>
total_loss<EFBFBD>nsamples<65>num_valid_plots)rCr>r?r@rArKrLr=<00>mel_out<75> f0_denorm)<03>is_mel<65>gt_f0r?) rrTr]r%<00>sum<75>valuesr(<00>tensors_to_scalarsr <00>plot_wav<61>plot_mel) rrY<00> batch_idx<64>outputsr<rArCr>r?r@<00> model_outrdrrr<00>validation_stepPs& 
$zDiffSpeechTask.validation_stepNcCs<>|d<00><00><00><01>}|d<00><00><00><01>}|d<00><00><00><01>}|d<00><00><00><01>}|rd|jj||d<02>}|jj||d<02>}|jjjd|<01><00>|td|jd<05>|jjjd|<01><00>|td|jd<05>dS)Nr)r?<00>gt_<74>audio_sample_rate)<02> sample_rate<74> global_stepZwav_) <09>cpu<70>numpyr<00>spec2wav<61>loggerZ
experimentZ add_audiorrq)rrjZgt_wav<61>wav_outrcrdr?<00>namerrrrhps"zDiffSpeechTask.plot_wav)FF)FNNN)
<EFBFBD>__name__<5F>
__module__<EFBFBD> __qualname__rr0r;r]rmrh<00> __classcell__rr)rrrs  
 r)r5r(<00> utils.hparamsr<00>diff.netr<00>diff.shallow_diffusion_ttsr<00>taskr<00>vocoders.base_vocoderrr<00>utils.pitch_utilsr <00>tasks.tts.fs2_utilsr
r'rrrrr<00><module>s