Files
AudioGPT/NeuralSeq/usr/__pycache__/diffspeech_task.cpython-38.pyc


# DiffSpeech training task: a GaussianDiffusion mel decoder built on top of a
# pretrained FastSpeech 2 front-end (bytecode cache of usr/diffspeech_task.py, CPython 3.8).
import torch

import utils
from utils.hparams import hparams
from .diff.net import DiffNet
from .diff.shallow_diffusion_tts import GaussianDiffusion
from .task import DiffFsTask
from vocoders.base_vocoder import get_vocoder_cls, BaseVocoder
from utils.pitch_utils import denorm_f0
from tasks.tts.fs2_utils import FastSpeechDataset

DIFF_DECODERS = {
    'wavenet': lambda hp: DiffNet(hp['audio_num_mel_bins']),
}


class DiffSpeechTask(DiffFsTask):
    def __init__(self):
        super(DiffSpeechTask, self).__init__()
        self.dataset_cls = FastSpeechDataset
        self.vocoder: BaseVocoder = get_vocoder_cls(hparams)()

    def build_tts_model(self):
        mel_bins = hparams['audio_num_mel_bins']
        self.model = GaussianDiffusion(
            phone_encoder=self.phone_encoder, out_dims=mel_bins,
            denoise_fn=DIFF_DECODERS[hparams['diff_decoder_type']](hparams),
            timesteps=hparams['timesteps'], K_step=hparams['K_step'],
            loss_type=hparams['diff_loss_type'],
            spec_min=hparams['spec_min'], spec_max=hparams['spec_max'])
        if hparams['fs2_ckpt'] != '':
            # Load the pretrained FastSpeech 2 weights and freeze everything except the predictors.
            utils.load_ckpt(self.model.fs2, hparams['fs2_ckpt'], 'model', strict=True)
            for k, v in self.model.fs2.named_parameters():
                if 'predictor' not in k:
                    v.requires_grad = False

    def build_optimizer(self, model):
        self.optimizer = optimizer = torch.optim.AdamW(
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=hparams['lr'],
            betas=(hparams['optimizer_adam_beta1'], hparams['optimizer_adam_beta2']),
            weight_decay=hparams['weight_decay'])
        return optimizer

    def run_model(self, model, sample, return_output=False, infer=False):
        txt_tokens = sample['txt_tokens']  # [B, T_t]
        target = sample['mels']            # [B, T_s, num_mel_bins]
        mel2ph, f0, uv, energy = sample['mel2ph'], sample['f0'], sample['uv'], sample['energy']
        spk_embed = sample.get('spk_embed') if not hparams['use_spk_id'] else sample.get('spk_ids')
        if hparams['pitch_type'] == 'cwt':
            cwt_spec, f0_mean, f0_std = sample['cwt_spec'], sample['f0_mean'], sample['f0_std']
            sample['f0_cwt'] = f0 = model.cwt2f0_norm(cwt_spec, f0_mean, f0_std, mel2ph)
        output = model(txt_tokens, mel2ph=mel2ph, spk_embed=spk_embed,
                       ref_mels=target, f0=f0, uv=uv, energy=energy, infer=infer)
        losses = {}
        if 'diff_loss' in output:
            losses['mel'] = output['diff_loss']
        self.add_dur_loss(output['dur'], mel2ph, txt_tokens, losses=losses)
        if hparams['use_pitch_embed']:
            self.add_pitch_loss(output, sample, losses)
        if hparams['use_energy_embed']:
            self.add_energy_loss(output['energy_pred'], energy, losses)
        if not return_output:
            return losses
        return losses, output

    def validation_step(self, sample, batch_idx):
        outputs = {'losses': {}}
        txt_tokens = sample['txt_tokens']
        mel2ph, f0, uv, energy = sample['mel2ph'], sample['f0'], sample['uv'], sample['energy']
        spk_embed = sample.get('spk_embed') if not hparams['use_spk_id'] else sample.get('spk_ids')
        outputs['losses'], model_out = self.run_model(self.model, sample, return_output=True, infer=False)
        outputs['total_loss'] = sum(outputs['losses'].values())
        outputs['nsamples'] = sample['nsamples']
        outputs = utils.tensors_to_scalars(outputs)
        if batch_idx < hparams['num_valid_plots']:
            # Re-run the model in inference mode to log generated mels and audio.
            model_out = self.model(txt_tokens, spk_embed=spk_embed, mel2ph=mel2ph, f0=f0, uv=uv,
                                   energy=energy, ref_mels=None, infer=True)
            gt_f0 = denorm_f0(sample['f0'], sample['uv'], hparams)
            self.plot_wav(batch_idx, sample['mels'], model_out['mel_out'], is_mel=True,
                          gt_f0=gt_f0, f0=model_out.get('f0_denorm'))
            self.plot_mel(batch_idx, sample['mels'], model_out['mel_out'])
        return outputs

    def plot_wav(self, batch_idx, gt_wav, wav_out, is_mel=False, gt_f0=None, f0=None, name=None):
        gt_wav, wav_out = gt_wav[0].cpu().numpy(), wav_out[0].cpu().numpy()
        gt_f0, f0 = gt_f0[0].cpu().numpy(), f0[0].cpu().numpy()
        if is_mel:  # vocode mel-spectrograms into waveforms before logging audio
            gt_wav = self.vocoder.spec2wav(gt_wav, f0=gt_f0)
            wav_out = self.vocoder.spec2wav(wav_out, f0=f0)
        self.logger.experiment.add_audio(f'gt_{batch_idx}', gt_wav,
                                         sample_rate=hparams['audio_sample_rate'], global_step=self.global_step)
        self.logger.experiment.add_audio(f'wav_{batch_idx}', wav_out,
                                         sample_rate=hparams['audio_sample_rate'], global_step=self.global_step)
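
The diff_decoder_type hparam picks the denoising network out of DIFF_DECODERS, so a different
denoiser can be plugged in without modifying the task class. A minimal sketch, assuming a
hypothetical MyDenoiser module that takes the mel-bin count like DiffNet does (MyDenoiser and
usr.my_net are not part of the repo):

    # hypothetical usr/my_task.py: register an extra denoiser and reuse DiffSpeechTask as-is
    from usr.diffspeech_task import DiffSpeechTask, DIFF_DECODERS
    from usr.my_net import MyDenoiser  # hypothetical module

    DIFF_DECODERS['my_denoiser'] = lambda hp: MyDenoiser(hp['audio_num_mel_bins'])
    # With diff_decoder_type set to 'my_denoiser' in the experiment config,
    # build_tts_model() would construct GaussianDiffusion with this denoise_fn.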