2023-06-12 11:00:04 +08:00
"""
@author : Jiaxin Ye
@contact : jiaxin-ye@foxmail.com
"""
# -*- coding:UTF-8 -*-
import numpy as np
import os
import tensorflow as tf
from emotion_encoder . Model import TIMNET_Model
import argparse
from emotion_encoder . utils import get_mfcc
from tqdm import tqdm
def _str2bool(value):
    """Convert a command-line token to a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool(token)``, which is True for
    every non-empty string (including ``"False"``). This converter accepts the
    usual spellings instead. The empty string still maps to False, as it did
    under ``bool("")``.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    if token in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


# Command-line options. The parsed namespace is handed to TIMNET_Model as
# ``args`` later in the script.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--test_path",
    type=str,
    default="saved_models/default/INTERSECT_46_dilation_8_dropout_05_add_esd_npairLoss",
)
parser.add_argument("--data", type=str, default="ESD_test")
parser.add_argument("--lr", type=float, default=0.001)
parser.add_argument("--beta1", type=float, default=0.93)
parser.add_argument("--beta2", type=float, default=0.98)
parser.add_argument("--dropout", type=float, default=0.5)
parser.add_argument("--random_seed", type=int, default=46)
parser.add_argument("--activation", type=str, default="relu")
parser.add_argument("--filter_size", type=int, default=39)
# If you want to train the model on IEMOCAP, this parameter should be 10 due
# to the long duration of its speech signals.
parser.add_argument("--dilation_size", type=int, default=8)
parser.add_argument("--bidirection", type=_str2bool, default=True)
parser.add_argument("--kernel_size", type=int, default=2)
parser.add_argument("--stack_size", type=int, default=1)
parser.add_argument("--split_fold", type=int, default=10)
parser.add_argument("--gpu", type=str, default="0")
args = parser.parse_args()

# The dilation size is dictated by the dataset: 10 for IEMOCAP, 8 for
# everything else. The previous if/else fell through to the ``else`` branch
# when IEMOCAP was combined with an explicit ``--dilation_size 10`` and reset
# it to 8.
# NOTE(review): as before, a user-supplied --dilation_size is overridden for
# every dataset; confirm that this is intended.
args.dilation_size = 10 if args.data == "IEMOCAP" else 8
# Expose only the GPU(s) selected with --gpu to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
gpus = tf.config.experimental.list_physical_devices(device_type="GPU")

# Create a TF1-compat session whose GPU options request on-demand memory
# growth rather than reserving all device memory up front.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)
print(f"###gpus: {gpus}")
# Load the pre-extracted features for the dataset named by --data. The .npy
# file holds a pickled dict, hence allow_pickle=True and .item().
# NOTE(security): unpickling can execute arbitrary code; only load feature
# files that come from a trusted source.
data = np.load(f"emotion_encoder/MFCC/{args.data}.npy", allow_pickle=True).item()
x_source = data["x"]
# Collapse each label row to the index of its largest entry
# (presumably one-hot encoded labels -- TODO confirm).
y_source = np.argmax(data["y"], axis=1)
# Emotion label names per corpus. The position of a name in its tuple is the
# class index it stands for (ESD_CLASS_LABELS is indexed by the integer labels
# when the plot legend is built), so the order must not be changed.
CLASS_LABELS_finetune = ("angry", "fear", "happy", "neutral", "sad")
CASIA_CLASS_LABELS = ("angry", "fear", "happy", "neutral", "sad", "surprise")  # CASIA
EMODB_CLASS_LABELS = ("angry", "boredom", "disgust", "fear", "happy", "neutral", "sad")  # EMODB
SAVEE_CLASS_LABELS = ("angry", "disgust", "fear", "happy", "neutral", "sad", "surprise")  # SAVEE
RAVDE_CLASS_LABELS = ("angry", "calm", "disgust", "fear", "happy", "neutral", "sad", "surprise")  # RAVDESS
IEMOCAP_CLASS_LABELS = ("angry", "happy", "neutral", "sad")  # IEMOCAP
EMOVO_CLASS_LABELS = ("angry", "disgust", "fear", "happy", "neutral", "sad", "surprise")  # EMOVO
INTERSECT_CLASS_LABELS = ("angry", "happy", "neutral", "sad", "surprise")
ESD_CLASS_LABELS = ("angry", "happy", "neutral", "sad", "surprise")

# Lookup table from corpus name to its label tuple. "ESD" is registered as
# well so that every tuple defined above is reachable by name.
CLASS_LABELS_dict = {
    "CASIA": CASIA_CLASS_LABELS,
    "EMODB": EMODB_CLASS_LABELS,
    "EMOVO": EMOVO_CLASS_LABELS,
    "IEMOCAP": IEMOCAP_CLASS_LABELS,
    "RAVDE": RAVDE_CLASS_LABELS,
    "SAVEE": SAVEE_CLASS_LABELS,
    "INTERSECT": INTERSECT_CLASS_LABELS,
    "ESD": ESD_CLASS_LABELS,
}

# The model is always built with the INTERSECT label set, regardless of --data.
CLASS_LABELS = CLASS_LABELS_dict["INTERSECT"]
2023-06-12 11:00:04 +08:00
# Build the TIM-Net model for the per-sample feature shape (the leading
# sample axis of x_source is dropped), then run inference with the weights
# found under --test_path. The result is used below as the embedding matrix
# that UMAP projects.
model = TIMNET_Model(
    args=args,
    input_shape=x_source.shape[1:],
    class_label=CLASS_LABELS,
)
model.create_model()
x_feats = model.infer(x_source, model_dir=args.test_path)
2023-06-12 11:00:04 +08:00
import matplotlib . pyplot as plt
from sklearn . manifold import TSNE
2023-06-26 15:17:15 +08:00
import umap
2023-06-12 11:00:04 +08:00
import pandas as pd
import seaborn as sns
2023-06-26 15:17:15 +08:00
# Project the model embeddings to 2 dimensions with UMAP. The neighbourhood
# size is ceil(sqrt(number of samples)) and distances use the cosine metric.
reducer = umap.UMAP(
    n_neighbors=int(np.ceil(np.sqrt(y_source.size))),
    n_components=2,
    metric="cosine",
)
umap_result = reducer.fit_transform(x_feats)
# Expected shape: (n_samples, 2) -- one 2-D point per input sample.
print(umap_result.shape)
2023-06-26 15:17:15 +08:00
# Plot the UMAP projection with one colour per emotion label.
# Collect the 2-D coordinates and the integer labels in a frame for seaborn,
# then replace each integer label with its emotion name for the legend.
umap_result_df = pd.DataFrame(
    {"x": umap_result[:, 0], "y": umap_result[:, 1], "label": y_source}
)
umap_result_df["label"] = umap_result_df["label"].apply(lambda x: ESD_CLASS_LABELS[x])

fig, ax = plt.subplots(1)
sns.scatterplot(x="x", y="y", hue="label", data=umap_result_df, ax=ax, s=40)

# Use the same padded range on both axes so that, together with the equal
# aspect ratio, distances look the same horizontally and vertically.
lim = (umap_result.min() - 5, umap_result.max() + 5)
ax.set_xlim(lim)
ax.set_ylim(lim)
ax.set_aspect("equal")
# Anchor the legend just outside the top-right corner of the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
ax.set_title("UMAP visualization of emotion speech test dataset")

# Save the figure to the working directory and to the results folder.
# makedirs(exist_ok=True) avoids the check-then-create race of exists()+mkdir().
os.makedirs("dim_reduction_results", exist_ok=True)
fig.savefig("emotion_umap.png", dpi=500)
fig.savefig("dim_reduction_results/emotion_umap.png", dpi=500)