1
+ # -*- coding:utf-8 -*-
2
+
3
+ """
4
+ @author: Songgx
5
+ @file: 1200_mtt_2dCNN.py
6
+ @time: 2017/2/7 14:55
7
+ """
8
+
9
+ from __future__ import print_function
10
+
11
+ import numpy as np
12
+ import tensorflow as tf
13
+ from sklearn .metrics import roc_auc_score
14
+
15
# Reference on TensorFlow data inputs (placeholders, protobufs, queues):
# https://indico.io/blog/tensorflow-data-inputs-part1-placeholders-protobufs-queues/

# Comma-separated integers read from data/top_50_tags.txt at import time
# (presumably the indices of the 50 selected tags -- confirm against the
# script that wrote the file).
top_50_tags_index = np.loadtxt(
    'data/top_50_tags.txt',
    dtype=int,
    delimiter=',',
    skiprows=0,
)
17
+
18
# Parameters
# Height and width of one input example.
x_height = 96
x_width = 1366

# Total number of tags.
n_total_tags = 50

# Training settings.
learning_rate = 1e-5
# Despite the name this counts iterations (batches):
# 1000 * 200 iterations, 200 epochs.
training_epochs = 200 * 1000
display_step = 100
num_threads = 8
dropout = 0.5
# L2_norm = 1e-9
batch_size = 12
31
+
32
+
33
def read_and_decode(filename):
    """Build the graph ops that read and decode one example from a TFRecord file.

    Args:
        filename: path of the TFRecord file; it is wrapped in a one-element
            filename queue.

    Returns:
        A pair ``(x, y)`` of tensors: ``x`` is the 'features_mel' byte string
        decoded as float32 and reshaped to ``[x_height, x_width, 1]``; ``y`` is
        the 'label' feature, a float32 vector of length ``n_total_tags``.
    """
    filename_queue = tf.train.string_input_producer([filename])

    record_reader = tf.TFRecordReader()
    _, serialized_example = record_reader.read(filename_queue)

    # Schema of one serialized example.
    feature_spec = {
        'features_mel': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([n_total_tags], tf.float32),
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)

    # The input is stored as raw float32 bytes; restore its 3-D shape.
    flat_input = tf.decode_raw(parsed['features_mel'], tf.float32)
    example_input = tf.reshape(flat_input, [x_height, x_width, 1])
    example_label = tf.cast(parsed['label'], tf.float32)
    return example_input, example_label
48
+
49
+
50
def load_and_shuffle_to_batch_data(path, batch_size=batch_size,
                                   capacity=2000, min_after_dequeue=1000):
    """Build a shuffled batch pipeline over the examples of one TFRecord file.

    Args:
        path: TFRecord file handed to ``read_and_decode``.
        batch_size: number of examples per batch. The default is the value the
            module-level ``batch_size`` had when this function was defined.
        capacity: maximum number of examples held in the shuffle queue.
        min_after_dequeue: minimum number of examples kept in the queue after
            a dequeue; a larger value mixes the examples more thoroughly.

    Returns:
        A pair ``(audio_batch, label_batch)`` of batched tensors.
    """
    features, label = read_and_decode(path)
    # shuffle_batch randomly shuffles the incoming examples before batching.
    audio_batch, label_batch = tf.train.shuffle_batch(
        [features, label],
        batch_size=batch_size,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue)
    return audio_batch, label_batch
57
+
58
+
59
# tf Graph input
# Both data placeholders have a fully static shape, so every feed must hold
# exactly `batch_size` examples.
x = tf.placeholder(
    tf.float32, shape=(batch_size, x_height, x_width, 1), name='input_layer')
y = tf.placeholder(
    tf.float32, shape=(batch_size, n_total_tags), name='output_layer')
keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)
# phase_train = tf.placeholder(tf.bool, name='phase_train')
64
+
65
+
66
def batch_norm(x, n_out, phase_train, scope='bn'):
    """Batch-normalise ``x`` using moments taken over axes 0, 1 and 2.

    Args:
        x: input tensor (assumed 4-D with channels last, as produced by the
            conv2d calls in this file -- confirm for other callers).
        n_out: length of the learned offset ('beta') and scale ('gamma')
            vectors.
        phase_train: predicate passed to ``tf.cond``. When true, the batch
            moments are used and the moving averages are updated; when false,
            the stored moving averages are used instead.
        scope: name of the variable scope wrapping the created ops.

    Returns:
        The normalised tensor.
    """
    with tf.variable_scope(scope):
        offset = tf.Variable(tf.constant(0.0, shape=[n_out]), name='beta', trainable=True)
        scale = tf.Variable(tf.constant(1.0, shape=[n_out]), name='gamma', trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        moving_avg = tf.train.ExponentialMovingAverage(decay=0.5)

        def _batch_moments_with_update():
            # Tie the returned tensors to the moving-average update so the
            # update runs whenever the batch moments are consumed.
            update_op = moving_avg.apply([batch_mean, batch_var])
            with tf.control_dependencies([update_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        def _moving_moments():
            return moving_avg.average(batch_mean), moving_avg.average(batch_var)

        mean, var = tf.cond(phase_train, _batch_moments_with_update, _moving_moments)
        normed = tf.nn.batch_normalization(x, mean, var, offset, scale, 1e-3)
    return normed
83
+
84
+
85
+ # Create model
86
def _conv_bn_pool_dropout(inputs, kernel, bias, pool, phase_train, drop_keep_prob):
    """Run one stage: conv (stride 1, SAME) + bias -> batch norm -> ReLU -> max-pool -> dropout."""
    conv = tf.add(
        tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1], padding='SAME'), bias)
    # The number of output channels is the last dimension of the kernel.
    n_out = kernel.get_shape().as_list()[-1]
    activated = tf.nn.relu(batch_norm(conv, n_out, phase_train))
    # Non-overlapping pooling: the window doubles as the stride.
    window = [1, pool[0], pool[1], 1]
    pooled = tf.nn.max_pool(activated, ksize=window, strides=window, padding='VALID')
    return tf.nn.dropout(pooled, drop_keep_prob)


def conv_net(x, weights, phase_train=np.array(True), dropout_keep_prob=None):
    """Build the four-stage convolutional network and its sigmoid output layer.

    Args:
        x: input batch tensor fed to the first convolution.
        weights: dict holding the kernels 'wconv1'..'wconv4', the biases
            'bconv1'..'bconv4' and the output layer 'woutput' / 'boutput'.
        phase_train: predicate forwarded to ``batch_norm``.
            NOTE(review): the call in this file relies on the default, so batch
            norm uses batch statistics during validation and test as well.
        dropout_keep_prob: keep probability for every dropout layer. When
            omitted, the module-level ``keep_prob`` placeholder is used, so the
            value fed by the training / evaluation loops takes effect.
            Previously 0.5 was hard-coded and dropout stayed active during
            evaluation.

    Returns:
        Tensor of per-tag sigmoid outputs, one row per example.
    """
    if dropout_keep_prob is None:
        dropout_keep_prob = keep_prob

    # (kernel key, bias key, (pool height, pool width)) for each stage.
    stages = [
        ('wconv1', 'bconv1', (2, 4)),
        ('wconv2', 'bconv2', (4, 5)),
        ('wconv3', 'bconv3', (3, 8)),
        ('wconv4', 'bconv4', (4, 8)),
    ]

    net = x
    for kernel_key, bias_key, pool in stages:
        net = _conv_bn_pool_dropout(
            net, weights[kernel_key], weights[bias_key], pool,
            phase_train, dropout_keep_prob)

    # Flatten to [batch, n_inputs_of_output_layer] before the dense layer.
    flat = tf.reshape(net, [-1, weights['woutput'].get_shape().as_list()[0]])
    fc_out = tf.nn.sigmoid(tf.add(tf.matmul(flat, weights['woutput']), weights['boutput']))

    return fc_out
112
+
113
+ # Store layers weight & bias
114
def init_weights(shape):
    """Create a variable of the given shape drawn from a normal distribution (stddev 0.01)."""
    initial_value = tf.random_normal(shape, stddev=0.01)
    return tf.Variable(initial_value)
116
+
117
+
118
def init_biases(shape):
    """Create a variable of the given shape initialised to zeros."""
    initial_value = tf.zeros(shape)
    return tf.Variable(initial_value)
120
+
121
+
122
def get_roc_auc_scores(tags, logits):
    """Return the mean per-example ROC AUC of a batch.

    Args:
        tags: sequence of ground-truth label rows, one row per example.
        logits: sequence of predicted score rows, aligned with ``tags``.

    Returns:
        The ROC AUC averaged over the rows that have at least one non-zero
        label, or ``0.`` when there is no such row (including empty input).

    Rows are taken from the arguments themselves rather than from the global
    ``batch_size``, so batches of any length are handled.
    """
    auc_total = 0.
    n_scored = 0
    for cur_tag_array, cur_logits_array in zip(tags, logits):
        # An all-zero label row has no positive class, so it is skipped and
        # does not count towards the average.
        if not np.any(cur_tag_array):
            continue
        auc_total += roc_auc_score(cur_tag_array, cur_logits_array)
        n_scored += 1

    if n_scored == 0:
        return 0.
    return auc_total / n_scored
137
+
138
+
139
def is_zeros(arr):
    """Return True when no element of ``arr`` differs from 0 (True for empty input)."""
    return not any(element != 0 for element in arr)
144
+
145
# Trainable parameters of the network. Convolution kernels are laid out as
# [filter_height, filter_width, in_channels, out_channels]; each bias has one
# entry per output channel. The output layer width follows n_total_tags so it
# stays in step with the label placeholder.
weights = {
    'wconv1': init_weights([3, 3, 1, 128]),
    'wconv2': init_weights([3, 3, 128, 384]),
    'wconv3': init_weights([3, 3, 384, 768]),
    'wconv4': init_weights([3, 3, 768, 2048]),
    'bconv1': init_biases([128]),
    'bconv2': init_biases([384]),
    'bconv3': init_biases([768]),
    'bconv4': init_biases([2048]),
    'woutput': init_weights([2048, n_total_tags]),
    'boutput': init_biases([n_total_tags]),
}
157
+
158
+
159
# Construct model
# NOTE: conv_net applies tf.nn.sigmoid to its output layer, so despite the
# name `logits` holds values in (0, 1), not unscaled scores.
logits = conv_net(x, weights)


# Define loss and optimizer
# The targets are treated as independent per-tag indicators (assumes y holds
# 0/1 entries, one per tag -- confirm against the TFRecord writer). The
# previous tf.nn.softmax_cross_entropy_with_logits call expects unscaled
# logits and mutually exclusive classes, neither of which holds here, so the
# loss is computed as binary cross-entropy directly on the sigmoid outputs.
# Clipping keeps both log() arguments away from 0, which avoids NaN losses.
clipped_probs = tf.clip_by_value(logits, 1e-7, 1.0 - 1e-7)
cross_entropy_loss = -tf.reduce_sum(
    y * tf.log(clipped_probs) + (1.0 - y) * tf.log(1.0 - clipped_probs))

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy_loss)
173
# load data
# One shuffled batch pipeline per split (training / validation / test).
audio_batch_training, label_batch_training = load_and_shuffle_to_batch_data(
    "data/merge/mtt_mel_training_filtered.tfrecords", batch_size)
audio_batch_validation, label_batch_validation = load_and_shuffle_to_batch_data(
    "data/merge/mtt_mel_validation_filtered.tfrecords", batch_size)
audio_batch_test, label_batch_test = load_and_shuffle_to_batch_data(
    "data/merge/mtt_mel_test_filtered.tfrecords", batch_size)
177
+
178
# Initializing the variables
init = tf.global_variables_initializer()
# NOTE: the saver is created but no checkpoint is written in this script.
saver = tf.train.Saver()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # Start input enqueue threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # `training_epochs` counts iterations (one batch each).
        for epoch in range(training_epochs):
            # Dequeue one batch as numpy arrays, then pass it in through the
            # feed_dict.
            audio_batch_vals_training, label_batch_vals_training = sess.run(
                [audio_batch_training, label_batch_training])
            _, loss_val = sess.run(
                [optimizer, cross_entropy_loss],
                feed_dict={x: audio_batch_vals_training,
                           y: label_batch_vals_training,
                           keep_prob: dropout})

            # Calculate the validation score at each display step.
            if (epoch + 1) % display_step == 0:
                validation_iterations = 100
                cur_validation_acc = 0.
                for _ in range(validation_iterations):
                    audio_batch_validation_vals, label_batch_validation_vals = sess.run(
                        [audio_batch_validation, label_batch_validation])

                    logits_validation = sess.run(logits, feed_dict={
                        x: audio_batch_validation_vals,
                        y: label_batch_validation_vals,
                        keep_prob: 1.0
                    })
                    validation_accuracy = get_roc_auc_scores(
                        label_batch_validation_vals, logits_validation)
                    cur_validation_acc += validation_accuracy

                cur_validation_acc /= validation_iterations
                print("iter %d, training loss: %f, validation accuracy: %f" % ((epoch + 1), loss_val, cur_validation_acc))
        print("######### Training finished. #########")

        # Test model
        # The reported figure is the mean of the per-batch scores.
        test_epochs = 400
        test_accuracy_final = 0.
        for test_step in range(test_epochs):
            audio_test_vals, label_test_vals = sess.run([audio_batch_test, label_batch_test])
            logits_test, test_loss_val = sess.run(
                [logits, cross_entropy_loss],
                feed_dict={x: audio_test_vals, y: label_test_vals, keep_prob: 1.0})
            test_accuracy = get_roc_auc_scores(label_test_vals, logits_test)
            test_accuracy_final += test_accuracy
            print("test epoch: %d, test loss: %f, test accuracy: %f" % (test_step, test_loss_val, test_accuracy))
        test_accuracy_final /= test_epochs
        print("final test accuracy: %f" % test_accuracy_final)
    finally:
        # Always stop and join the queue-runner threads, even when training
        # or evaluation raises; the `with` block then closes the session.
        coord.request_stop()
        coord.join(threads)
236
+
237
# Console results are in FCN-5.pdf
0 commit comments