Commit f128dac

Author: GuangxiaoSong (committed)

FCN 4 layers & multi-layer BNLSTM

1 parent f1f884e, commit f128dac

20 files changed: +2025 -44 lines

1100_roc_auc_hw.py (+4, -3)

@@ -8,6 +8,7 @@
 
 import numpy as np
 from sklearn.metrics import roc_auc_score
-y_true = np.array([0, 0, 0, 0])
-y_scores = np.array([0.1, 0.4, 0.35, 0.8])
-print(roc_auc_score(y_true, y_scores, average="samples"))
+y_true = np.array([1., 1., 0., 0.])
+# y_scores = np.array([0.1, 0.4, 0.35, 0.8])
+y_scores = np.array([0.9, 0.9, 0.1, 0.1])
+print(roc_auc_score(y_true, y_scores, average='samples'))
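
A note on this fix (not part of the diff): the old y_true contained only one class, for which ROC AUC is undefined, so scikit-learn raises a ValueError; with the new perfectly separated labels and scores the call returns 1.0. A minimal check:

import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([1., 1., 0., 0.])
y_scores = np.array([0.9, 0.9, 0.1, 0.1])
print(roc_auc_score(y_true, y_scores))  # 1.0: every positive outranks every negative

# The removed y_true = [0, 0, 0, 0] has a single class; roc_auc_score
# raises ValueError("Only one class present in y_true ...") for such input.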

1201_mtt_2dCNN_4layers.py (new file, +237)

@@ -0,0 +1,237 @@
+# -*- coding:utf-8 -*-
+
+"""
+@author: Songgx
+@file: 1200_mtt_2dCNN.py
+@time: 2017/2/7 14:55
+"""
+
+from __future__ import print_function
+
+import numpy as np
+import tensorflow as tf
+from sklearn.metrics import roc_auc_score
+
+# https://indico.io/blog/tensorflow-data-inputs-part1-placeholders-protobufs-queues/
+top_50_tags_index = np.loadtxt('data/top_50_tags.txt', delimiter=',', skiprows=0, dtype=int)
+
+# Parameters
+x_height = 96
+x_width = 1366
+
+# total number of tags
+n_total_tags = 50
+learning_rate = 0.00001
+training_epochs = 1000 * 200  # 1000 * 200 iterations, 200 epochs
+display_step = 100
+num_threads = 8
+dropout = 0.5
+# L2_norm = 1e-9
+batch_size = 12
+
+
+def read_and_decode(filename):
+    filename_queue = tf.train.string_input_producer([filename])
+
+    reader = tf.TFRecordReader()
+    _, serialized_example = reader.read(filename_queue)
+    features = tf.parse_single_example(serialized_example,
+                                       features={
+                                           'features_mel': tf.FixedLenFeature([], tf.string),
+                                           'label': tf.FixedLenFeature([n_total_tags], tf.float32),
+                                       })
+
+    x = tf.decode_raw(features['features_mel'], tf.float32)
+    x = tf.reshape(x, [x_height, x_width, 1])
+    y = tf.cast(features['label'], tf.float32)
+    return x, y
+
+
+def load_and_shuffle_to_batch_data(path, batch_size=batch_size):
+    features, label = read_and_decode(path)
+    # shuffle_batch randomly shuffles the input examples
+    audio_batch, label_batch = tf.train.shuffle_batch([features, label],
+                                                      batch_size=batch_size, capacity=2000,
+                                                      min_after_dequeue=1000)
+    return audio_batch, label_batch
+
+
+# tf Graph input
+x = tf.placeholder(tf.float32, (batch_size, x_height, x_width, 1), name='input_layer')
+y = tf.placeholder(tf.float32, (batch_size, n_total_tags), name='output_layer')
+keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)
+# phase_train = tf.placeholder(tf.bool, name='phase_train')
+
+
+def batch_norm(x, n_out, phase_train, scope='bn'):
+    with tf.variable_scope(scope):
+        beta = tf.Variable(tf.constant(0.0, shape=[n_out]), name='beta', trainable=True)
+        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]), name='gamma', trainable=True)
+        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
+        ema = tf.train.ExponentialMovingAverage(decay=0.5)
+
+        def mean_var_with_update():
+            ema_apply_op = ema.apply([batch_mean, batch_var])
+            with tf.control_dependencies([ema_apply_op]):
+                return tf.identity(batch_mean), tf.identity(batch_var)
+
+        mean, var = tf.cond(phase_train,
+                            mean_var_with_update,
+                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
+        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)
+    return normed
+
+
+# Create model
+def conv_net(x, weights, phase_train=np.array(True)):
+    conv2_1 = tf.add(tf.nn.conv2d(x, weights['wconv1'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv1'])
+    conv2_1 = tf.nn.relu(batch_norm(conv2_1, 128, phase_train))
+    mpool_1 = tf.nn.max_pool(conv2_1, ksize=[1, 2, 4, 1], strides=[1, 2, 4, 1], padding='VALID')
+    dropout_1 = tf.nn.dropout(mpool_1, 0.5)
+
+    conv2_2 = tf.add(tf.nn.conv2d(dropout_1, weights['wconv2'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv2'])
+    conv2_2 = tf.nn.relu(batch_norm(conv2_2, 384, phase_train))
+    mpool_2 = tf.nn.max_pool(conv2_2, ksize=[1, 4, 5, 1], strides=[1, 4, 5, 1], padding='VALID')
+    dropout_2 = tf.nn.dropout(mpool_2, 0.5)
+
+    conv2_3 = tf.add(tf.nn.conv2d(dropout_2, weights['wconv3'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv3'])
+    conv2_3 = tf.nn.relu(batch_norm(conv2_3, 768, phase_train))
+    mpool_3 = tf.nn.max_pool(conv2_3, ksize=[1, 3, 8, 1], strides=[1, 3, 8, 1], padding='VALID')
+    dropout_3 = tf.nn.dropout(mpool_3, 0.5)
+
+    conv2_4 = tf.add(tf.nn.conv2d(dropout_3, weights['wconv4'], strides=[1, 1, 1, 1], padding='SAME'), weights['bconv4'])
+    conv2_4 = tf.nn.relu(batch_norm(conv2_4, 2048, phase_train))
+    mpool_4 = tf.nn.max_pool(conv2_4, ksize=[1, 4, 8, 1], strides=[1, 4, 8, 1], padding='VALID')
+    dropout_4 = tf.nn.dropout(mpool_4, 0.5)
+
+    flat = tf.reshape(dropout_4, [-1, weights['woutput'].get_shape().as_list()[0]])
+    fc_out = tf.nn.sigmoid(tf.add(tf.matmul(flat, weights['woutput']), weights['boutput']))
+
+    return fc_out
+
+
+# Store layers weight & bias
+def init_weights(shape):
+    return tf.Variable(tf.random_normal(shape, stddev=0.01))
+
+
+def init_biases(shape):
+    return tf.Variable(tf.zeros(shape))
+
+
+def get_roc_auc_scores(tags, logits):
+    final_acc = 0.
+    num = batch_size
+    for i in range(batch_size):
+        cur_tag_array = tags[i]
+        cur_logits_array = logits[i]
+        if is_zeros(cur_tag_array):
+            if num == 1:
+                continue
+            else:
+                num = num - 1
+                continue
+        roc_auc = roc_auc_score(cur_tag_array, cur_logits_array)
+        final_acc += roc_auc
+    return final_acc / num
+
+
+def is_zeros(arr):
+    for element in arr:
+        if element != 0:
+            return False
+    return True
+
+
+weights = {
+    'wconv1': init_weights([3, 3, 1, 128]),
+    'wconv2': init_weights([3, 3, 128, 384]),
+    'wconv3': init_weights([3, 3, 384, 768]),
+    'wconv4': init_weights([3, 3, 768, 2048]),
+    'bconv1': init_biases([128]),
+    'bconv2': init_biases([384]),
+    'bconv3': init_biases([768]),
+    'bconv4': init_biases([2048]),
+    'woutput': init_weights([2048, 50]),
+    'boutput': init_biases([50])
+}
+
+
+# Construct model
+logits = conv_net(x, weights)
+
+
+# Define loss and optimizer & correct_prediction
+
+# NaN bug
+# cross_entropy = -tf.reduce_sum(y * tf.log(tf.clip_by_value(logits, 1e-10, 1.0)))
+
+# cross_entropy_loss with L2 norm
+# cross_entropy_loss = -tf.reduce_sum(y * tf.log(logits) + L2_norm * tf.nn.l2_loss(weights['wd1']))
+cross_entropy_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits, y))
+
+optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy_loss)
+
+# load data
+audio_batch_training, label_batch_training = load_and_shuffle_to_batch_data("data/merge/mtt_mel_training_filtered.tfrecords", batch_size)
+audio_batch_validation, label_batch_validation = load_and_shuffle_to_batch_data("data/merge/mtt_mel_validation_filtered.tfrecords", batch_size)
+audio_batch_test, label_batch_test = load_and_shuffle_to_batch_data("data/merge/mtt_mel_test_filtered.tfrecords", batch_size)
+
+# Initializing the variables
+init = tf.global_variables_initializer()
+saver = tf.train.Saver()
+
+# Launch the graph
+with tf.Session() as sess:
+    sess.run(init)
+
+    # Start input enqueue threads.
+    coord = tf.train.Coordinator()
+    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
+    # for epoch in range(int(8000/batch_size)):
+    validation_accuracy_final = 0.
+    for epoch in range(training_epochs):
+        # pass the batch in through the feed_dict
+        audio_batch_vals_training, label_batch_vals_training = sess.run([audio_batch_training, label_batch_training])
+        _, loss_val, pred_ = sess.run([optimizer, cross_entropy_loss, logits], feed_dict={x: audio_batch_vals_training, y: label_batch_vals_training, keep_prob: dropout})
+
+        # print("Epoch:", '%06d' % (epoch + 1), "cost=", "{:.9f}".format(loss_val))
+        # print(pred_, label_batch_vals_training)
+
+        # calculate accuracy at each display step
+        if (epoch + 1) % display_step == 0:
+            validation_iterations = 100
+            cur_validation_acc = 0.
+            for _ in range(validation_iterations):
+                audio_batch_validation_vals, label_batch_validation_vals = sess.run([audio_batch_validation, label_batch_validation])
+
+                logits_validation, loss_val_validation = sess.run([logits, cross_entropy_loss], feed_dict={
+                    x: audio_batch_validation_vals,
+                    y: label_batch_validation_vals,
+                    keep_prob: 1.0
+                })
+                validation_accuracy = get_roc_auc_scores(label_batch_validation_vals, logits_validation)
+                cur_validation_acc += validation_accuracy
+
+            cur_validation_acc /= validation_iterations
+            print("iter %d, training loss: %f, validation accuracy: %f" % ((epoch + 1), loss_val, cur_validation_acc))
+    print("######### Training finished. #########")
+
+    # Test model
+    # batch_test --> reduce_mean --> final_test_accuracy
+
+    test_epochs = 400
+    test_accuracy_final = 0.
+    for _ in range(test_epochs):
+        audio_test_vals, label_test_vals = sess.run([audio_batch_test, label_batch_test])
+        logits_test, test_loss_val = sess.run([logits, cross_entropy_loss], feed_dict={x: audio_test_vals, y: label_test_vals, keep_prob: 1.0})
+        test_accuracy = get_roc_auc_scores(label_test_vals, logits_test)
+        test_accuracy_final += test_accuracy
+        print("test epoch: %d, test loss: %f, test accuracy: %f" % (_, test_loss_val, test_accuracy))
+    test_accuracy_final /= test_epochs
+    print("final test accuracy: %f" % test_accuracy_final)
+
+    coord.request_stop()
+    coord.join(threads)
+    sess.close()
+
+# console results are in FCN-5.pdf
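
For reference (not part of the commit), the evaluation helper above can be exercised standalone. This simplified sketch mirrors get_roc_auc_scores with plain NumPy inputs: it averages per-sample ROC AUC and skips rows whose tag vector is all zeros, since ROC AUC is undefined when only one class is present (an all-ones row would be undefined too; the repo's helper only guards the all-zero case):

import numpy as np
from sklearn.metrics import roc_auc_score

tags = np.array([[1., 0., 1.],
                 [0., 0., 0.],   # all-zero tag row: AUC undefined, skipped
                 [0., 1., 1.]])
scores = np.array([[0.9, 0.2, 0.8],
                   [0.1, 0.3, 0.2],
                   [0.4, 0.7, 0.6]])

total, num = 0., len(tags)
for t, s in zip(tags, scores):
    if not t.any():          # mirrors is_zeros()
        num -= 1
        continue
    total += roc_auc_score(t, s)
print(total / num)           # mean per-sample AUC over the valid rows -> 1.0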

1300_mtt_LSTM.py (+1, -1)

@@ -157,7 +157,7 @@ def get_roc_auc_scores(tags, logits):
                 # print("test iter: %d, test loss: %f, test accuracy: %f" % (_, test_loss_val, test_accuracy))
             cur_validation_acc /= validation_epochs
             print("training iter: %d, mini-batch loss: %f, validation accuracy: %f" % (
-                (epoch + 1), loss_val, validation_accuracy))
+                (epoch + 1), loss_val, cur_validation_acc))
             # print(pred_, label_batch_vals)
             # print(sess.run(weights))
 
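
This one-line fix (repeated in 1302_mtt_PLSTM.py and 1303_mtt_PGRU.py below) prints the running mean cur_validation_acc rather than validation_accuracy, which only held the score of the last validation batch. A toy illustration with made-up batch scores:

# Hypothetical per-batch validation scores.
batch_scores = [0.81, 0.79, 0.85]

running = 0.
for s in batch_scores:
    running += s
running /= len(batch_scores)

print(running)           # ~0.8167: the averaged metric the fix reports
print(batch_scores[-1])  # 0.85: the last-batch value the old code printed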

1301_mtt_GRU.py (+9, -8)

@@ -10,8 +10,8 @@
 
 batch_size = 10
 num_steps = 96  # number of truncated backprop steps
-state_size = 1024
-learning_rate = 0.001
+state_size = 1536
+learning_rate = 0.000001
 training_epochs = 1500 * 150  # 1500 iterations, 150 epochs
 display_step = 100
 dropout = 0.75

@@ -119,6 +119,7 @@ def get_roc_auc_scores(tags, logits):
 audio_batch_test, label_batch_test = load_and_shuffle_to_batch_data("data/merge/mtt_mel_test_filtered.tfrecords", batch_size)
 
 logits = RNN(x, weights, biases)
+pred_prob = tf.nn.softmax(logits)
 cross_entropy_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits, y))
 optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy_loss)
 

@@ -139,7 +140,7 @@ def get_roc_auc_scores(tags, logits):
     for epoch in range(training_epochs):
         # pass it in through the feed_dict
         audio_batch_vals_training, label_batch_vals_training = sess.run([audio_batch_training, label_batch_training])
-        _, loss_val, pred_ = sess.run([optimizer, cross_entropy_loss, logits], feed_dict={x: audio_batch_vals_training, y: label_batch_vals_training})
+        _, loss_val, pred_ = sess.run([optimizer, cross_entropy_loss, pred_prob], feed_dict={x: audio_batch_vals_training, y: label_batch_vals_training})
         # print(pred_, sess.run(weights))
         # print("Epoch:", '%06d' % (epoch + 1), "cost=", "{:.9f}".format(loss_val))
         # print(pred_, label_batch_vals_training)

@@ -150,15 +151,15 @@ def get_roc_auc_scores(tags, logits):
             for _ in range(validation_epochs):
                 audio_batch_validation_vals, label_batch_validation_vals = sess.run(
                     [audio_batch_validation, label_batch_validation])
-                logits_validation, loss_val_validation = sess.run([logits, cross_entropy_loss],
+                pred_prob_validation, loss_val_validation = sess.run([pred_prob, cross_entropy_loss],
                                                                   feed_dict={x: audio_batch_validation_vals,
                                                                              y: label_batch_validation_vals})
-                validation_accuracy = get_roc_auc_scores(label_batch_validation_vals, logits_validation)
+                validation_accuracy = get_roc_auc_scores(label_batch_validation_vals, pred_prob_validation)
                 cur_validation_acc += validation_accuracy
                 # print("test iter: %d, test loss: %f, test accuracy: %f" % (_, test_loss_val, test_accuracy))
             cur_validation_acc /= validation_epochs
             print("training iter: %d, mini-batch loss: %f, validation accuracy: %f" % (
-                (epoch + 1), loss_val, validation_accuracy))
+                (epoch + 1), loss_val, cur_validation_acc))
             # print(pred_, label_batch_vals)
             # print(sess.run(weights))

@@ -175,8 +176,8 @@ def get_roc_auc_scores(tags, logits):
     test_accuracy_final = 0.
     for _ in range(test_epochs):
         audio_test_vals, label_test_vals = sess.run([audio_batch_test, label_batch_test])
-        logits_test, test_loss_val = sess.run([logits, cross_entropy_loss], feed_dict={x: audio_test_vals, y: label_test_vals})
-        test_accuracy = get_roc_auc_scores(label_test_vals, logits_test)
+        pred_prob_test, test_loss_val = sess.run([pred_prob, cross_entropy_loss], feed_dict={x: audio_test_vals, y: label_test_vals})
+        test_accuracy = get_roc_auc_scores(label_test_vals, pred_prob_test)
         test_accuracy_final += test_accuracy
         print("test epoch: %d, test loss: %f, test accuracy: %f" % (_, test_loss_val, test_accuracy))
     test_accuracy_final /= test_epochs
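
A side note on the pred_prob change (not part of the commit): softmax is strictly increasing within a row, so a ranking-based metric like per-sample ROC AUC is identical whether computed on raw logits or on softmax outputs; the switch mainly makes the logged predictions read as probabilities. A quick check:

import numpy as np
from sklearn.metrics import roc_auc_score

def softmax(z):
    e = np.exp(z - z.max())   # shift for numerical stability
    return e / e.sum()

tags = np.array([0., 1., 1., 0.])
logits = np.array([-1.2, 0.7, 2.1, 0.3])

print(roc_auc_score(tags, logits))           # 1.0
print(roc_auc_score(tags, softmax(logits)))  # same value: softmax preserves ranks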

1302_mtt_PLSTM.py (+1, -1)

@@ -157,7 +157,7 @@ def get_roc_auc_scores(tags, logits):
                 # print("test iter: %d, test loss: %f, test accuracy: %f" % (_, test_loss_val, test_accuracy))
             cur_validation_acc /= validation_epochs
             print("training iter: %d, mini-batch loss: %f, validation accuracy: %f" % (
-                (epoch + 1), loss_val, validation_accuracy))
+                (epoch + 1), loss_val, cur_validation_acc))
             # print(pred_, label_batch_vals)
             # print(sess.run(weights))
 

1303_mtt_PGRU.py (+1, -1)

@@ -156,7 +156,7 @@ def get_roc_auc_scores(tags, logits):
                 cur_validation_acc += validation_accuracy
                 # print("test iter: %d, test loss: %f, test accuracy: %f" % (_, test_loss_val, test_accuracy))
             cur_validation_acc /= validation_epochs
-            print("training iter: %d, mini-batch loss: %f, validation accuracy: %f" % ((epoch + 1), loss_val, validation_accuracy))
+            print("training iter: %d, mini-batch loss: %f, validation accuracy: %f" % ((epoch + 1), loss_val, cur_validation_acc))
             # print(pred_, label_batch_vals)
             # print(sess.run(weights))
 

1305_mtt_BNLSTM.py (+7, -1)

@@ -82,9 +82,15 @@ def RNN(x, weights, biases):
 
     # Define a lstm cell with tensorflow
     lstm_cell = bnlstm.BNLSTMCell(state_size, True)
+    tf.zeros_initializer()
+    # c, h
+    initialState = (
+        tf.random_normal([batch_size, state_size], stddev=0.1),
+        tf.random_normal([batch_size, state_size], stddev=0.1))
+
     lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=dropout)
     # Get lstm cell output
-    outputs, states = tf.nn.rnn(lstm_cell, x, dtype=tf.float32)
+    outputs, states = tf.nn.rnn(lstm_cell, x, initial_state=initialState, dtype=tf.float32)
 
     # Linear activation, using rnn inner loop last output
     return tf.matmul(outputs[-1], weights['out']) + biases['out']
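
For context, a minimal sketch of the pattern this diff introduces: seeding the LSTM with a small random (c, h) initial state instead of the default zeros. The sketch assumes the TF 0.12-era tf.nn.rnn / tf.nn.rnn_cell API used throughout this repo and a stock BasicLSTMCell in place of the repo's bnlstm.BNLSTMCell:

import tensorflow as tf

batch_size, state_size, num_steps, input_dim = 10, 1536, 96, 1366

cell = tf.nn.rnn_cell.BasicLSTMCell(state_size, state_is_tuple=True)
cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=0.75)

# Small Gaussian start for the cell and hidden states; zeros are the usual default.
initial_state = tf.nn.rnn_cell.LSTMStateTuple(
    tf.random_normal([batch_size, state_size], stddev=0.1),
    tf.random_normal([batch_size, state_size], stddev=0.1))

# tf.nn.rnn takes a Python list with one [batch, input_dim] tensor per step.
inputs = [tf.placeholder(tf.float32, [batch_size, input_dim])
          for _ in range(num_steps)]
outputs, final_state = tf.nn.rnn(cell, inputs, initial_state=initial_state)

A nonzero initial state is occasionally used as a mild regularizer for RNNs; whether it helps here would show up in the validation AUC.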

0 commit comments
