# -*- coding:utf-8 -*-

"""
@author: Songgx
@file: 0700_raw_CNN_overfitting.py
@time: 2017/1/7 16:21
"""

from __future__ import print_function

import numpy as np
import tensorflow as tf


def dense_to_one_hot(labels_dense, num_classes=10):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot

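# Quick example (illustrative, not used in the graph below):
#   dense_to_one_hot(np.array([2, 0]), num_classes=3)
#   => [[0., 0., 1.],
#       [1., 0., 0.]]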

def read_and_decode(filename):
    # n_classes and x_len are module-level constants defined below;
    # this function is only called after they exist.
    filename_queue = tf.train.string_input_producer([filename])

    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'label': tf.FixedLenFeature([n_classes], tf.float32),
                                           'features_raw': tf.FixedLenFeature([x_len], tf.float32),
                                       })

    x = tf.cast(features['features_raw'], tf.float32)
    y = tf.cast(features['label'], tf.float32)
    return x, y

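# NOTE (assumption): this reader expects records written as float lists;
# the actual writer script is not part of this file, but it would look
# roughly like:
#   writer = tf.python_io.TFRecordWriter("data/merge/raw_data_training.tfrecords")
#   example = tf.train.Example(features=tf.train.Features(feature={
#       'label': tf.train.Feature(float_list=tf.train.FloatList(value=one_hot_label)),
#       'features_raw': tf.train.Feature(float_list=tf.train.FloatList(value=raw_audio)),
#   }))
#   writer.write(example.SerializeToString())
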
# Parameters
x_len = 131072
learning_rate = 0.001
training_epochs = 2000
display_step = 10
num_threads = 8
dropout = 0.75
L2_norm = 1e-9  # only used by the (commented-out) L2-regularized loss below
batch_size = 100
training_size = 8000
test_size = 2000

n_classes = 10  # total number of classes

# tf Graph input

x = tf.placeholder(tf.float32, (batch_size, x_len), name='input_layer')
y = tf.placeholder(tf.float32, (batch_size, n_classes), name='output_layer')
keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)

# Create some wrappers for simplicity
def conv2d(x, W, b, name, strides=1):
    # Conv2D wrapper, with bias and relu activation.
    # Layout is NHWC; the audio lies along the width axis, so we only
    # stride along width.
    x = tf.nn.conv2d(x, W, strides=[1, 1, strides, 1], padding="VALID", name=name)
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)


def maxpool2d(x, k=4):
    # MaxPool2D wrapper, pooling along the width axis only
    return tf.nn.max_pool(x, ksize=[1, 1, k, 1], strides=[1, 1, k, 1],
                          padding="VALID")

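# With padding="VALID", output width = floor((in_width - filter_width) / stride) + 1.
# Sanity check for conv1 below: (131072-8)/8 + 1 = 16384, and maxpool2d(k=2)
# then halves it to 8192.
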
# Create model
def conv_net(x, weights, biases, dropout):
    # Reshape input to NHWC with height 1
    x = tf.reshape(x, shape=[-1, 1, x_len, 1])  # shape = (batch_size, 1, 131072, 1)

    # Convolution Layer 1
    # (131072-8+0)/8 + 1 = 16384
    conv1 = conv2d(x, weights['wc1'], biases['bc1'], 'conv1', strides=8)
    # Max Pooling (down-sampling)
    # 16384/2 = 8192
    conv1 = maxpool2d(conv1, k=2)
    # shape = (batch_size, 1, 8192, 32)

    # Convolution Layer 2
    # (8192-4+0)/4 + 1 = 2048
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'], 'conv2', strides=4)
    # Max Pooling (down-sampling)
    # 2048/2 = 1024
    conv2 = maxpool2d(conv2, k=2)
    # shape = (batch_size, 1, 1024, 64)

    # Convolution Layer 3
    # (1024-2+0)/2 + 1 = 512
    conv3 = conv2d(conv2, weights['wc3'], biases['bc3'], 'conv3', strides=2)
    # Max Pooling (down-sampling)
    # 512/2 = 256
    conv3 = maxpool2d(conv3, k=2)
    # shape = (batch_size, 1, 256, 128)

    # Fully connected layer
    # Reshape conv3 output to fit fully connected layer input
    # 256 * 128 = 32768
    fc1 = tf.reshape(conv3, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply Dropout
    fc1 = tf.nn.dropout(fc1, dropout)

    # Output, class prediction
    fc_out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])

    # Crude rescaling: with the large random_normal weights the raw logits
    # are huge, which saturates softmax; dividing by 10e9 (= 1e10) keeps
    # the values in a usable range.
    fc_out = tf.divide(fc_out, 10e9)
    # softmax output
    out = tf.nn.softmax(fc_out)
    return out


def accuracy(logits, labels):
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar("accuracy", acc)
    return acc
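
# Illustrative example: for a softmax row [0.1, 0.7, 0.2] and one-hot label
# [0, 1, 0] both argmaxes are 1, so the prediction counts as correct.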

# Store layers weight & bias
weights = {
    # height*width*depth 1*8*1 conv, 1 input channel, 32 outputs
    'wc1': tf.Variable(tf.random_normal([1, 8, 1, 32])),
    # h*w*d 1*4*32 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([1, 4, 32, 64])),
    # h*w*d 1*2*64 conv, 64 inputs, 128 outputs
    'wc3': tf.Variable(tf.random_normal([1, 2, 64, 128])),
    # fully connected, 256*128 inputs, 512 outputs
    'wd1': tf.Variable(tf.random_normal([256*128, 512])),
    # 512 inputs, 10 outputs (class prediction)
    'out': tf.Variable(tf.random_normal([512, n_classes]))
}

biases = {
    'bc1': tf.Variable(tf.zeros([32])),
    'bc2': tf.Variable(tf.zeros([64])),
    'bc3': tf.Variable(tf.zeros([128])),
    'bd1': tf.Variable(tf.zeros([512])),
    'out': tf.Variable(tf.zeros([n_classes]))
}

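# Rough parameter count (weights only, illustrative arithmetic):
#   wc1: 1*8*1*32    =        256
#   wc2: 1*4*32*64   =      8,192
#   wc3: 1*2*64*128  =     16,384
#   wd1: 32768*512   = 16,777,216
#   out: 512*10      =      5,120
# ~16.8M parameters, dominated by the fully connected layer; with only
# training_size = 8000 examples this is one reason the net overfits.
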
# Construct model
logits = conv_net(x, weights, biases, keep_prob)


# Define loss and optimizer & correct_prediction

# Clip the softmax output before the log: the unclipped
# -tf.reduce_sum(y * tf.log(logits)) goes NaN once any probability
# underflows to 0.
# cross_entropy_loss with L2 norm (optional):
# cross_entropy_loss = -tf.reduce_sum(y * tf.log(tf.clip_by_value(logits, 1e-10, 1.0))) + L2_norm * tf.nn.l2_loss(weights['wd1'])
cross_entropy_loss = -tf.reduce_sum(y * tf.log(tf.clip_by_value(logits, 1e-10, 1.0)))
tf.summary.scalar("cross_entropy", cross_entropy_loss)

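# Alternative sketch (changes conv_net's contract): return the raw fc_out
# logits from conv_net and let TensorFlow fuse softmax and log, which is
# numerically stabler and makes the clipping above unnecessary:
#   cross_entropy_loss = tf.reduce_mean(
#       tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=fc_out))
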
# accuracy
acc = accuracy(logits, y)

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy_loss)

# Input pipeline

features, label = read_and_decode("data/merge/raw_data_training.tfrecords")
features_test, label_test = read_and_decode("data/merge/raw_data_test.tfrecords")

# shuffle_batch randomly shuffles the examples as it batches them
audio_batch, label_batch = tf.train.shuffle_batch([features, label],
                                                  batch_size=batch_size, capacity=2000,
                                                  min_after_dequeue=1000)

audio_batch_test, label_batch_test = tf.train.shuffle_batch([features_test, label_test],
                                                            batch_size=batch_size, capacity=2000,
                                                            min_after_dequeue=1000)

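# Note (rough arithmetic): each queue can buffer up to capacity=2000 examples,
# i.e. about 2000 * 131072 floats * 4 bytes ~= 1 GB of raw features per queue;
# lower capacity/min_after_dequeue if memory is tight.
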
# Initializing the variables
init = tf.global_variables_initializer()

saver = tf.train.Saver()


# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # for TensorBoard
    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter('model/', sess.graph)

    # Start input enqueue threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for epoch in range(training_epochs):
        # pull one shuffled batch and feed it in through feed_dict
        audio_batch_vals, label_batch_vals = sess.run([audio_batch, label_batch])

        _, loss_val, pred_ = sess.run([optimizer, cross_entropy_loss, logits],
                                      feed_dict={x: audio_batch_vals, y: label_batch_vals, keep_prob: dropout})

        # report mini-batch accuracy every display_step epochs (dropout disabled)
        if (epoch+1) % display_step == 0:
            train_accuracy = sess.run(acc, feed_dict={x: audio_batch_vals, y: label_batch_vals, keep_prob: 1.0})
            print("training epoch: %d, mini-batch loss: %f, mini-batch training accuracy: %f" % ((epoch+1), loss_val, train_accuracy))

        # add value for TensorBoard at each step
        #summary_str = sess.run(summary_op, feed_dict={x:audio_batch_vals, y:label_batch_vals, keep_prob: 1.0})
        #summary_writer.add_summary(summary_str, (epoch+1))
    save_path = saver.save(sess, "model/model_cnn_raw_data.ckpt")
    print("######### Training finished && model saved. #########")
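    # To reload the trained weights later (sketch; requires the same graph):
    #   saver.restore(sess, "model/model_cnn_raw_data.ckpt")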

    # Test model
    # batch_test --> reduce_mean --> final_test_accuracy

    test_epochs = int(test_size / batch_size)
    test_accuracy_final = 0.
    for step in range(test_epochs):
        audio_test_vals, label_test_vals = sess.run([audio_batch_test, label_batch_test])
        test_accuracy = sess.run(acc, feed_dict={x: audio_test_vals, y: label_test_vals, keep_prob: 1.0})
        test_accuracy_final += test_accuracy
        print("test epoch: %d, test accuracy: %f" % (step, test_accuracy))
    test_accuracy_final /= test_epochs
    print("final test accuracy: %f" % test_accuracy_final)

    coord.request_stop()
    coord.join(threads)