# Forked from YCG09/tf-text-classification
# File: text_rnn.py -- 111 lines (94 loc), 5.92 KB
# NOTE: the bare integers that follow are line-number residue from the web
# page this file was copied from; they are not part of the program.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#-*- coding:utf-8 -*-
import tensorflow as tf
import numpy as np
class TextRNN(object):
    """Bidirectional multi-layer GRU with attention for text classification.

    Instantiating the class adds placeholders, variables and ops to the
    default TensorFlow graph (TF 1.x API: ``tf.placeholder``,
    ``tf.contrib.rnn``). Nothing is executed until the ops are run in a
    session.

    Attributes:
        input_x: int32 placeholder of token ids, shape
            (batch_size, sequence_length).
        input_y: float32 placeholder of labels, shape
            (batch_size, num_classes); ``argmax`` over axis 1 is used as the
            true class when computing accuracy.
        seq_len: int32 placeholder with one length per example, passed to the
            RNN as ``sequence_length``.
        keep_prob: float32 placeholder, dropout keep probability used both on
            the RNN outputs and on the attention output.
        global_step: non-trainable counter incremented by ``train_op``.
        embedding: trainable embedding matrix, shape
            (vocab_size, embedding_size).
        alpha: attention weights, shape (batch_size, sequence_length).
        final_output: attention-pooled RNN output after dropout, shape
            (batch_size, 2 * rnn_size).
        logits: unnormalised class scores, shape (batch_size, num_classes).
        predictions: index of the highest logit for each example.
        loss: mean softmax cross-entropy over the batch.
        train_op: Adam update with global-norm gradient clipping.
        accuracy: fraction of examples whose prediction matches the label.
    """

    def __init__(self, vocab_size, embedding_size, sequence_length, rnn_size, num_layers,
                 attention_size, num_classes, learning_rate, grad_clip):
        """Build the full training graph.

        Args:
            vocab_size: vocabulary size (number of rows of the embedding).
            embedding_size: word embedding dimension.
            sequence_length: sequence length after sentence padding.
            rnn_size: hidden layer dimension of each GRU cell.
            num_layers: number of stacked RNN layers per direction.
            attention_size: attention layer dimension.
            num_classes: number of target labels.
            learning_rate: initial learning rate for Adam.
            grad_clip: gradient clipping threshold (global norm).
        """
        self.input_x = tf.placeholder(tf.int32, shape=[None, sequence_length], name='input_x')
        self.input_y = tf.placeholder(tf.float32, shape=[None, num_classes], name='input_y')
        self.seq_len = tf.placeholder(tf.int32, shape=[None], name='seq_len')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.global_step = tf.Variable(0, trainable=False, name='global_step')

        # Define forward RNN cell
        with tf.name_scope('fw_rnn'):
            fw_rnn_cell = self._build_rnn_cell(rnn_size, num_layers, self.keep_prob)

        # Define backward RNN cell
        with tf.name_scope('bw_rnn'):
            bw_rnn_cell = self._build_rnn_cell(rnn_size, num_layers, self.keep_prob)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            self.embedding = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                trainable=True, name='W')
            # self.input_x shape: (batch_size, sequence_length)
            embedding_inputs = tf.nn.embedding_lookup(self.embedding, self.input_x)

        with tf.name_scope('bi_rnn'):
            # embedding_inputs shape: (batch_size, sequence_length, embedding_size)
            rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                fw_rnn_cell, bw_rnn_cell, inputs=embedding_inputs,
                sequence_length=self.seq_len, dtype=tf.float32)
            # The bidirectional RNN returns an (output_fw, output_bw) pair;
            # concatenate the two directions along the feature axis.
            rnn_output = tf.concat(rnn_outputs, 2)

        # Attention layer
        with tf.name_scope('attention'):
            input_shape = rnn_output.shape  # (batch_size, sequence_length, hidden_size)
            sequence_size = input_shape[1].value  # the length of sequences processed in the RNN layer
            hidden_size = input_shape[2].value  # hidden size of the RNN layer (both directions)
            attention_w = tf.Variable(
                tf.truncated_normal([hidden_size, attention_size], stddev=0.1), name='attention_w')
            attention_b = tf.Variable(tf.constant(0.1, shape=[attention_size]), name='attention_b')
            attention_u = tf.Variable(
                tf.truncated_normal([attention_size], stddev=0.1), name='attention_u')

            # Score every timestep in a single matmul instead of unrolling a
            # Python loop over sequence_length: fold time into the batch axis,
            # apply the projection, then unfold again.
            flat_output = tf.reshape(rnn_output, [-1, hidden_size])
            flat_u = tf.tanh(tf.matmul(flat_output, attention_w) + attention_b)
            flat_z = tf.matmul(flat_u, tf.reshape(attention_u, [-1, 1]))
            # Transform to batch_size * sequence_size
            attention_z = tf.reshape(flat_z, [-1, sequence_size])

            # NOTE(review): the softmax runs over all sequence_size positions
            # and is not masked by seq_len, so padded timesteps also receive
            # attention weight -- confirm whether that is intended.
            self.alpha = tf.nn.softmax(attention_z)
            # Reshape alpha to batch_size * sequence_size * 1 (same rank as
            # rnn_output) and take the weighted sum over time.
            attention_output = tf.reduce_sum(
                rnn_output * tf.reshape(self.alpha, [-1, sequence_size, 1]), 1)

        # Add dropout
        with tf.name_scope('dropout'):
            # attention_output shape: (batch_size, hidden_size)
            self.final_output = tf.nn.dropout(attention_output, self.keep_prob)

        # Fully connected layer
        with tf.name_scope('output'):
            fc_w = tf.Variable(
                tf.truncated_normal([hidden_size, num_classes], stddev=0.1), name='fc_w')
            fc_b = tf.Variable(tf.zeros([num_classes]), name='fc_b')
            self.logits = tf.matmul(self.final_output, fc_w) + fc_b
            self.predictions = tf.argmax(self.logits, 1, name='predictions')

        # Calculate cross-entropy loss
        with tf.name_scope('loss'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.input_y)
            self.loss = tf.reduce_mean(cross_entropy)

        # Create optimizer
        with tf.name_scope('optimization'):
            optimizer = tf.train.AdamOptimizer(learning_rate)
            gradients, variables = zip(*optimizer.compute_gradients(self.loss))
            # Rescale all gradients together so their global norm is at most grad_clip.
            gradients, _ = tf.clip_by_global_norm(gradients, grad_clip)
            self.train_op = optimizer.apply_gradients(
                zip(gradients, variables), global_step=self.global_step)

        # Calculate accuracy
        with tf.name_scope('accuracy'):
            correct_pred = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    @staticmethod
    def _build_rnn_cell(rnn_size, num_layers, keep_prob):
        """Return a dropout-wrapped stack of ``num_layers`` GRU cells.

        A new ``GRUCell`` is created for every layer. Putting one cell
        instance into ``MultiRNNCell`` several times makes the layers share
        (or fail to share) the same variables, which breaks graph construction
        as soon as the layers see inputs of different widths.

        To use LSTM units instead, replace ``GRUCell`` with
        ``tf.contrib.rnn.LSTMCell``.
        """
        layers = [tf.contrib.rnn.GRUCell(rnn_size) for _ in range(num_layers)]
        stacked_cell = tf.contrib.rnn.MultiRNNCell(layers)
        return tf.contrib.rnn.DropoutWrapper(stacked_cell, output_keep_prob=keep_prob)
if __name__ == '__main__':
    # Smoke test: construct the graph once with illustrative hyperparameters.
    demo_hparams = dict(
        vocab_size=8000,
        embedding_size=150,
        sequence_length=100,
        rnn_size=100,
        num_layers=2,
        attention_size=50,
        num_classes=30,
        learning_rate=0.001,
        grad_clip=5.0,
    )
    model = TextRNN(**demo_hparams)