# Network Parameters
num_units = 32 # number of neurons for the LSTM layer.

- run = Run()
- run.init(metadata={'dataset.num_classes': num_classes,
-                    'dataset.seq_max_len': seq_max_len,
-                    'dataset.seq_min_len': seq_min_len,
-                    'dataset.masking_val': masking_val,
-                    'training.learning_rate': learning_rate,
-                    'training.training_steps': training_steps,
-                    'training.batch_size': batch_size,
-                    'network.num_units': num_units},
-          description="TensorFlow 2.0 implementation of a Recurrent Neural Network (LSTM) that performs dynamic "
-                      "computation over sequences with variable length. This example is using a toy dataset to "
-                      "classify linear sequences. The generated sequences have variable length.")
- run.save('dynamic_rnn.py', 'code')
-
- # ====================
- # TOY DATA GENERATOR
- # ====================
-
- def toy_sequence_data():
-     """ Generate sequence of data with dynamic length.
-     This function generates toy samples for training:
-     - Class 0: linear sequences (i.e. [1, 2, 3, 4, ...])
-     - Class 1: random sequences (i.e. [9, 3, 10, 7,...])
-
-     NOTICE:
-     We have to pad each sequence to reach 'seq_max_len' for TensorFlow
-     consistency (we cannot feed a numpy array with inconsistent
-     dimensions). The dynamic calculation will then be perform and ignore
-     the masked value (here -1).
-     """
-     while True:
-         # Set variable sequence length.
-         seq_len = random.randint(seq_min_len, seq_max_len)
-         rand_start = random.randint(0, max_value - seq_len)
-         # Add a random or linear int sequence (50% prob).
-         if random.random() < .5:
-             # Generate a linear sequence.
-             seq = np.arange(start=rand_start, stop=rand_start+seq_len)
-             # Rescale values to [0., 1.].
-             seq = seq / max_value
-             # Pad sequence until the maximum length for dimension consistency.
-             # Masking value: -1.
-             seq = np.pad(seq, mode='constant', pad_width=(0, seq_max_len-seq_len), constant_values=masking_val)
-             label = 0
-         else:
-             # Generate a random sequence.
-             seq = np.random.randint(max_value, size=seq_len)
-             # Rescale values to [0., 1.].
-             seq = seq / max_value
-             # Pad sequence until the maximum length for dimension consistency.
-             # Masking value: -1.
-             seq = np.pad(seq, mode='constant', pad_width=(0, seq_max_len-seq_len), constant_values=masking_val)
-             label = 1
-         yield np.array(seq, dtype=np.float32), np.array(label, dtype=np.float32)
-
- # Use tf.data API to shuffle and batch data.
- train_data = tf.data.Dataset.from_generator(toy_sequence_data, output_types=(tf.float32, tf.float32))
- train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)
-
- # Create LSTM Model.
- class LSTM(Model):
-     # Set layers.
-     def __init__(self):
-         super(LSTM, self).__init__()
-         # Define a Masking Layer with -1 as mask.
-         self.masking = layers.Masking(mask_value=masking_val)
-         # Define a LSTM layer to be applied over the Masking layer.
-         # Dynamic computation will automatically be performed to ignore -1 values.
-         self.lstm = layers.LSTM(units=num_units)
-         # Output fully connected layer (2 classes: linear or random seq).
-         self.out = layers.Dense(num_classes)
-
-     # Set forward pass.
-     def call(self, x, is_training=False):
-         # A RNN Layer expects a 3-dim input (batch_size, seq_len, num_features).
-         x = tf.reshape(x, shape=[-1, seq_max_len, 1])
-         # Apply Masking layer.
-         x = self.masking(x)
-         # Apply LSTM layer.
-         x = self.lstm(x)
-         # Apply output layer.
-         x = self.out(x)
-         if not is_training:
-             # tf cross entropy expect logits without softmax, so only
-             # apply softmax when not training.
-             x = tf.nn.softmax(x)
-         return x
-
- # Build LSTM model.
- lstm_net = LSTM()
-
- # Cross-Entropy Loss.
- # Note that this will apply 'softmax' to the logits.
- def cross_entropy_loss(x, y):
-     # Convert labels to int 64 for tf cross-entropy function.
-     y = tf.cast(y, tf.int64)
-     # Apply softmax to logits and compute cross-entropy.
-     loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=x)
-     # Average loss across the batch.
-     return tf.reduce_mean(loss)
-
- # Accuracy metric.
- def accuracy(y_pred, y_true):
-     # Predicted class is the index of highest score in prediction vector (i.e. argmax).
-     correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
-     return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)
-
- # Adam optimizer.
- optimizer = tf.optimizers.Adam(learning_rate)
-
- # Optimization process.
- def run_optimization(x, y):
-     # Wrap computation inside a GradientTape for automatic differentiation.
-     with tf.GradientTape() as g:
-         # Forward pass.
-         pred = lstm_net(x, is_training=True)
-         # Compute loss.
-         loss = cross_entropy_loss(pred, y)
-
-     # Variables to update, i.e. trainable variables.
-     trainable_variables = lstm_net.trainable_variables
-
-     # Compute gradients.
-     gradients = g.gradient(loss, trainable_variables)
-
-     # Update weights following gradients.
-     optimizer.apply_gradients(zip(gradients, trainable_variables))
-
- # Run training for the given number of steps.
- for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
-     # Run the optimization to update W and b values.
-     run_optimization(batch_x, batch_y)
-
-     pred = lstm_net(batch_x, is_training=True)
-     loss = cross_entropy_loss(pred, batch_y)
-     acc = accuracy(pred, batch_y)
-     run.log_metrics({'loss': float(loss), 'accuracy': float(acc)})
-
- run.update_metadata({'loss': float(loss), 'accuracy': float(acc)})
- run.close()
+ with Run() as run:
+     run.init(metadata={'dataset.num_classes': num_classes,
+                        'dataset.seq_max_len': seq_max_len,
+                        'dataset.seq_min_len': seq_min_len,
+                        'dataset.masking_val': masking_val,
+                        'training.learning_rate': learning_rate,
+                        'training.training_steps': training_steps,
+                        'training.batch_size': batch_size,
+                        'network.num_units': num_units},
+              description="TensorFlow 2.0 implementation of a Recurrent Neural Network (LSTM) that performs dynamic "
+                          "computation over sequences with variable length. This example uses a toy dataset to "
+                          "classify linear sequences. The generated sequences have variable length.")
+     run.save('dynamic_rnn.py', 'code')
+
+     # ====================
+     # TOY DATA GENERATOR
+     # ====================
+
+     def toy_sequence_data():
+         """ Generate sequences of data with dynamic length.
+         This function generates toy samples for training:
+         - Class 0: linear sequences (i.e. [1, 2, 3, 4, ...])
+         - Class 1: random sequences (i.e. [9, 3, 10, 7,...])
+
+         NOTICE:
+         We have to pad each sequence to reach 'seq_max_len' for TensorFlow
+         consistency (we cannot feed a numpy array with inconsistent
+         dimensions). The dynamic calculation will then be performed,
+         ignoring the masked value (here -1).
+         """
+         while True:
+             # Set variable sequence length.
+             seq_len = random.randint(seq_min_len, seq_max_len)
+             rand_start = random.randint(0, max_value - seq_len)
+             # Add a random or linear int sequence (50% prob).
+             if random.random() < .5:
+                 # Generate a linear sequence.
+                 seq = np.arange(start=rand_start, stop=rand_start+seq_len)
+                 # Rescale values to [0., 1.].
+                 seq = seq / max_value
+                 # Pad sequence until the maximum length for dimension consistency.
+                 # Masking value: -1.
+                 seq = np.pad(seq, mode='constant', pad_width=(0, seq_max_len-seq_len), constant_values=masking_val)
+                 label = 0
+             else:
+                 # Generate a random sequence.
+                 seq = np.random.randint(max_value, size=seq_len)
+                 # Rescale values to [0., 1.].
+                 seq = seq / max_value
+                 # Pad sequence until the maximum length for dimension consistency.
+                 # Masking value: -1.
+                 seq = np.pad(seq, mode='constant', pad_width=(0, seq_max_len-seq_len), constant_values=masking_val)
+                 label = 1
+             yield np.array(seq, dtype=np.float32), np.array(label, dtype=np.float32)
+
+     # Use tf.data API to shuffle and batch data.
+     train_data = tf.data.Dataset.from_generator(toy_sequence_data, output_types=(tf.float32, tf.float32))
+     train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)
+
+     # Create LSTM Model.
+     class LSTM(Model):
+         # Set layers.
+         def __init__(self):
+             super(LSTM, self).__init__()
+             # Define a Masking layer with -1 as mask value.
+             self.masking = layers.Masking(mask_value=masking_val)
+             # Define an LSTM layer to be applied over the Masking layer.
+             # Dynamic computation will automatically be performed to ignore -1 values.
+             self.lstm = layers.LSTM(units=num_units)
+             # Output fully connected layer (2 classes: linear or random seq).
+             self.out = layers.Dense(num_classes)
+
+         # Set forward pass.
+         def call(self, x, is_training=False):
+             # An RNN layer expects a 3-dim input (batch_size, seq_len, num_features).
+             x = tf.reshape(x, shape=[-1, seq_max_len, 1])
+             # Apply Masking layer.
+             x = self.masking(x)
+             # Apply LSTM layer.
+             x = self.lstm(x)
+             # Apply output layer.
+             x = self.out(x)
+             if not is_training:
+                 # tf cross-entropy expects logits without softmax, so only
+                 # apply softmax when not training.
+                 x = tf.nn.softmax(x)
+             return x
+
+     # Build LSTM model.
+     lstm_net = LSTM()
+
+     # Cross-Entropy Loss.
+     # Note that this will apply 'softmax' to the logits.
+     def cross_entropy_loss(x, y):
+         # Convert labels to int64 for tf cross-entropy function.
+         y = tf.cast(y, tf.int64)
+         # Apply softmax to logits and compute cross-entropy.
+         loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=x)
+         # Average loss across the batch.
+         return tf.reduce_mean(loss)
+
+     # Accuracy metric.
+     def accuracy(y_pred, y_true):
+         # Predicted class is the index of the highest score in the prediction vector (i.e. argmax).
+         correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
+         return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)
+
+     # Adam optimizer.
+     optimizer = tf.optimizers.Adam(learning_rate)
+
+     # Optimization process.
+     def run_optimization(x, y):
+         # Wrap computation inside a GradientTape for automatic differentiation.
+         with tf.GradientTape() as g:
+             # Forward pass.
+             pred = lstm_net(x, is_training=True)
+             # Compute loss.
+             loss = cross_entropy_loss(pred, y)
+
+         # Variables to update, i.e. trainable variables.
+         trainable_variables = lstm_net.trainable_variables
+
+         # Compute gradients.
+         gradients = g.gradient(loss, trainable_variables)
+
+         # Update weights following gradients.
+         optimizer.apply_gradients(zip(gradients, trainable_variables))
+
+     # Run training for the given number of steps.
+     for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
+         # Run the optimization to update W and b values.
+         run_optimization(batch_x, batch_y)
+
+         pred = lstm_net(batch_x, is_training=True)
+         loss = cross_entropy_loss(pred, batch_y)
+         acc = accuracy(pred, batch_y)
+         run.log_metrics({'loss': float(loss), 'accuracy': float(acc)})
+
+     run.update_metadata({'loss': float(loss), 'accuracy': float(acc)})
+     run.close()
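
A note on the masking mechanism used above: `layers.Masking` flags every timestep whose features all equal `mask_value`, and the downstream `layers.LSTM` consumes that mask so the padded steps do not contribute to the final state. A minimal standalone sketch of this behaviour; the shapes and values below are illustrative only and are not taken from the diff:

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

# Two sequences of true lengths 2 and 3, padded to length 4 with the mask value -1.
batch = np.array([[[0.1], [0.2], [-1.0], [-1.0]],
                  [[0.3], [0.4], [0.5], [-1.0]]], dtype=np.float32)

masked = layers.Masking(mask_value=-1.0)(batch)   # builds a boolean mask per timestep
output = layers.LSTM(units=8)(masked)             # padded timesteps are skipped via the propagated mask
print(output.shape)                               # (2, 8): one state vector per sequence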
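
The move from an explicit `run = Run()` / `run.close()` pair to `with Run() as run:` relies on Python's standard context-manager protocol, which guarantees cleanup even if training raises. A minimal sketch of that protocol, using a hypothetical `ToyRun` class rather than the real `Run` implementation (whose internals are not shown in this diff):

class ToyRun:
    """Hypothetical stand-in used only to illustrate the context-manager protocol."""

    def __enter__(self):
        # Runs when the `with` block starts; the returned object is bound after `as`.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Runs when the block exits, even if an exception was raised inside it.
        self.close()
        return False  # returning False means exceptions are not suppressed

    def close(self):
        print("run closed")


with ToyRun() as run:
    pass  # training code goes here; close() is called automatically on exit

Under that assumption, the explicit `run.close()` kept at the end of the `with` block would be redundant, since the exit handler closes the run on the way out.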