
Commit f9ea51b

multiple models
1 parent 9f2a02b commit f9ea51b

File tree: 6 files changed, +497 -17 lines


config.py (+3 -2)

@@ -17,14 +17,14 @@ def __init__(self):
         self.use_cuda = False
 
         # Global dimension params
-        self.embedding_dim = 50
+        self.embedding_dim = 200
         self.hidden_size = self.embedding_dim
         self.context_len = 600
         self.question_len = 30
 
         # Training params
         self.num_epochs = 10
-        self.learning_rate = 0.00001
+        self.learning_rate = 0.001
         self.batch_size = 32
         self.l2_norm = 0.1
         self.max_grad_norm = 5
@@ -52,6 +52,7 @@ def __init__(self):
         # Logs
         self.print_every = 5
         self.save_every = 100
+        self.evaluate_every = 20
 
         # Vectors
         self.glove_base_url = "http://nlp.stanford.edu/data/"
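Two notes on these config changes. First, `embedding_dim = 200` now has to match the width of the GloVe vectors loaded in data_util/vocab.py below, which raises an exception on a mismatch. Second, the new `evaluate_every` flag sits alongside `print_every` and `save_every`; the training loop that consumes it lives in one of the changed files not shown in this excerpt. A hedged sketch of how such periodic flags are typically wired up; only the attribute names and values come from config.py, everything else here is a stub:

# Hypothetical training-loop sketch, not code from this repository.
class Cfg:
    print_every = 5
    evaluate_every = 20   # new flag added in this commit
    save_every = 100

config = Cfg()

for step in range(1, 201):
    # the real gradient update on a batch would go here
    if step % config.print_every == 0:
        print("step %d: log training loss" % step)
    if step % config.evaluate_every == 0:
        print("step %d: run evaluation on the dev set" % step)
    if step % config.save_every == 0:
        print("step %d: save a checkpoint" % step)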

data_util/vocab.py (+4 -2)

@@ -45,7 +45,7 @@ def get_glove(glove_path, glove_dim):
     """
 
     print("Loading GLoVE vectors from file: %s" % glove_path)
-    vocab_size = int(4e5) # this is the vocab size of the corpus we've downloaded
+    vocab_size = int(4e5)  # this is the vocab size of the corpus we've downloaded
 
     emb_matrix = np.zeros((vocab_size + len(_START_VOCAB), glove_dim))
     word2id = {}
@@ -70,7 +70,9 @@ def get_glove(glove_path, glove_dim):
         word = line[0]
         vector = list(map(float, line[1:]))
         if glove_dim != len(vector):
-            raise Exception("You set --glove_path=%s but --embedding_size=%i. If you set --glove_path yourself then make sure that --embedding_size matches!" % (glove_path, glove_dim))
+            raise Exception(
+                "You set --glove_path=%s but --embedding_size=%i. If you set --glove_path yourself then make sure that --embedding_size matches!" % (
+                    glove_path, glove_dim))
         emb_matrix[idx, :] = vector
         word2id[word] = idx
         id2word[idx] = word
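With `embedding_dim` raised to 200 in config.py, `get_glove` must be pointed at the 200-dimensional GloVe file, otherwise the dimension check above raises. A minimal usage sketch; the `glove.6B.200d.txt` path and the `(emb_matrix, word2id, id2word)` return signature are assumptions based on the code shown in this hunk, not confirmed by the diff:

# Hypothetical call site; get_glove itself is defined in data_util/vocab.py.
from data_util.vocab import get_glove

glove_path = "data/glove.6B.200d.txt"  # assumed location of the 200-dim vectors
embedding_dim = 200                    # must equal config.embedding_dim

emb_matrix, word2id, id2word = get_glove(glove_path, embedding_dim)
print(emb_matrix.shape)                # roughly (400000 + len(_START_VOCAB), 200)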

networks.py (+28 -11)

@@ -33,9 +33,14 @@ def __init__(self, emb_matrix,
         self.batch_size = batch_size
         self.hidden_size = hidden_size
 
-        self.encoder = nn.GRU(input_size=hidden_size, hidden_size=hidden_size, bidirectional=bidirectional,
-                              num_layers=num_layers,
-                              batch_first=True)
+        # self.encoder = nn.GRU(input_size=hidden_size, hidden_size=hidden_size, bidirectional=bidirectional,
+        #                       num_layers=num_layers,
+        #                       batch_first=True)
+
+        self.encoder = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, bidirectional=bidirectional,
+                               num_layers=num_layers,
+                               batch_first=True)
+
         self.hidden = self.init_hidden()
         self.sentinel = nn.Parameter(torch.rand(hidden_size, ))
 
@@ -70,7 +75,10 @@ def forward(self, inputs, mask):
         return output
 
     def init_hidden(self):
-        return torch.zeros(self.num_directions * self.num_layers, self.batch_size, self.hidden_size)
+        # return torch.zeros(self.num_directions * self.num_layers, self.batch_size, self.hidden_size)
+
+        return (torch.zeros(self.num_directions * self.num_layers, self.batch_size, self.hidden_size),
+                torch.zeros(self.num_directions * self.num_layers, self.batch_size, self.hidden_size))
 
 
 # TODO : Takes input and produces out of same dimension our reference implementation
@@ -86,9 +94,14 @@ def __init__(self, dropout_rate,
         self.batch_size = batch_size
         self.hidden_size = hidden_size
 
-        self.fusion_bilstm = nn.GRU(num_layers=num_layers, input_size=hidden_size * 3, hidden_size=hidden_size,
-                                    batch_first=True,
-                                    bidirectional=True)
+        # self.fusion_bilstm = nn.GRU(num_layers=num_layers, input_size=hidden_size * 3, hidden_size=hidden_size,
+        #                             batch_first=True,
+        #                             bidirectional=True)
+
+        self.fusion_bilstm = nn.LSTM(num_layers=num_layers, input_size=hidden_size * 3, hidden_size=hidden_size,
+                                     batch_first=True,
+                                     bidirectional=True)
+
         self.hidden = self.init_hidden()
         self.dropout = nn.Dropout(p=dropout_rate)
 
@@ -122,7 +135,10 @@ def forward(self, inputs, mask):
         return output
 
     def init_hidden(self):
-        return torch.zeros(self.num_directions * self.num_layers, self.batch_size, self.hidden_size)
+        # return torch.zeros(self.num_directions * self.num_layers, self.batch_size, self.hidden_size)
+
+        return (torch.zeros(self.num_directions * self.num_layers, self.batch_size, self.hidden_size),
+                torch.zeros(self.num_directions * self.num_layers, self.batch_size, self.hidden_size))
 
 
 class DynamicDecoder(nn.Module):
@@ -194,10 +210,10 @@ def forward(self, U, d_mask, target_span):
 
             # Get hidden state
             # TODO : There could be problem with the dimension
-            h_i = self.gru(u_cat.unsqueeze(1), h_i)[1]
+            output, h_i = self.gru(u_cat.unsqueeze(1), h_i)
 
             # Get new start estimate and start loss
-            s_i, _, start_loss_i = self.start_hmn(h_i, U, None, s_i, u_cat, None, s_target)
+            s_i, _, start_loss_i = self.start_hmn(output, U, None, s_i, u_cat, None, s_target)
             # s_i, start_loss_i = self.start_hmn(h_i, U, u_cat, s_target)
 
             # Update embedding at start estimate
@@ -207,7 +223,7 @@ def forward(self, U, d_mask, target_span):
             u_cat = torch.cat((u_s_i, u_e_i), 1)  # batch_size x 4l
 
             # Get new end estimate and end loss
-            e_i, _, end_loss_i = self.end_hmn(h_i, U, None, e_i, u_cat, None, e_target)
+            e_i, _, end_loss_i = self.end_hmn(output, U, None, e_i, u_cat, None, e_target)
             # e_i, end_loss_i = self.end_hmn(h_i, U, u_cat, e_target)
 
             # Update cumulative loss if computing loss
@@ -220,6 +236,7 @@ def forward(self, U, d_mask, target_span):
         loss = cumulative_loss / self.max_dec_steps
         return loss, s_i, e_i
 
+
 class CoattentionNetwork(nn.Module):
     def __init__(self, device,
                  hidden_size,
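The core of this commit is swapping the `nn.GRU` encoders for `nn.LSTM` (the old GRU construction is kept as a comment). Unlike a GRU, an LSTM carries two state tensors, so `init_hidden` now returns an `(h_0, c_0)` pair; the dynamic decoder, which still uses a GRU, now passes the per-step `output` of the GRU to the highway maxout networks instead of the raw hidden-state tensor. A small standalone sketch of the GRU/LSTM state difference (shapes chosen arbitrarily, not taken from this repository):

import torch
import torch.nn as nn

batch_size, seq_len, hidden_size, num_layers = 4, 7, 8, 1
num_directions = 1  # would be 2 with bidirectional=True

x = torch.randn(batch_size, seq_len, hidden_size)

# nn.GRU takes and returns a single hidden-state tensor.
gru = nn.GRU(input_size=hidden_size, hidden_size=hidden_size,
             num_layers=num_layers, batch_first=True)
h0 = torch.zeros(num_directions * num_layers, batch_size, hidden_size)
out_gru, h_n = gru(x, h0)

# nn.LSTM takes and returns a (hidden state, cell state) tuple,
# which is why init_hidden now returns a pair of zero tensors.
lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size,
               num_layers=num_layers, batch_first=True)
h0_c0 = (torch.zeros(num_directions * num_layers, batch_size, hidden_size),
         torch.zeros(num_directions * num_layers, batch_size, hidden_size))
out_lstm, (h_n_lstm, c_n_lstm) = lstm(x, h0_c0)

print(out_gru.shape, out_lstm.shape)  # both: batch_size x seq_len x hidden_size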
