-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrawTensorFlow.py
More file actions
176 lines (154 loc) · 7.5 KB
/
Copy pathrawTensorFlow.py
File metadata and controls
176 lines (154 loc) · 7.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import tensorflow as tf
import numpy as np
import cPickle
class Model(object):
    """LSTM encoder-decoder that maps a token-id sequence to per-step class logits.

    The graph is assembled by calling, in order, ``create_placeholders``,
    ``add_prediction_op``, ``add_loss_op`` and ``add_training_op``.
    """

    def __init__(self, max_output_length, max_input_length, embed_size, embeddings, hidden_size, n_classes, batch_size, lr):
        """Store the hyper-parameters and create the embedding variable.

        Args:
            max_output_length: number of decoder time steps; also the width of
                the labels and mask placeholders.
            max_input_length: number of encoder time steps; also the width of
                the input placeholder.
            embed_size: length of one embedding vector.
            embeddings: initial value of the embedding matrix.
                (presumably a float32 array of shape (vocab, embed_size) --
                TODO confirm against the caller)
            hidden_size: number of units in each LSTM cell.
            n_classes: number of logits produced per decoder step.
            batch_size: number of rows in every fed batch. The initial LSTM
                state and the decoder start input are built with this fixed
                size, so batches of any other size will not fit the graph.
            lr: learning rate handed to the Adam optimizer.
        """
        self.hidden_size = hidden_size
        self.max_output_length = max_output_length
        self.max_input_length = max_input_length
        self.embed_size = embed_size
        self.pretrained_embeddings = embeddings
        self.n_classes = n_classes
        self.batch_size = batch_size
        self.lr = lr
        # One embedding matrix is shared by the encoder inputs and the
        # decoder inputs (see add_embedding / output_embedding).
        self.embeddings_var = tf.Variable(self.pretrained_embeddings)

    def create_placeholders(self):
        """Create the input, labels, mask and dropout placeholders."""
        self.input_placeholder = tf.placeholder(tf.int32, (None, self.max_input_length))
        self.labels_placeholder = tf.placeholder(tf.int32, (None, self.max_output_length))
        self.mask_placeholder = tf.placeholder(tf.bool, (None, self.max_output_length))
        # Passed straight to tf.nn.dropout as its second argument.
        self.dropout_placeholder = tf.placeholder(tf.float32)

    def create_feed_dict(self, inputs_batch, mask_batch, dropout, labels):
        """Build the feed dictionary for one ``sess.run`` call.

        Args:
            inputs_batch: value for the input placeholder.
            mask_batch: value for the mask placeholder.
            dropout: value for the dropout placeholder.
            labels: value for the labels placeholder, or None to leave the
                labels placeholder unfed.

        Returns:
            A dict mapping placeholders to the given values.
        """
        feed_dict = {
            self.input_placeholder: inputs_batch,
            self.mask_placeholder: mask_batch,
            self.dropout_placeholder: dropout,
        }
        if labels is not None:
            feed_dict[self.labels_placeholder] = labels
        return feed_dict

    def add_embedding(self):
        """Return the embedded inputs, shaped (-1, max_input_length, embed_size)."""
        lookup = tf.nn.embedding_lookup(self.embeddings_var, self.input_placeholder)
        return tf.reshape(lookup, (-1, self.max_input_length, self.embed_size))

    def output_embedding(self):
        """Return the embedded labels, shaped (-1, max_output_length, embed_size)."""
        lookup = tf.nn.embedding_lookup(self.embeddings_var, self.labels_placeholder)
        return tf.reshape(lookup, (-1, self.max_output_length, self.embed_size))

    def _decode(self, cell, state, next_input, keep_prob, U, b_2):
        """Unroll ``cell`` for max_output_length steps and return the step logits.

        Args:
            cell: the decoder LSTM cell.
            state: initial cell state.
            next_input: callable ``(time_step, logits_so_far) -> input tensor``
                choosing what the cell is fed at each step.
            keep_prob: second argument of tf.nn.dropout, applied to the cell
                output.
            U: projection matrix of shape (hidden_size, n_classes).
            b_2: projection bias of shape (n_classes,).

        Returns:
            A list with one logits tensor per time step.
        """
        logits = []
        for time_step in range(self.max_output_length):
            if time_step > 0:
                # The cell's weights exist after the first step; later steps
                # must look them up instead of creating new ones.
                tf.get_variable_scope().reuse_variables()
            output, state = cell(next_input(time_step, logits), state)
            output = tf.nn.dropout(output, keep_prob)
            logits.append(tf.matmul(output, U) + b_2)
        return logits

    def add_prediction_op(self):
        """Build the encoder and both decoders.

        The encoder consumes the embedded input one time step at a time. Its
        final state seeds two decoders that share the same cell, projection
        and weights:

        * a training decoder that, after a zero start input, is fed the
          embedding of the previous label from the labels placeholder;
        * an inference decoder that, after a zero start input, is fed the
          embedding of the arg-max of its own previous logits.

        Encoder and decoder each run inside their own variable scope, so that
        ``reuse_variables()`` only switches that scope to reuse mode instead
        of the enclosing scope (which would otherwise remain in reuse mode
        for all graph code built afterwards) and the two cells keep their
        weights under distinct names.

        Returns:
            A pair ``(train_logits, test_logits)``, each stacked along axis 1
            from max_output_length tensors with n_classes columns.
        """
        x = self.add_embedding()
        y = self.output_embedding()
        keep_prob = self.dropout_placeholder

        encoder_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_size)
        decoder_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_size)

        # Output projection shared by both decoders.
        U = tf.get_variable('U', shape=(self.hidden_size, self.n_classes), dtype=tf.float32,
                            initializer=tf.contrib.layers.xavier_initializer())
        b_2 = tf.get_variable('b_2', shape=(self.n_classes,), dtype=tf.float32,
                              initializer=tf.constant_initializer(0.0))

        # The LSTM state is a pair of tensors, both starting at zero.
        h_c = tf.zeros((self.batch_size, self.hidden_size), tf.float32)
        h_m = tf.zeros((self.batch_size, self.hidden_size), tf.float32)
        state = (h_c, h_m)

        with tf.variable_scope('encoder'):
            for time_step in range(self.max_input_length):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                _, state = encoder_cell(x[:, time_step, :], state)

        start_input = tf.zeros((self.batch_size, self.embed_size))

        def teacher_forced_input(time_step, _logits):
            # Previous label embedding; the first step has no predecessor.
            if time_step == 0:
                return start_input
            return y[:, time_step - 1, :]

        def greedy_input(time_step, logits):
            # Embedding of the model's own most likely previous class.
            if time_step == 0:
                return start_input
            return tf.nn.embedding_lookup(self.embeddings_var, tf.argmax(logits[-1], 1))

        with tf.variable_scope('decoder'):
            train_logits = self._decode(decoder_cell, state, teacher_forced_input, keep_prob, U, b_2)
        # Re-enter the same scope in reuse mode so the inference decoder uses
        # the weights created by the training decoder.
        with tf.variable_scope('decoder', reuse=True):
            test_logits = self._decode(decoder_cell, state, greedy_input, keep_prob, U, b_2)

        # tf.stack replaces tf.pack, which newer TensorFlow releases removed.
        return (tf.stack(train_logits, 1), tf.stack(test_logits, 1))

    def convertToLabels(self, testPreds):
        """Return the arg-max over axis 2 of ``testPreds`` (one class id per step)."""
        # Debug output: prints the tensor object while the graph is built.
        print(testPreds)
        return tf.argmax(testPreds, 2)

    def add_loss_op(self, preds):
        """Add the masked, averaged cross-entropy loss to the graph.

        Args:
            preds: logits tensor; the labels placeholder supplies the target
                class ids and the mask placeholder selects which positions
                contribute to the loss.

        Returns:
            loss: the mean cross entropy over the positions whose mask is
                True.
        """
        # Keyword arguments: newer TensorFlow releases reject positional use.
        per_token_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=preds, labels=self.labels_placeholder)
        return tf.reduce_mean(tf.boolean_mask(per_token_loss, self.mask_placeholder))

    def add_training_op(self, loss):
        """Create the op that performs one Adam update on ``loss``.

        Args:
            loss: loss tensor to minimize.

        Returns:
            train_op: the op to pass to ``sess.run`` to train the model.
        """
        return tf.train.AdamOptimizer(self.lr).minimize(loss)
def main():
    """Train the model on the saved arrays, then print decoded label sequences.

    Reads the arrays 'X', 'Y', 'outputMask' and 'embeddings' with ``np.load``
    and the pickled dictionary 'labelsDict' from the working directory.
    Trains on consecutive batches (stopping after 1000 batches at the latest),
    printing the loss and the batch counter after every step, and finally
    prints the decoded labels for eight batches.
    """
    X = np.load('X')
    Y = np.load('Y')
    mask = np.load('outputMask')
    embeddings = np.load('embeddings').astype(np.float32)

    # The model builds its initial state with a fixed batch size, so the same
    # value has to be used when slicing the data below.
    batch_size = 10
    model = Model(31, 31, 300, embeddings, 128, 30004, batch_size, 0.001)
    model.create_placeholders()
    preds = model.add_prediction_op()
    loss = model.add_loss_op(preds[0])
    training_op = model.add_training_op(loss)
    init = tf.global_variables_initializer()
    words = model.convertToLabels(preds[1])

    count = 0
    with tf.Session() as sess:
        sess.run(init)

        # The last 1000 rows are kept out of the training range.
        for i in range((X.shape[0] - 1000) // batch_size):
            rows = slice(batch_size * i, batch_size * (i + 1))
            # The dropout placeholder is fed 1 for every training step.
            feed = model.create_feed_dict(X[rows, :], mask[rows, :], 1, Y[rows, :])
            newLoss, _ = sess.run([loss, training_op], feed_dict=feed)
            print(newLoss)
            print(count)
            count += 1
            if count == 1000:
                break

        # SECURITY: unpickling can execute arbitrary code; 'labelsDict' must
        # come from a trusted source.
        with open('labelsDict') as labels_file:
            labelsDict = cPickle.load(labels_file)
        # Invert the mapping so that predicted ids can be looked up.
        labelsDict = {value: key for key, value in labelsDict.items()}

        # NOTE(review): `i` is used as a batch index below, yet the range
        # bounds are themselves multiplied by batch_size, so the rows read
        # start at batch_size * (batch_size * count + 2). This looks like it
        # may be scaled twice -- confirm the intended evaluation rows.
        for i in range(batch_size * count + 2, batch_size * count + 10):
            rows = slice(batch_size * i, batch_size * (i + 1))
            feed = model.create_feed_dict(X[rows, :], mask[rows, :], 1, None)
            sent = sess.run(words, feed_dict=feed)
            print([[labelsDict[key] for key in arr] for arr in sent])
# Run the training script only when executed directly, so that importing
# this module does not start a training session.
if __name__ == "__main__":
    main()