# -*- coding: utf-8 -*-
"""
Created on Thu May 24 00:27:33 2018
@author: mwahdan
"""
from tensorflow.python.keras import layers
from tensorflow.python.keras import Input
from tensorflow.python.keras.models import Model, model_from_json
from tensorflow.python.keras.optimizers import Adadelta
from tensorflow.python.keras.callbacks import EarlyStopping
import tensorflow.python.keras.backend as K
import matplotlib.pyplot as plt
from metrics import pearson_correlation
import numpy as np


class SiameseModel:
def __init__(self, use_cudnn_lstm=True, plot_model_architecture=False):
n_hidden = 50
input_dim = 300
# unit_forget_bias: Boolean. If True, add 1 to the bias of the forget gate at initialization. Setting it to true will also force bias_initializer="zeros". This is recommended in Jozefowicz et al.
# he_normal: Gaussian initialization scaled by fan_in (He et al., 2014)
if use_cudnn_lstm:
            # Use CuDNNLSTM instead of LSTM because it is much faster on GPU
lstm = layers.CuDNNLSTM(n_hidden, unit_forget_bias=True,
kernel_initializer='he_normal',
kernel_regularizer='l2',
name='lstm_layer')
else:
lstm = layers.LSTM(n_hidden, unit_forget_bias=True,
kernel_initializer='he_normal',
kernel_regularizer='l2',
name='lstm_layer')
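        # Note: CuDNNLSTM runs only on a CUDA GPU and does not support masking,
        # which may be why the Masking layers below are left commented out.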
        # Building the left branch of the model: inputs are variable-length sequences of vectors of size input_dim (300).
left_input = Input(shape=(None, input_dim), name='input_1')
# left_masked_input = layers.Masking(mask_value=0)(left_input)
left_output = lstm(left_input)
# Building the right branch of the model: when you call an existing layer instance, you reuse its weights.
right_input = Input(shape=(None, input_dim), name='input_2')
# right_masked_input = layers.Masking(mask_value=0)(right_input)
right_output = lstm(right_input)
# Builds the classifier on top
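        # The Lambda below computes an element-wise similarity 1 - |h_left - h_right|:
        # close to 1 in dimensions where the two sentence encodings agree and
        # smaller (possibly negative) where they differ. The sigmoid Dense layer
        # then collapses this vector into a single similarity score in (0, 1).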
l1_norm = lambda x: 1 - K.abs(x[0] - x[1])
merged = layers.Lambda(function=l1_norm, output_shape=lambda x: x[0],
name='L1_distance')([left_output, right_output])
predictions = layers.Dense(1, activation='sigmoid', name='Similarity_layer')(merged)
# Instantiating and training the model: when you train such a model, the weights of the LSTM layer are updated based on both inputs.
self.model = Model([left_input, right_input], predictions)
self.__compile()
        self.model.summary()  # summary() prints itself; wrapping it in print() would print an extra 'None'
if plot_model_architecture:
from tensorflow.python.keras.utils import plot_model
plot_model(self.model, to_file='siamese_architecture.png')
def __compile(self):
        optimizer = Adadelta()  # note: no gradient clipping is configured here
# optimizer = 'adam'
        self.model.compile(loss='mse', optimizer=optimizer, metrics=[pearson_correlation])
def fit(self, left_data, right_data, targets, validation_data, epochs=5, batch_size=128):
        # The paper employs early stopping on a validation set, but does not specify its parameters.
        early_stopping_monitor = EarlyStopping(monitor='val_pearson_correlation', mode='max', patience=20)
        # callbacks = [early_stopping_monitor]
        callbacks = []
        history = self.model.fit([left_data, right_data], targets,
                                 validation_data=validation_data,
                                 epochs=epochs, batch_size=batch_size,
                                 callbacks=callbacks)
self.visualize_metric(history.history, 'loss')
self.visualize_metric(history.history, 'pearson_correlation')
self.load_activation_model()
def visualize_metric(self, history_dic, metric_name):
plt.plot(history_dic[metric_name])
legend = ['train']
if 'val_' + metric_name in history_dic:
plt.plot(history_dic['val_' + metric_name])
            legend.append('validation')
plt.title('model ' + metric_name)
plt.ylabel(metric_name)
plt.xlabel('epoch')
plt.legend(legend, loc='upper left')
plt.show()
def predict(self, left_data, right_data):
return self.model.predict([left_data, right_data])
def evaluate(self, left_data, right_data, targets, batch_size=128):
return self.model.evaluate([left_data, right_data], targets, batch_size=batch_size)
    def load_activation_model(self):
        # Expose the shared LSTM's output (the sentence encoding) for the left input, for visualization.
        self.activation_model = Model(inputs=self.model.input[0], outputs=self.model.get_layer('lstm_layer').output)
def visualize_activation(self, data):
activations = self.activation_model.predict(data)
plt.figure(figsize=(10,100), dpi=80)
plt.imshow(activations, cmap='Blues')
plt.grid()
        plt.xticks(ticks=range(0, 50))  # 50 ticks = n_hidden LSTM units
plt.yticks(ticks=range(0,data.shape[0]))
plt.show()
def visualize_specific_activation(self, data, dimension_idx):
activations = self.activation_model.predict(data)
if dimension_idx >= activations.shape[1]:
raise ValueError('dimension_idx must be less than %d' % activations.shape[1])
fig = plt.figure(figsize=(10,1), dpi=80)
ax = fig.add_subplot(111)
plt.title('dimension_idx = %d' % dimension_idx)
weights = activations[:, dimension_idx]
plt.yticks(ticks=[0, 1])
plt.plot(weights, np.zeros_like(weights), 'o')
for i, txt in enumerate(weights):
ax.annotate((i + 1), (weights[i], 0))
plt.show()
def save(self, model_folder='./model/'):
# serialize model to JSON
model_json = self.model.to_json()
with open(model_folder + 'model.json', 'w') as json_file:
json_file.write(model_json)
# serialize weights to HDF5
self.model.save_weights(model_folder + 'model.h5')
print('Saved model to disk')
    def save_pretrained_weights(self, model_weights_path='./model/pretrained_weights.h5'):
        self.model.save_weights(model_weights_path)
        print('Saved pretrained weights to disk')
def load(self, model_folder='./model/'):
# load json and create model
        with open(model_folder + 'model.json', 'r') as json_file:
            loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights(model_folder + 'model.h5')
print('Loaded model from disk')
self.model = loaded_model
# loaded model should be compiled
self.__compile()
self.load_activation_model()
    def load_pretrained_weights(self, model_weights_path='./model/pretrained_weights.h5'):
        # load weights into the existing model
        self.model.load_weights(model_weights_path)
print('Loaded pretrained weights from disk')
self.__compile()
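

# ---------------------------------------------------------------------------
# Minimal usage sketch (an illustration, not part of the original module):
# random arrays stand in for real sentence embeddings. Shapes follow the
# model above: batches of sequences of 300-dimensional vectors, with
# similarity targets in [0, 1].
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    seq_len, input_dim = 10, 300
    # Hypothetical toy data: 32 training pairs and 8 validation pairs.
    left = np.random.rand(32, seq_len, input_dim)
    right = np.random.rand(32, seq_len, input_dim)
    targets = np.random.rand(32)
    val_left = np.random.rand(8, seq_len, input_dim)
    val_right = np.random.rand(8, seq_len, input_dim)
    val_targets = np.random.rand(8)

    siamese = SiameseModel(use_cudnn_lstm=False)  # plain LSTM variant also runs on CPU
    siamese.fit(left, right, targets,
                validation_data=([val_left, val_right], val_targets),
                epochs=2, batch_size=16)
    print(siamese.predict(left[:2], right[:2]))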