cnn_without_ws.py
# helper_functions is expected to provide generate_pair_sets, preprocess_data,
# compute_error_, compare_and_predict and mini_batch_size, which are used below.
from helper_functions import *
import torch
from torch import nn
from torch.nn import functional as F

# Number of training samples
N = 1000

# Create training and testing datasets
train_input, train_target, train_classes, test_input, test_target, test_classes = generate_pair_sets(N)
train_input, test_input, train_classes, test_classes = preprocess_data(
    train_input, test_input, train_classes, test_classes)
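
# Assumed data layout (following the MNIST-pair setup this mini-project uses):
# generate_pair_sets returns N pairs of 14x14 grayscale digits, a binary target
# (is the first digit lesser or equal to the second?) and the two digit classes;
# preprocess_data is assumed to normalise the images and split each pair so that
# train_input[0] / train_input[1] hold the first and second digit of every pair.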


class CNNWWS(nn.Module):
    """Convolutional neural network used without weight sharing.

    Encodes a 1x14x14 digit image into a 100-dimensional feature vector that is
    consumed by the classifier heads built in train_without_ws.
    """

    def __init__(self):
        super().__init__()
        nb_hidden = 100
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv1_bn = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=2)
        self.conv2_bn = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
        self.conv3_bn = nn.BatchNorm2d(64)
        self.drop1 = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(64, nb_hidden)
        self.drop2 = nn.Dropout(p=0.5)

    def forward(self, x):
        # 1x14x14 -> conv1 -> 32x12x12 -> max pool -> 32x6x6
        x = F.relu(F.max_pool2d(self.conv1_bn(self.conv1(x)), kernel_size=2))
        # 32x6x6 -> conv2 -> 32x5x5 -> max pool -> 32x2x2
        x = F.relu(F.max_pool2d(self.conv2_bn(self.conv2(x)), kernel_size=2))
        # 32x2x2 -> conv3 -> 64x1x1
        x = F.relu(self.conv3_bn(self.conv3(x)))
        x = self.drop1(x)
        # Flatten the 64 remaining features and project them to nb_hidden dimensions.
        x = F.relu(self.fc1(x.view(-1, 64)))
        x = self.drop2(x)
        return x
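
# Quick shape check (illustrative, assuming the 14x14 inputs described above):
#   CNNWWS()(torch.randn(4, 1, 14, 14)).shape  ->  torch.Size([4, 100])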


def train_model(model1, model2, train_input1, train_input2, train_target1, train_target2, train_target3,
                mini_batch_size, classifier1, classifier2, classifier3, digit_scalar=1,
                binary_target_scalar=1):
    """
    Train the two per-image models and their classifier heads jointly.

    :param model1: the model that encodes the first image
    :param model2: the model that encodes the second image
    :param train_input1: the training input of the first image
    :param train_input2: the training input of the second image
    :param train_target1: the training target (digit class) of the first image
    :param train_target2: the training target (digit class) of the second image
    :param train_target3: the training target of the ordering
    :param mini_batch_size: the mini-batch size used by SGD
    :param classifier1: the classifier of the first digit
    :param classifier2: the classifier of the second digit
    :param classifier3: the classifier of the main ordering task
    :param digit_scalar: the weight used to calibrate the loss of the digit prediction
    :param binary_target_scalar: the weight used to calibrate the loss of the ordering prediction
    """
    criterion = nn.CrossEntropyLoss()
    eta = 1e-1
    # Each optimizer updates one CNN together with its digit classifier; the
    # ordering head is trained alongside the first optimizer so that every
    # trainable parameter receives updates.
    optimizer1 = torch.optim.SGD(list(model1.parameters()) + list(classifier1.parameters())
                                 + list(classifier3.parameters()), lr=eta, momentum=0)
    optimizer2 = torch.optim.SGD(list(model2.parameters()) + list(classifier2.parameters()),
                                 lr=eta, momentum=0)
    for e in range(25):
        sum_loss = 0
        for b in range(0, train_input1.size(0), mini_batch_size):
            encoded_img1 = model1(train_input1.narrow(0, b, mini_batch_size))
            encoded_img2 = model2(train_input2.narrow(0, b, mini_batch_size))
            output_x = classifier1(encoded_img1)
            output_y = classifier2(encoded_img2)
            # Concatenate the two embeddings for the ordering head.
            combined = torch.cat([encoded_img1, encoded_img2], 1)
            output_binary_target = classifier3(combined)
            loss_x = criterion(output_x, train_target1.narrow(0, b, mini_batch_size).long())
            loss_y = criterion(output_y, train_target2.narrow(0, b, mini_batch_size).long())
            loss_binary_target = criterion(output_binary_target,
                                           train_target3.narrow(0, b, mini_batch_size).long())
            loss = digit_scalar * (loss_x + loss_y) + binary_target_scalar * loss_binary_target
            optimizer1.zero_grad()
            optimizer2.zero_grad()
            loss.backward()
            sum_loss = sum_loss + loss.item()
            optimizer1.step()
            optimizer2.step()


def train_without_ws(digit_scalar):
    """
    Train without weight sharing.

    :param digit_scalar: the weight of the auxiliary digit loss, 0 if we don't want to use the auxiliary loss
    :return: the test error based on the digit-class predictions and the test error based on the direct ordering prediction
    """
    print("----Train model----")
    model1 = CNNWWS()
    model2 = CNNWWS()
    classifier1 = nn.Sequential(nn.Linear(100, 10), nn.Sigmoid())
    classifier2 = nn.Sequential(nn.Linear(100, 10), nn.Sigmoid())
    classifier3 = nn.Sequential(nn.Dropout(p=0.5), nn.Linear(100 * 2, 2), nn.Sigmoid())
    print("Auxiliary loss weight = " + str(digit_scalar))
    train_model(model1, model2, train_input[0], train_input[1], train_classes[0], train_classes[1], train_target,
                mini_batch_size, classifier1, classifier2, classifier3, digit_scalar)
    # Switch every module to evaluation mode before computing the test predictions.
    model1.eval()
    model2.eval()
    classifier1.eval()
    classifier2.eval()
    classifier3.eval()
    with torch.no_grad():
        encoder1 = model1(test_input[0])
        encoder2 = model2(test_input[1])
        output1 = classifier1(encoder1)
        output2 = classifier2(encoder2)
        prediction = classifier3(torch.cat([encoder1, encoder2], 1))
    print("Accuracy based on classes prediction:")
    print(100 - compute_error_(compare_and_predict(output1.max(1)[1], output2.max(1)[1]), test_target))
    print("Accuracy based on target prediction:")
    print(100 - compute_error_(prediction.max(1)[1], test_target))
    return (compute_error_(compare_and_predict(output1.max(1)[1], output2.max(1)[1]), test_target),
            compute_error_(prediction.max(1)[1], test_target))
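

# Illustrative entry point; the project's own driver script (if any) may call
# train_without_ws differently. digit_scalar > 0 enables and weights the
# auxiliary digit loss, digit_scalar = 0 disables it.
if __name__ == "__main__":
    train_without_ws(digit_scalar=1)
    train_without_ws(digit_scalar=0)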