-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpractical_2.py
More file actions
146 lines (119 loc) · 4.92 KB
/
Copy pathpractical_2.py
File metadata and controls
146 lines (119 loc) · 4.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import torch
from torch import Tensor
import practical_prologue as prologue
def nearest_classification(train_input, train_target, x):
"""
Get a training set and a test sample and return the label
of the training point closest to the test
Parameters
----------
train_input : 2d float tensor
dimensions n x d
containing training vectors
train_target : 1d long tensor
dimension n
containing training labels
x : 1d float tensor
dimension d
contains test vector
Returns
-------
y : long tensor
the class of the train sample closest to x for the L2 norm
"""
L2 = torch.sum(torch.pow((train_input - x), 2), 1)
sorted_norm, indices = torch.sort(L2)
y_pred = train_target[indices[0]]
return y_pred
def compute_nb_errors(train_input, train_target, test_input, test_target,
mean=None, proj=None):
"""
Take vectors train_input and test_input, apply operator proj (if it is
not None) to both and return the number of classification errors using
the 1-nearest-neighbour rule on the resulting data
Parameters
----------
train_input : 2d float tensor
dimension n x d containing train vectors
train_target : 1d long tensor
dimension n containing train labels
test_input : 2d float tensor
dimension m x d containing test vectors
test_target : 1d long tensor
dimension m containing test labels
mean : None or 1d float tensor
1d float tensor dimension d
proj : 2d float tensor
dim cxd
used as a basis to reconstruct the input data
Returns
-------
err : number of classification errors
"""
if mean is not None:
train_input = train_input - mean
test_input = test_input - mean
if proj is not None:
train_input = torch.mm(train_input, proj.t())
test_input = torch.mm(test_input, proj.t())
y_pred = torch.empty(test_input.shape[0])
for i, x in enumerate(test_input):
y_pred[i] = nearest_classification(train_input, train_target, x)
# count number of incorrect classifications by comparing to test_target
incorrect = (y_pred != test_target).sum()
return incorrect
def PCA(x):
"""
Take a 2D float tensor and return the mean and the PCA basis ranked
in decreasing order of eigenvalues
Parameters
----------
x : 2d float tensor
dimension n x d
Returns
-------
mean : 1d vector dimension d
basis : 2d tensor dxd
mean vector and PCA basis ranked in decreasing order
of eigenvalues
"""
mean = torch.mean(train_input, 0)
cov = (1 / (x.size()[1]) -1 ) * torch.mm((x-mean).t(), (x-mean))
# get PCA basis from eigendecomposition
eigvals, eigvecs = torch.eig(cov, eigenvectors=True)
# keep real data -is this ok ?
eigvals = eigvals[:,0]
# sorted in decreasing order of eigenvalues
sorted_eigvals, indices = torch.sort(torch.abs(eigvals), descending=True)
basis = eigvecs[indices]
return mean, basis
if __name__ == "__main__":
# FIRST MNIST
train_input, train_target, test_input, test_target = prologue.load_data()
# project data on random 100d subspace
mean = torch.mean(train_input, 0)
random_basis = torch.empty((100, train_input.shape[1])).normal_()
n_incorrect_random = compute_nb_errors(train_input, train_target, test_input,
test_target, mean=mean, proj=random_basis)
print('n incorrect for random projection', n_incorrect_random)
for n in [3, 10, 50, 100]:
mean, basis = PCA(train_input)
basis = basis[:n]
n_incorrect_pca_n = compute_nb_errors(train_input, train_target, test_input,
test_target, mean=mean, proj=basis)
print('n incorrect for pca with {} dim'.format(n), n_incorrect_pca_n)
# this is suspicious. probably did something wrong. shouldn't get so many wrong
# THEN CIFAR
train_input, train_target, test_input, test_target = prologue.load_data(cifar=True)
# project data on random 100d subspace
mean = torch.mean(train_input, 0)
random_basis = torch.empty((100, train_input.shape[1])).normal_()
n_incorrect_random = compute_nb_errors(train_input, train_target, test_input,
test_target, mean=mean, proj=random_basis)
print('n incorrect for random projection', n_incorrect_random)
for n in [3, 10, 50, 100]:
mean, basis = PCA(train_input)
basis = basis[:n]
n_incorrect_pca_n = compute_nb_errors(train_input, train_target, test_input,
test_target, mean=mean, proj=basis)
print('n incorrect for pca with {} dim'.format(n), n_incorrect_pca_n)