-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPCA.py
110 lines (81 loc) · 3.1 KB
/
PCA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# import libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader
from customize_data import CustomData
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# Select the compute device: the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Build a basic neural network model with one hidden layer
class NN(nn.Module):
    """Fully connected classifier with a single ReLU hidden layer.

    Args:
        input_size: Number of features in each input sample.
        num_classes: Number of scores produced per sample.
        hidden_size: Width of the hidden layer. Defaults to 50, the value
            that used to be hard-coded, so existing callers are unaffected.
    """

    def __init__(self, input_size: int, num_classes: int, hidden_size: int = 50):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class scores for ``x``.

        ``x`` must have ``input_size`` features in its last dimension. No
        softmax is applied to the output of the second linear layer.
        """
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)
# Read the MNIST train and test splits from ./data (no download is attempted)
train_dataset = datasets.MNIST(
    root='./data', train=True, download=False, transform=transforms.ToTensor()
)
test_dataset = datasets.MNIST(
    root='./data', train=False, download=False, transform=transforms.ToTensor()
)

# Flatten every image into one row: (num_images, dim1 * dim2)
train = torch.reshape(
    train_dataset.data,
    (-1, train_dataset.data.shape[1] * train_dataset.data.shape[2]),
)
test = torch.reshape(
    test_dataset.data,
    (-1, test_dataset.data.shape[1] * test_dataset.data.shape[2]),
)

# Standardize the features, then keep 100 principal components.
# Both transforms are fitted on the training split only and then
# re-applied unchanged to the test split.
scaler = StandardScaler()
pca = PCA(n_components=100)
train = scaler.fit_transform(train)
train = pca.fit_transform(train)
test = scaler.transform(test)
test = pca.transform(test)

# Hyperparameters
input_size = train.shape[1]  # number of features left after PCA
num_classes = 10
learning_rate = 0.001
epochs = 10
batch_size = 64

# Wrap the reduced features and the original labels, then batch them
train_dataset = CustomData(train, train_dataset.targets)
test_dataset = CustomData(test, test_dataset.targets)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

# Build the network on the selected device
model = NN(input_size, num_classes).to(device)

# Cross-entropy loss optimized with Adam
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
# Fit the model: `epochs` full passes over the training loader
for epoch in range(epochs):
    for batch, target in train_loader:
        # Flatten each sample to one row, cast to float32, move to the device
        batch = batch.reshape(batch.shape[0], -1).float().to(device)
        target = target.to(device)

        # Forward pass: score the batch and measure the loss
        loss = loss_fn(model(batch), target)

        # Backward pass: clear old gradients, backpropagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# Check accuracy on training & test to see how good our model
def check_accuracy(loader, model):
    """Return the fraction of samples in ``loader`` that ``model`` gets right.

    The model is switched to evaluation mode while scoring and is put back
    into whichever mode (train or eval) it was in when the call started.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` tensor batches. Each
            input batch is flattened to ``(batch, -1)`` and cast to float.
        model: Module that maps a flattened batch to per-class scores.

    Returns:
        A 0-dim float tensor: correct predictions divided by the number of
        samples seen, or 0.0 when ``loader`` yields no samples.
    """
    # Score on the device the model's parameters live on; fall back to the
    # module-level ``device`` for a model that has no parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    num_correct = torch.zeros((), dtype=torch.long, device=target_device)
    num_samples = 0

    # Remember the current mode so evaluation does not force training mode
    # onto a model the caller had deliberately put into eval mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.reshape(x.shape[0], -1)
                x = x.float().to(device=target_device)
                y = y.to(device=target_device)

                scores = model(x)
                # The index of the highest score is the predicted class
                _, predictions = scores.max(1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    if num_samples == 0:
        # Nothing was evaluated; avoid dividing by zero
        return num_correct.float()
    return num_correct / num_samples
# Report accuracy as a percentage rounded to two decimal places.
# ('%2f' only set a minimum field width and still printed six decimals.)
print('Accuracy on training set: %.2f' % (100 * check_accuracy(train_loader, model)))
print('Accuracy on test set: %.2f' % (100 * check_accuracy(test_loader, model)))