# utils_gradientGP.py
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
def gradient_descent_dropout(x, y, p1, p2, tau, ep=0.0001, max_iter=1000,
                             alpha=0.0001, decrease_lr=False, K=100):
    """Fit a single-hidden-layer dropout network by gradient descent on the
    Monte Carlo objective L_GP_MC (dropout as an approximate Gaussian process)."""
    res = []
    N = x.shape[0]  # number of samples
    Q = x.shape[1]  # input dimension
    D = y.shape[1]  # output dimension
    # initial weight matrices and bias
    M1 = np.random.normal(0., 1., (Q, K))
    M2 = np.random.normal(0., 1., (K, D))
    m = np.random.normal(0., 1., (1, K))
    L_GP_MC = 0
    # model precision; note that this overrides the tau argument
    tau = p1 / (2 * 0.01 * N)
    for it in tqdm(range(max_iter), desc="main loop"):
        # Bernoulli dropout masks, one column per sample
        z1v = np.random.binomial(1, p1, size=(Q, N)).astype(float)
        z2v = np.random.binomial(1, p2, size=(K, N)).astype(float)
        grad_M1 = np.zeros((Q, K))
        grad_M2 = np.zeros((K, D))
        grad_m = np.zeros((1, K))
        L_tmp = L_GP_MC
        # weight-decay (KL) part of the objective
        L_GP_MC = (- p1 * np.linalg.norm(M1)**2
                   - p2 * np.linalg.norm(M2)**2
                   - np.linalg.norm(m)**2)
        scale = (1. / K)**0.5
        for i in range(N):
            # diagonal dropout masks for this sample
            z1 = np.diag(z1v[:, i])
            z2 = np.diag(z2v[:, i])
            xi = x[i][None, :]                      # shape (1, Q)
            # forward pass: y_pred = scale * relu(x z1 M1 + m) z2 M2
            pre = xi @ z1 @ M1 + m                  # pre-activation, (1, K)
            h = np.maximum(0., pre)
            y_pred = scale * h @ z2 @ M2            # shape (1, D)
            # data-fit part of the objective
            L_GP_MC -= tau * np.linalg.norm(y_pred - y[i])**2
            # gradient of the squared-error term w.r.t. y_pred
            partial_Y = 2. * tau * (y_pred - y[i])  # shape (1, D)
            relu_mask = (pre > 0)                   # relu derivative, (1, K)
            back = partial_Y @ (z2 @ M2).T          # shape (1, K)
            # gradients for each parameter
            grad_M2 += scale * (z2 @ h.T @ partial_Y)
            grad_m += scale * relu_mask * back
            grad_M1 += scale * (z1 @ xi.T @ (relu_mask * back))
        # weight-decay gradients
        grad_M1 += p1 * M1
        grad_M2 += p2 * M2
        grad_m += m
        # optionally use a decreasing step size
        if decrease_lr:
            alpha_tmp = alpha / (1. + 0.0001 * it)**0.25
        else:
            alpha_tmp = alpha
        # gradient step
        M1 -= alpha_tmp * grad_M1
        M2 -= alpha_tmp * grad_M2
        m -= alpha_tmp * grad_m
        res.append(L_GP_MC)
        # stop once the objective stops changing
        if np.abs(L_tmp - L_GP_MC) < ep:
            return res, M1, M2, m
    return res, M1, M2, m
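# A minimal usage sketch of gradient_descent_dropout on synthetic data; the
# toy problem and hyper-parameters below are illustrative assumptions, and
# shapes follow the function's expectations (x: (N, Q), y: (N, D)).
def _demo_gradient_descent_dropout(seed=0):
    rng = np.random.default_rng(seed)
    x_toy = rng.uniform(-1., 1., size=(50, 2))   # N=50 samples, Q=2 features
    y_toy = x_toy.sum(axis=1, keepdims=True)     # D=1 target
    loss_hist, M1, M2, m = gradient_descent_dropout(
        x_toy, y_toy, p1=0.8, p2=0.9, tau=0.1, max_iter=100, alpha=1e-3)
    return loss_hist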
class Gaussian_Process:
    """Dropout network treated as an approximate Gaussian process."""

    def __init__(self, p1=0.8, p2=0.9, hidden_units=100):
        self.p1 = p1
        self.p2 = p2
        self.K = hidden_units
        self.M1 = None
        self.M2 = None
        self.m = None
    def fit(self, X_train, Y_train, p1=0.8, p2=0.9, tau=0.1, ep=0.0001,
            max_iter=1000, alpha=0.0005, decrease_lr=False):
        history_loss, self.M1, self.M2, self.m = gradient_descent_dropout(
            X_train, Y_train, p1, p2, tau, ep=ep, max_iter=max_iter,
            alpha=alpha, decrease_lr=decrease_lr, K=self.K)
        return history_loss
    def predict(self, X_test, p1=0.8, p2=0.9):
        """One stochastic forward pass per test point (a single MC-dropout draw)."""
        Q = self.M1.shape[0]
        K = self.K
        N = X_test.shape[0]  # number of samples
        if X_test.shape[1] != Q:
            raise ValueError("dimension mismatch: X_test has %d features, "
                             "expected %d" % (X_test.shape[1], Q))
        Y_predict = []
        for i in range(N):
            z1 = np.diag(np.random.binomial(1, p1, size=Q))
            z2 = np.diag(np.random.binomial(1, p2, size=K))
            # prediction for X_test[i]: scale * relu(x z1 M1 + m) z2 M2
            y_pred = (1. / K)**0.5 * np.maximum(
                0., X_test[i] @ z1 @ self.M1 + self.m) @ z2 @ self.M2
            Y_predict.append(y_pred)
        return np.vstack(Y_predict)  # shape (N, D)
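    # A minimal sketch of a hypothetical helper: predict() draws a single
    # dropout mask per point, so repeating stochastic passes and averaging
    # them approximates the MC-dropout predictive mean, while their spread
    # estimates the predictive variance. n_samples is an assumed parameter.
    def predict_mc(self, X_test, n_samples=100, p1=0.8, p2=0.9):
        samples = np.stack([self.predict(X_test, p1, p2)
                            for _ in range(n_samples)])
        return samples.mean(axis=0), samples.var(axis=0)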
    def accuracy(self, X_test, Y_test, p1=0.8, p2=0.9):
        """Relative squared error ||Y_pred - Y_test||^2 / ||Y_test||^2
        (lower is better, despite the name)."""
        Y_predict = self.predict(X_test, p1, p2)
        return (np.linalg.norm(Y_predict - Y_test)**2
                / np.linalg.norm(Y_test)**2)
    def plot_acc(self, X_test, Y_test, p1=0.8, p2=0.9):
        # scatter predictions against targets; a perfect model lies on y = x
        Y_predict = self.predict(X_test, p1, p2)
        plt.scatter(Y_predict, Y_test)
        plt.xlabel("predicted")
        plt.ylabel("target")
        plt.show()
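# A minimal end-to-end sketch; the synthetic 1-D regression data and
# hyper-parameters below are illustrative assumptions.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    X = rng.uniform(-1., 1., size=(100, 1))
    Y = np.sin(3. * X) + 0.1 * rng.standard_normal((100, 1))
    gp = Gaussian_Process(p1=0.8, p2=0.9, hidden_units=50)
    history = gp.fit(X, Y, max_iter=200, alpha=1e-3)
    print("relative squared error:", gp.accuracy(X, Y))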