-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodels.py
79 lines (73 loc) · 2.71 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import numpy as np
from scipy import stats
class OLS:
    """Ordinary least squares regression with basic fit diagnostics.

    The coefficients are obtained from the normal equations,
    ``beta = (X'X)^-1 X'Y``.  After :meth:`fit` the instance exposes:

    * ``X``          -- design matrix actually used (with the leading column
      of ones when an intercept was requested)
    * ``Y``          -- response passed to :meth:`fit`
    * ``beta``       -- estimated coefficients (intercept first, if any)
    * ``yhat``       -- in-sample predictions
    * ``residuals``  -- ``Y - yhat``
    * ``rss``        -- residual sum of squares
    * ``tss``        -- centred total sum of squares
    * ``r2``         -- R^2 (centred with an intercept, uncentred without)
    * ``adj_r2``     -- R^2 adjusted for the residual degrees of freedom
    * ``mse``        -- ``rss / n``
    * ``se``         -- coefficient standard errors
    * ``t``, ``p``   -- lists of t statistics and two-sided p-values
    * ``dw``         -- Durbin-Watson statistic of the residuals
    * ``mape``       -- mean absolute percentage error, in percent
    * ``intercept``  -- whether a constant column was added
    """

    def __init__(self):
        self.X = []
        self.Y = []
        self.beta = []
        self.residuals = []
        self.yhat = []
        self.rss = 0
        self.tss = 0
        self.r2 = 0
        self.adj_r2 = 0
        self.mse = 0
        self.se = []
        self.t = []
        self.p = []
        self.dw = 0
        self.mape = 0
        # Empty string marks "not fitted yet"; fit() replaces it with a bool.
        self.intercept = ''

    def _design_matrix(self, X):
        """Return X as an array, with a leading column of ones if required."""
        if self.intercept:
            return np.insert(X, 0, 1, axis=1)
        return np.asarray(X)

    def _diagnostics(self, Y, pred, n_params):
        """Compute goodness-of-fit statistics shared by fit() and test().

        ``n_params`` is the number of columns of the design matrix, i.e. it
        already counts the intercept when there is one.

        Returns ``(residuals, rss, tss, r2, adj_r2, mse, dw, mape)``.
        """
        n_obs = len(pred)
        residuals = Y - pred
        rss = np.sum(residuals ** 2, axis=0)
        tss = np.sum((Y - np.mean(Y)) ** 2, axis=0)
        if self.intercept:
            r2 = 1 - (rss / tss)
            k_const = 1
        else:
            # Without a constant the fit is compared with the zero model,
            # so the uncentred sum of squares is the denominator.
            r2 = 1 - (rss / np.sum(np.square(Y), axis=0))
            k_const = 0
        # (n - k_const) / (n - n_params): equals (n-1)/(n-p-1) with an
        # intercept and n/(n-p) without one.
        adj_r2 = 1 - ((1 - r2) * (n_obs - k_const)) / (n_obs - n_params)
        mse = rss / n_obs
        # Durbin-Watson: squared successive residual differences over RSS.
        dw = np.sum(np.diff(residuals, axis=0) ** 2, axis=0) / rss
        # NOTE: yields inf/nan when Y contains zeros.
        mape = np.mean(np.abs(residuals / Y)) * 100
        return residuals, rss, tss, r2, adj_r2, mse, dw, mape

    def fit(self, X, Y, intercept=True):
        """Estimate the coefficients and populate all diagnostics.

        Parameters
        ----------
        X : array-like, two-dimensional (observations x regressors)
        Y : array-like, one value per observation
        intercept : bool, default True
            When true a column of ones is prepended to ``X``.

        Returns
        -------
        The coefficient vector ``beta`` (intercept first when present).

        Raises
        ------
        numpy.linalg.LinAlgError
            If ``X'X`` is singular.

        Every attribute is recomputed from scratch, so calling ``fit``
        repeatedly on the same instance is safe.
        """
        self.intercept = bool(intercept)
        design = self._design_matrix(X)
        n_obs, n_params = design.shape

        # (X'X)^-1 is needed for both the coefficients and their covariance,
        # so it is computed once.
        gram_inv = np.linalg.inv(np.dot(design.T, design))

        self.X = design
        self.Y = Y
        self.beta = np.dot(gram_inv, np.dot(design.T, Y))
        self.yhat = np.dot(design, self.beta)

        (self.residuals, self.rss, self.tss, self.r2, self.adj_r2,
         self.mse, self.dw, self.mape) = self._diagnostics(
            Y, self.yhat, n_params)

        # Residual degrees of freedom count every estimated coefficient,
        # including the intercept.
        df_resid = n_obs - n_params
        sigma2 = self.rss / df_resid
        self.se = np.sqrt(sigma2 * np.diagonal(gram_inv))

        t_values = self.beta / self.se
        self.t = list(t_values)
        # sf() keeps precision for very small p-values where 1 - cdf()
        # would round to zero.
        self.p = list(2 * stats.t.sf(np.abs(t_values), df_resid))
        return self.beta

    def test(self, X, Y):
        """Evaluate the fitted model on another data set.

        Parameters
        ----------
        X : array-like, two-dimensional, same regressors as used in fit()
        Y : array-like, observed responses for ``X``

        Returns
        -------
        tuple
            ``(residuals, r2, adj_r2, mse, dw, pred, mape)`` computed on the
            supplied data; the instance itself is not modified.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if np.size(self.beta) == 0:
            raise ValueError("OLS model has not been fitted; call fit() first")
        design = self._design_matrix(X)
        pred = np.dot(design, self.beta)
        residuals, _rss, _tss, r2, adj_r2, mse, dw, mape = self._diagnostics(
            Y, pred, design.shape[1])
        return residuals, r2, adj_r2, mse, dw, pred, mape