-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathLinearRegression.h
156 lines (139 loc) · 4.82 KB
/
LinearRegression.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#ifndef CH1_LINEARREGRESSION_H
#define CH1_LINEARREGRESSION_H
#include <vector>
#include <stdexcept>
#include <cmath>
#include <ostream>
#include <sstream>
#include <iomanip>
using std::vector;
using std::runtime_error;
using std::ostream;
using std::setprecision;
using std::ostringstream;
/**
* The {@code LinearRegression} class performs a simple linear regression
* on an set of <em>n</em> data points (<em>y<sub>i</sub></em>, <em>x<sub>i</sub></em>).
* That is, it fits a straight line <em>y</em> = α + β <em>x</em>,
* (where <em>y</em> is the response variable, <em>x</em> is the predictor variable,
* α is the <em>y-intercept</em>, and β is the <em>slope</em>)
* that minimizes the sum of squared residuals of the linear regression model.
* It also computes associated statistics, including the coefficient of
* determination <em>R</em><sup>2</sup> and the standard deviation of the
* estimates for the slope and <em>y</em>-intercept.
*
* @author Robert Sedgewick
* @author Kevin Wayne
*/
class LinearRegression {
private:
double intercept, slope;
double r2;
double svar0, svar1;
public:
/**
* Performs a linear regression on the data points {@code (y[i], x[i])}.
*
* @param x the values of the predictor variable
* @param y the corresponding values of the response variable
* @throws IllegalArgumentException if the lengths of the two arrays are not equal
*/
LinearRegression(vector<double> &x, vector<double> &y) {
if (x.size() != y.size()) {
throw runtime_error("array lengths are not equal");
}
int n = x.size();
// first pass
double sumx = 0.0, sumy = 0.0, sumx2 = 0.0;
for (int i = 0; i < n; i++) {
sumx += x[i];
sumx2 += x[i] * x[i];
sumy += y[i];
}
double xbar = sumx / n;
double ybar = sumy / n;
// second pass: compute summary statistics
double xxbar = 0.0, yybar = 0.0, xybar = 0.0;
for (int i = 0; i < n; i++) {
xxbar += (x[i] - xbar) * (x[i] - xbar);
yybar += (y[i] - ybar) * (y[i] - ybar);
xybar += (x[i] - xbar) * (y[i] - ybar);
}
slope = xybar / xxbar;
intercept = ybar - slope * xbar;
// more statistical analysis
double rss = 0.0; // residual sum of squares
double ssr = 0.0; // regression sum of squares
for (int i = 0; i < n; i++) {
double fit = slope * x[i] + intercept;
rss += (fit - y[i]) * (fit - y[i]);
ssr += (fit - ybar) * (fit - ybar);
}
int degreesOfFreedom = n - 2;
r2 = ssr / yybar;
double svar = rss / degreesOfFreedom;
svar1 = svar / xxbar;
svar0 = svar / n + xbar * xbar * svar1;
}
/**
* Returns the <em>y</em>-intercept α of the best of the best-fit line <em>y</em> = α + β <em>x</em>.
*
* @return the <em>y</em>-intercept α of the best-fit line <em>y = α + β x</em>
*/
double get_intercept() const {
return intercept;
}
/**
* Returns the slope β of the best of the best-fit line <em>y</em> = α + β <em>x</em>.
*
* @return the slope β of the best-fit line <em>y</em> = α + β <em>x</em>
*/
double get_slope() const {
return slope;
}
/**
* Returns the coefficient of determination <em>R</em><sup>2</sup>.
*
* @return the coefficient of determination <em>R</em><sup>2</sup>,
* which is a real number between 0 and 1
*/
double R2() const {
return r2;
}
/**
* Returns the standard error of the estimate for the intercept.
*
* @return the standard error of the estimate for the intercept
*/
double interceptStdErr() const {
return sqrt(svar0);
}
/**
* Returns the standard error of the estimate for the slope.
*
* @return the standard error of the estimate for the slope
*/
double slopeStdErr() const {
return sqrt(svar1);
}
/**
* Returns the expected response {@code y} given the value of the predictor
* variable {@code x}.
*
* @param x the value of the predictor variable
* @return the expected response {@code y} given the value of the predictor
* variable {@code x}
*/
double predict(double x) const {
return slope * x + intercept;
}
friend ostream &operator<<(ostream &stream, const LinearRegression &lr);
};
ostream &operator<<(ostream &stream, const LinearRegression &lr) {
ostringstream ss;
ss << setprecision(2) << lr.slope << " n + " << lr.intercept;
ss << " (R^2 = " << setprecision(3) << lr.R2() << ")";
stream << ss.str();
return stream;
}
#endif //CH1_LINEARREGRESSION_H