Commit 23651a4

Create regression_modelspolybasis.cpp

1 parent a25ad98 commit 23651a4

1 file changed: +189 -0 lines changed

// regression_models_poly_gridsearch.cpp

#include <iostream>
#include <vector>
#include <cmath>
#include <random>
#include <fstream>
#include <numeric>
#include <Eigen/Dense>
#include <algorithm>
#include <functional>

using namespace std;
using namespace Eigen;

// === Utility Functions ===
double mean_squared_error(const VectorXd& y_true, const VectorXd& y_pred) {
    return (y_true - y_pred).squaredNorm() / y_true.size();
}

double r2_score(const VectorXd& y_true, const VectorXd& y_pred) {
    double mean_y = y_true.mean();
    double total = (y_true.array() - mean_y).square().sum();
    double residual = (y_true - y_pred).squaredNorm();
    return 1.0 - residual / total;
}
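
// For reference, the two metrics above implement the standard definitions
// (a note added for clarity, not part of the original commit):
//   MSE = (1/n) * sum_i (y_i - yhat_i)^2
//   R^2 = 1 - SS_res / SS_tot, with SS_res = sum_i (y_i - yhat_i)^2
//                              and  SS_tot = sum_i (y_i - mean(y))^2
// Note that R^2 can be negative on held-out folds when the model predicts
// worse than the mean of y.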

void save_csv(const string& filename, const MatrixXd& X, const VectorXd& y_true, const VectorXd& y_pred) {
    ofstream file(filename);
    // Emit one header column per feature rather than a literal "X1,X2,...".
    for (int j = 0; j < X.cols(); ++j)
        file << "X" << (j + 1) << ",";
    file << "True Y,Predicted Y\n";
    for (int i = 0; i < X.rows(); ++i) {
        for (int j = 0; j < X.cols(); ++j)
            file << X(i, j) << ",";
        file << y_true(i) << "," << y_pred(i) << "\n";
    }
    file.close();
}

// === Polynomial Expansion ===
MatrixXd polynomial_expand(const MatrixXd& X, int degree) {
    int n = X.rows();
    int d = X.cols();
    vector<VectorXd> terms;
    terms.push_back(VectorXd::Ones(n)); // bias (RidgeRegression::fit also adds
                                        // an intercept column; the ridge penalty
                                        // keeps the duplicated column harmless)

    // Recursively enumerate every exponent vector summing to exactly deg and
    // build the corresponding monomial column.
    for (int deg = 1; deg <= degree; ++deg) {
        function<void(int, int, VectorXi)> generate;
        generate = [&](int pos, int rem_deg, VectorXi powers) {
            if (pos == d) {
                if (rem_deg == 0) {
                    VectorXd term = VectorXd::Ones(n);
                    for (int i = 0; i < d; ++i)
                        term = term.array() * X.col(i).array().pow(powers(i));
                    terms.push_back(term);
                }
                return;
            }
            for (int i = 0; i <= rem_deg; ++i) {
                powers(pos) = i;
                generate(pos + 1, rem_deg - i, powers);
            }
        };
        generate(0, deg, VectorXi::Zero(d));
    }

    MatrixXd result(n, static_cast<int>(terms.size()));
    for (int i = 0; i < static_cast<int>(terms.size()); ++i)
        result.col(i) = terms[i];
    return result;
}
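
// Column-count check (a note added for clarity, not in the original file):
// there are C(k + d - 1, d - 1) monomials of total degree exactly k in d
// variables, so the expansion yields C(d + D, D) columns for max degree D,
// bias included. For d = 2, D = 3 that is 1 + 2 + 3 + 4 = 10 columns, e.g.:
//   assert(polynomial_expand(MatrixXd::Random(5, 2), 3).cols() == 10);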

// === Cross-validation ===
void cross_validate(const MatrixXd& X, const VectorXd& y, int k,
                    function<void(const MatrixXd&, const VectorXd&)> fit_func,
                    function<VectorXd(const MatrixXd&)> predict_func,
                    double& avg_mse, double& avg_r2) {

    int n = X.rows();
    vector<int> indices(n);
    iota(indices.begin(), indices.end(), 0);
    // std::random_shuffle was removed in C++17; use std::shuffle instead.
    mt19937 rng(random_device{}());
    shuffle(indices.begin(), indices.end(), rng);

    avg_mse = 0.0;
    avg_r2 = 0.0;

    for (int i = 0; i < k; ++i) {
        int start = i * n / k;
        int end = (i + 1) * n / k;

        vector<int> test_idx(indices.begin() + start, indices.begin() + end);
        vector<int> train_idx;
        for (int j = 0; j < n; ++j)
            if (j < start || j >= end) train_idx.push_back(indices[j]);

        MatrixXd X_train(train_idx.size(), X.cols());
        VectorXd y_train(train_idx.size());
        for (size_t j = 0; j < train_idx.size(); ++j) {
            X_train.row(j) = X.row(train_idx[j]);
            y_train(j) = y(train_idx[j]);
        }

        MatrixXd X_test(test_idx.size(), X.cols());
        VectorXd y_test(test_idx.size());
        for (size_t j = 0; j < test_idx.size(); ++j) {
            X_test.row(j) = X.row(test_idx[j]);
            y_test(j) = y(test_idx[j]);
        }

        fit_func(X_train, y_train);
        VectorXd y_pred = predict_func(X_test);
        avg_mse += mean_squared_error(y_test, y_pred);
        avg_r2 += r2_score(y_test, y_pred);
    }

    avg_mse /= k;
    avg_r2 /= k;
}
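
// Fold boundaries use integer arithmetic: fold i tests rows [i*n/k, (i+1)*n/k),
// which partitions all n rows even when k does not divide n. With the
// n = 150, k = 5 used in main(), every fold holds exactly 30 test rows.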

// === Ridge Regression ===
class RidgeRegression {
public:
    VectorXd weights;
    double alpha;

    RidgeRegression(double alpha = 1.0) : alpha(alpha) {}

    void fit(const MatrixXd& X, const VectorXd& y) {
        MatrixXd X_bias(X.rows(), X.cols() + 1);
        X_bias << MatrixXd::Ones(X.rows(), 1), X;
        MatrixXd I = MatrixXd::Identity(X_bias.cols(), X_bias.cols());
        I(0, 0) = 0; // leave the intercept unpenalized
        weights = (X_bias.transpose() * X_bias + alpha * I).ldlt().solve(X_bias.transpose() * y);
    }

    VectorXd predict(const MatrixXd& X) const {
        MatrixXd X_bias(X.rows(), X.cols() + 1);
        X_bias << MatrixXd::Ones(X.rows(), 1), X;
        return X_bias * weights;
    }
};
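
// The solve above is the standard ridge normal equation (stated for reference):
//   w = (X_b^T X_b + alpha * I)^{-1} X_b^T y,  with X_b = [1 | X]
// and I(0, 0) zeroed so the intercept carries no penalty. LDLT is applicable
// because the regularized Gram matrix is symmetric positive (semi-)definite.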

// === Main ===
int main() {
    // Synthetic data: y = 2 + X * w + noise, with w linearly spaced in [1, 2].
    int n_samples = 150, n_features = 2;
    MatrixXd X = MatrixXd::Random(n_samples, n_features);
    // Eigen has no scalar-plus-vector operator for dense matrices, so the
    // intercept is added as a constant vector.
    VectorXd y = VectorXd::Constant(n_samples, 2.0)
                 + X * VectorXd::LinSpaced(n_features, 1.0, 2.0)
                 + 0.3 * VectorXd::Random(n_samples);

    int best_degree = 1;
    double best_alpha = 0.0;
    double best_mse = 1e9;

    // Grid search over polynomial degree and ridge strength, scored by 5-fold CV.
    for (int degree : {1, 2, 3}) {
        MatrixXd X_poly = polynomial_expand(X, degree);
        for (double alpha : {0.01, 0.1, 1.0, 10.0}) {
            RidgeRegression model(alpha);
            double avg_mse, avg_r2;
            cross_validate(X_poly, y, 5,
                [&](const MatrixXd& Xtr, const VectorXd& ytr){ model.fit(Xtr, ytr); },
                [&](const MatrixXd& Xte){ return model.predict(Xte); },
                avg_mse, avg_r2);

            cout << "Degree=" << degree << ", Alpha=" << alpha
                 << " -> MSE=" << avg_mse << ", R2=" << avg_r2 << endl;

            if (avg_mse < best_mse) {
                best_mse = avg_mse;
                best_alpha = alpha;
                best_degree = degree;
            }
        }
    }

    cout << "\nBest model: Degree=" << best_degree
         << ", Alpha=" << best_alpha
         << ", MSE=" << best_mse << endl;

    // Refit the best configuration on the full data set and dump predictions.
    MatrixXd X_poly_best = polynomial_expand(X, best_degree);
    RidgeRegression best_model(best_alpha);
    best_model.fit(X_poly_best, y);
    VectorXd y_pred = best_model.predict(X_poly_best);

    save_csv("predictions_poly_ridge.csv", X, y, y_pred);

    return 0;
}

// Build: g++ -std=c++17 -I /path/to/eigen regression_models_poly_gridsearch.cpp -o poly_model
// Run:   ./poly_model
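
// Eigen is header-only, so nothing needs linking; on many Linux systems the
// headers live under /usr/include/eigen3 (an assumption about the local
// install; point -I wherever Eigen/Dense actually resides).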
