Skip to content

Commit

Permalink
Adam opt
Browse files Browse the repository at this point in the history
  • Loading branch information
SermetPekin committed Dec 8, 2024
1 parent fbce948 commit f23500c
Show file tree
Hide file tree
Showing 8 changed files with 355 additions and 15 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ LDFLAGS += -fsanitize=address
TARGET = main

# Source files for the main program
SRCS = easy_df.cpp
SRCS = easy_df_adam.cpp

# Object files (generated from source files)
OBJS = $(SRCS:.cpp=.o)
Expand Down
46 changes: 46 additions & 0 deletions easy_adam.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include "micrograd.hpp"
using namespace microgradCpp;

/*
g++ -std=c++17 -Iinclude -O2 -o main easy_adam.cpp
*/
int main()
{
    // Load the Iris dataset and shuffle it so the train/test split
    // is not biased by the file's class ordering.
    DatasetType dataset = get_iris();
    shuffle(dataset);

    // Fraction of samples used for training; the remainder is held out.
    const double TRAIN_SIZE{0.8};

    // MLP topology: 4 input features -> hidden layers [7, 7] -> 3 classes.
    MLP model(4, {7, 7, 3});

    // Hyperparameters for the optimization loop.
    const double learning_rate = 0.001;
    const int epochs = 1000;

    // Adam optimizer over all trainable parameters of the model.
    AdamOptimizer optimizer(model.parameters(), learning_rate);

    // Run training and report evaluation on the held-out split.
    train_eval(dataset, TRAIN_SIZE, model, optimizer, epochs);

    return 0;
}

/*
Notes
-----------
g++ -std=c++17 -Iinclude -O2 -o main main_easy.cpp
// or
make run
*/
9 changes: 9 additions & 0 deletions easy_df.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,16 @@ int main()

return 0;
}
/*
// Initialize Adam optimizer
AdamOptimizer optimizer(params, learning_rate);
// Train and evaluate the model
// train_eval(dataset, TRAIN_SIZE, model, learning_rate, epochs);
train_eval(dataset, TRAIN_SIZE, model, optimizer, epochs);
*/
/*
Notes
-----------
Expand Down
67 changes: 67 additions & 0 deletions easy_df_adam.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#include "micrograd.hpp"

#include "value.hpp"
#include "mlp.hpp"

using namespace microgradCpp;

int main()
{
    // Load the Iris data from CSV, normalize the feature columns,
    // and integer-encode the class label column ("variety").
    DataFrame df;
    df.from_csv("./data/iris.csv");
    df.normalize();
    df.encode_column("variety");

    // Print before and after shuffling so the reordering is visible.
    df.print();
    df.shuffle();
    df.print();

    // Fraction of rows used for training; the rest is the test split.
    const double TRAIN_SIZE{0.8};

    // MLP topology: 4 input features -> hidden layers [16, 16] -> 3 classes.
    MLP model(4, {16, 16, 3});

    // Hyperparameters for the optimization loop.
    const double learning_rate = 0.001;
    const int epochs = 100;

    // Adam optimizer over every trainable parameter of the network.
    auto params = model.parameters();
    AdamOptimizer optimizer(params, learning_rate);

    // Train on the DataFrame and evaluate on the held-out split.
    train_eval(df, TRAIN_SIZE, model, optimizer, epochs);

    return 0;
}
/*
*/
/*
Notes
-----------
g++ -std=c++17 -Iinclude -O2 -o main easy_df.cpp
// or
make run
*/
64 changes: 64 additions & 0 deletions include/adam.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#ifndef ADAM_HPP
#define ADAM_HPP

#include "value.hpp"

#include <cmath>
#include <memory>
#include <unordered_map> // was missing: std::unordered_map is used below
#include <vector>

/**
 * Adam optimizer (Kingma & Ba, 2015): maintains exponentially decayed
 * estimates of the first and second moments of each parameter's gradient
 * and applies bias-corrected, per-parameter adaptive updates.
 */
class AdamOptimizer {
public:
    double lr;      // Learning rate (alpha)
    double beta1;   // Exponential decay rate for the first moment
    double beta2;   // Exponential decay rate for the second moment
    double epsilon; // Small constant for numerical stability
    int t;          // Time step (iteration count), used for bias correction

    // Parameters being optimized, plus per-parameter moment state keyed by
    // the raw Value pointer (stable for as long as `params` holds the owner).
    std::vector<std::shared_ptr<Value>> params;
    std::unordered_map<Value*, double> m; // First moment estimates
    std::unordered_map<Value*, double> v; // Second moment estimates

    /**
     * @param parameters Trainable parameters (shared with the model).
     * @param lr         Learning rate (default 1e-3).
     * @param beta1      First-moment decay rate (default 0.9).
     * @param beta2      Second-moment decay rate (default 0.999).
     * @param epsilon    Numerical-stability constant (default 1e-8).
     */
    explicit AdamOptimizer(std::vector<std::shared_ptr<Value>> parameters,
                           double lr = 0.001,
                           double beta1 = 0.9,
                           double beta2 = 0.999,
                           double epsilon = 1e-8)
        : lr(lr), beta1(beta1), beta2(beta2), epsilon(epsilon), t(0),
          params(std::move(parameters)) {
        for (auto& param : params) {
            m[param.get()] = 0.0;
            v[param.get()] = 0.0;
        }
    }

    // Perform one Adam update on every parameter using its current gradient.
    void step() {
        ++t; // Advance the time step used by the bias-correction terms.

        // Bias-correction denominators are loop-invariant; hoist them.
        const double bias1 = 1.0 - std::pow(beta1, t);
        const double bias2 = 1.0 - std::pow(beta2, t);

        for (auto& param : params) {
            const double g = param->grad; // Gradient of the parameter

            // One hash lookup per map instead of three apiece.
            double& m_ref = m[param.get()];
            double& v_ref = v[param.get()];

            // Update first moment estimate (mean).
            m_ref = beta1 * m_ref + (1.0 - beta1) * g;

            // Update second moment estimate (uncentered variance).
            v_ref = beta2 * v_ref + (1.0 - beta2) * g * g;

            // Bias-corrected estimates.
            const double m_hat = m_ref / bias1;
            const double v_hat = v_ref / bias2;

            // Parameter update.
            param->data -= lr * m_hat / (std::sqrt(v_hat) + epsilon);
        }
    }

    // Reset all gradients to zero before the next backward pass.
    void zero_grad() {
        for (auto& param : params) {
            param->grad = 0.0;
        }
    }
};

#endif // ADAM_HPP
6 changes: 3 additions & 3 deletions include/easy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ THE SOFTWARE.
#include "types.hpp"
#include "loss.hpp"
#include "mlp.hpp"
#include "iris.hpp"
#include "sgd.hpp"
#include "dataprocessor.hpp"
#include "datasetType.hpp"
Expand Down Expand Up @@ -88,8 +89,7 @@ inline void shuffle(DatasetType &dataset)
gen.seed(42); // A fixed seed for reproducibility
std::shuffle(dataset.begin(), dataset.end(), gen);
}
inline void train_test_split(
const DatasetType &dataset,
inline void train_test_split( DatasetType &dataset,
double TRAIN_SIZE,
ColRows &train_inputs,
ColRows &train_targets,
Expand All @@ -112,7 +112,7 @@ inline void train_test_split(
}

inline
void train_eval(const DatasetType &dataset, double TRAIN_SIZE, MLP &model, double lr = 0.01, int epochs = 100)
void train_eval( DatasetType &dataset, double TRAIN_SIZE, MLP &model, double lr = 0.01, int epochs = 100)
{

// Split into train and test sets (80-20 split)
Expand Down
1 change: 1 addition & 0 deletions include/micrograd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ THE SOFTWARE.
#include "dataframe_utils.hpp"
#include "sp_testing_utils.hpp"
#include "train_eval.hpp"
#include "adam.hpp"

#include "value.hpp"
#include "iris.hpp"
Expand Down
Loading

0 comments on commit f23500c

Please sign in to comment.