Skip to content

Commit

Permalink
Adam opt
Browse files Browse the repository at this point in the history
  • Loading branch information
SermetPekin committed Dec 8, 2024
1 parent fbce948 commit f23500c
Show file tree
Hide file tree
Showing 8 changed files with 355 additions and 15 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ LDFLAGS += -fsanitize=address
TARGET = main

# Source files for the main program
SRCS = easy_df.cpp
SRCS = easy_df_adam.cpp

# Object files (generated from source files)
OBJS = $(SRCS:.cpp=.o)
Expand Down
46 changes: 46 additions & 0 deletions easy_adam.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include "micrograd.hpp"
using namespace microgradCpp;

/*
g++ -std=c++17 -Iinclude -O2 -o main easy_adam.cpp
*/
int main()
{
    // Load the Iris dataset and shuffle it so the train/test split
    // is not biased by the file's class ordering.
    DatasetType dataset = get_iris();
    shuffle(dataset);

    // Fraction of samples used for training; the remainder is held out.
    const double TRAIN_SIZE{0.8};

    // MLP topology: 4 input features -> hidden layers [7, 7] -> 3 classes.
    MLP model(4, {7, 7, 3});

    // Hyperparameters for the optimization loop.
    const double learning_rate = 0.001;
    const int epochs = 1000;

    // Adam optimizer over all trainable parameters of the model.
    AdamOptimizer optimizer(model.parameters(), learning_rate);

    // Run training and report evaluation on the held-out split.
    train_eval(dataset, TRAIN_SIZE, model, optimizer, epochs);

    return 0;
}

/*
Notes
-----------
g++ -std=c++17 -Iinclude -O2 -o main main_easy.cpp
// or
make run
*/
9 changes: 9 additions & 0 deletions easy_df.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,16 @@ int main()

return 0;
}
/*
// Initialize Adam optimizer
AdamOptimizer optimizer(params, learning_rate);
// Train and evaluate the model
// train_eval(dataset, TRAIN_SIZE, model, learning_rate, epochs);
train_eval(dataset, TRAIN_SIZE, model, optimizer, epochs);
*/
/*
Notes
-----------
Expand Down
67 changes: 67 additions & 0 deletions easy_df_adam.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#include "micrograd.hpp"

#include "value.hpp"
#include "mlp.hpp"

using namespace microgradCpp;

int main()
{
    // Load the Iris data from CSV, normalize the feature columns,
    // and integer-encode the class label column ("variety").
    DataFrame df;
    df.from_csv("./data/iris.csv");
    df.normalize();
    df.encode_column("variety");

    // Print before and after shuffling so the reordering is visible.
    df.print();
    df.shuffle();
    df.print();

    // Fraction of rows used for training; the rest is the test split.
    const double TRAIN_SIZE{0.8};

    // MLP topology: 4 input features -> hidden layers [16, 16] -> 3 classes.
    MLP model(4, {16, 16, 3});

    // Hyperparameters for the optimization loop.
    const double learning_rate = 0.001;
    const int epochs = 100;

    // Adam optimizer over every trainable parameter of the network.
    auto params = model.parameters();
    AdamOptimizer optimizer(params, learning_rate);

    // Train on the DataFrame and evaluate on the held-out split.
    train_eval(df, TRAIN_SIZE, model, optimizer, epochs);

    return 0;
}
/*
*/
/*
Notes
-----------
g++ -std=c++17 -Iinclude -O2 -o main easy_df.cpp
// or
make run
*/
64 changes: 64 additions & 0 deletions include/adam.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#ifndef ADAM_HPP
#define ADAM_HPP

#include "value.hpp"

#include <cmath>
#include <memory>
#include <unordered_map> // was missing: std::unordered_map is used below
#include <vector>

/**
 * Adam optimizer (Kingma & Ba, 2015): maintains exponentially decayed
 * estimates of the first and second moments of each parameter's gradient
 * and applies bias-corrected, per-parameter adaptive updates.
 */
class AdamOptimizer {
public:
    double lr;      // Learning rate (alpha)
    double beta1;   // Exponential decay rate for the first moment
    double beta2;   // Exponential decay rate for the second moment
    double epsilon; // Small constant for numerical stability
    int t;          // Time step (iteration count), used for bias correction

    // Parameters being optimized, plus per-parameter moment state keyed by
    // the raw Value pointer (stable for as long as `params` holds the owner).
    std::vector<std::shared_ptr<Value>> params;
    std::unordered_map<Value*, double> m; // First moment estimates
    std::unordered_map<Value*, double> v; // Second moment estimates

    /**
     * @param parameters Trainable parameters (shared with the model).
     * @param lr         Learning rate (default 1e-3).
     * @param beta1      First-moment decay rate (default 0.9).
     * @param beta2      Second-moment decay rate (default 0.999).
     * @param epsilon    Numerical-stability constant (default 1e-8).
     */
    explicit AdamOptimizer(std::vector<std::shared_ptr<Value>> parameters,
                           double lr = 0.001,
                           double beta1 = 0.9,
                           double beta2 = 0.999,
                           double epsilon = 1e-8)
        : lr(lr), beta1(beta1), beta2(beta2), epsilon(epsilon), t(0),
          params(std::move(parameters)) {
        for (auto& param : params) {
            m[param.get()] = 0.0;
            v[param.get()] = 0.0;
        }
    }

    // Perform one Adam update on every parameter using its current gradient.
    void step() {
        ++t; // Advance the time step used by the bias-correction terms.

        // Bias-correction denominators are loop-invariant; hoist them.
        const double bias1 = 1.0 - std::pow(beta1, t);
        const double bias2 = 1.0 - std::pow(beta2, t);

        for (auto& param : params) {
            const double g = param->grad; // Gradient of the parameter

            // One hash lookup per map instead of three apiece.
            double& m_ref = m[param.get()];
            double& v_ref = v[param.get()];

            // Update first moment estimate (mean).
            m_ref = beta1 * m_ref + (1.0 - beta1) * g;

            // Update second moment estimate (uncentered variance).
            v_ref = beta2 * v_ref + (1.0 - beta2) * g * g;

            // Bias-corrected estimates.
            const double m_hat = m_ref / bias1;
            const double v_hat = v_ref / bias2;

            // Parameter update.
            param->data -= lr * m_hat / (std::sqrt(v_hat) + epsilon);
        }
    }

    // Reset all gradients to zero before the next backward pass.
    void zero_grad() {
        for (auto& param : params) {
            param->grad = 0.0;
        }
    }
};

#endif // ADAM_HPP
6 changes: 3 additions & 3 deletions include/easy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ THE SOFTWARE.
#include "types.hpp"
#include "loss.hpp"
#include "mlp.hpp"
#include "iris.hpp"
#include "sgd.hpp"
#include "dataprocessor.hpp"
#include "datasetType.hpp"
Expand Down Expand Up @@ -88,8 +89,7 @@ inline void shuffle(DatasetType &dataset)
gen.seed(42); // A fixed seed for reproducibility
std::shuffle(dataset.begin(), dataset.end(), gen);
}
inline void train_test_split(
const DatasetType &dataset,
inline void train_test_split( DatasetType &dataset,
double TRAIN_SIZE,
ColRows &train_inputs,
ColRows &train_targets,
Expand All @@ -112,7 +112,7 @@ inline void train_test_split(
}

inline
void train_eval(const DatasetType &dataset, double TRAIN_SIZE, MLP &model, double lr = 0.01, int epochs = 100)
void train_eval( DatasetType &dataset, double TRAIN_SIZE, MLP &model, double lr = 0.01, int epochs = 100)
{

// Split into train and test sets (80-20 split)
Expand Down
1 change: 1 addition & 0 deletions include/micrograd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ THE SOFTWARE.
#include "dataframe_utils.hpp"
#include "sp_testing_utils.hpp"
#include "train_eval.hpp"
#include "adam.hpp"

#include "value.hpp"
#include "iris.hpp"
Expand Down
Loading

0 comments on commit f23500c

Please sign in to comment.