Skip to content

Commit

Permalink
ch1 mismatch shared_ptr Value
Browse files Browse the repository at this point in the history
  • Loading branch information
SermetPekin committed Dec 8, 2024
1 parent 88c05e8 commit 7874199
Show file tree
Hide file tree
Showing 13 changed files with 466 additions and 23 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ clean:
rm -f $(OBJS) $(TARGET) $(TEST_TARGET)

# `test` and `pytest` share the same prerequisites; neither produces a file,
# so both are listed in .PHONY to keep a stray file of that name from
# short-circuiting the rule.
test: clean test_only
pytest: clean test_only


.PHONY: clean run test pytest
42 changes: 42 additions & 0 deletions easy_df.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include "micrograd.hpp"
using namespace microgradCpp;

int main()
{
    // Read the iris CSV into a DataFrame, run encode_column on "variety",
    // and print the resulting frame.
    DataFrame df;
    df.from_csv("./data/iris.csv");
    df.encode_column("variety");
    df.print();

    // Model: 4 input features followed by layers of size 10, 10 and 3.
    MLP model(4, {10, 10, 3});

    // Hyperparameters handed to train_eval.
    double train_fraction{0.8};
    double learning_rate = 0.01;
    int epochs = 2;

    // Train and evaluate the model on the DataFrame.
    train_eval(df, train_fraction, model, learning_rate, epochs);

    return 0;
}

/*
Notes
-----------
g++ -std=c++17 -Iinclude -O2 -o main easy_df.cpp
// or
make run
*/
13 changes: 8 additions & 5 deletions include/console_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,22 @@ namespace microgradCpp

std::exit(EXIT_FAILURE);
}

/// Prints a failure banner containing `reason` to std::cout, then throws.
///
/// @param reason Text shown in the banner and used as the exception message.
/// @throws std::out_of_range always; the function never returns normally.
[[noreturn]] inline void epic_out_of_range(const std::string &reason)
{
    std::cout << "\n💥💥💥 BOOM! 💥💥💥" << '\n';
    std::cout << "❌ Uh-oh! Something went wrong: [ 🔥 " << reason << " 🔥 ] " << '\n';
    std::cout << "🚀 Exiting the program... like a failed rocket launch!" << '\n';
    std::cout << "📉 Better luck next time, brave coder!" << '\n';
    // Flush once, right before throwing, so the banner is visible even if the
    // exception ends up terminating the process.
    std::cout << "🔥🔥🔥 Program terminated. 🔥🔥🔥\n"
              << std::endl;

    throw std::out_of_range(reason);
}
/// Shorthand for epic_out_of_range(): forwards `reason` (default "...") unchanged.
inline void stop(const std::string &reason = "...")
{
    return epic_out_of_range(reason);
}
// Function to format shapes for display

inline std::string format_shape(size_t rows, size_t cols)
Expand Down
13 changes: 13 additions & 0 deletions include/data_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,22 @@
#include <stdexcept>
#include <unordered_map>
#include "mlp.hpp"
#include "console_utils.hpp"
#include "types.hpp"
using namespace microgradCpp;

using vv_string = std::vector<std::vector<std::string>>;
using vv_double = std::vector<std::vector<double>>;

/// Builds a one-hot target of `num_classes` entries: 1.0 at `class_index`,
/// 0.0 everywhere else.
///
/// Every slot gets its own Value node. Constructing the container with a
/// single `std::make_shared<Value>(0.0)` as the fill value would copy that one
/// pointer into every slot, so all zero entries would alias the same node.
///
/// @param class_index Position of the 1.0 entry; must be in [0, num_classes).
/// @param num_classes Length of the returned container.
/// @return Container of `num_classes` distinct Value nodes.
/// @throws std::out_of_range if `class_index` is outside [0, num_classes)
///         (which also covers `num_classes <= 0`).
static inline v_shared_Value one_hot_encode(int class_index, int num_classes)
{
    if (class_index < 0 || class_index >= num_classes)
    {
        throw std::out_of_range("one_hot_encode: class_index is outside [0, num_classes)");
    }

    v_shared_Value target(num_classes);
    for (auto &slot : target)
    {
        slot = std::make_shared<Value>(0.0);
    }

    target[class_index] = std::make_shared<Value>(1.0);

    return target;
}

inline void log_model_info(const std::vector<int> &layer_sizes,
size_t input_features,
size_t output_targets,
Expand Down
94 changes: 88 additions & 6 deletions include/dataframe.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
#include "header.hpp"
#include "range.hpp"
#include "console_utils.hpp"
#include "types.hpp"

namespace microgradCpp

Expand All @@ -78,6 +79,12 @@ namespace microgradCpp

static inline bool DEFAULT_INPLACE = true;

/// Returns the number of entries in get_all_row_indices(), narrowed to int.
int size() const
{
    const auto index_count = get_all_row_indices().size();
    return static_cast<int>(index_count);
}

DataFrame operator()(const std::initializer_list<int> &row_indices, const std::vector<std::string> &col_names)
{
return this->slice(std::vector<size_t>(row_indices.begin(), row_indices.end()), col_names, DEFAULT_INPLACE);
Expand All @@ -96,6 +103,82 @@ namespace microgradCpp
return this->slice(numbers, col_names, DEFAULT_INPLACE);
}

/// Slices the rows listed by `range`, keeping every column in `column_order`.
/// The result is whatever slice() returns when called with DEFAULT_INPLACE.
DataFrame rows(const Range &range)
{
    return this->slice(range.to_vector<size_t>(), column_order, DEFAULT_INPLACE);
}

/// Collects the names in `column_order` whose position is accepted by
/// `column_range.includes()`, preserving their order.
v_string v(const Range &column_range)
{
    v_string selected;
    size_t position = 0;
    for (const auto &name : column_order)
    {
        if (column_range.includes(position))
        {
            selected.push_back(name);
        }
        ++position;
    }
    return selected;
}

/// Converts the frame into rows of doubles, with columns visited in
/// `column_order`.
///
/// Cells that hold a double are copied; cells holding any other alternative
/// become 0.0. A column that has no cell at a given row index contributes
/// nothing to that row, so such rows come out shorter.
///
/// @return One vector per row; empty when `columns` is empty.
vv_double to_vv_double() const
{
    vv_double table;

    if (columns.empty())
    {
        return table;
    }

    // Row count comes from whichever column `columns.begin()` yields.
    const size_t row_total = columns.begin()->second.size();

    for (size_t r = 0; r < row_total; ++r)
    {
        std::vector<double> values;
        for (const auto &name : column_order)
        {
            const auto &column = columns.at(name);
            if (r >= column.size())
            {
                continue; // this column has no cell for row r
            }

            // Non-double alternatives fall back to 0.0.
            const double *as_double = std::get_if<double>(&column[r]);
            values.push_back(as_double != nullptr ? *as_double : 0.0);
        }
        table.push_back(values);
    }

    return table;
}

// vv_string v(const Range &colum_range){

// vv_string items ;
// for(int i =0 ; i< column_order.size() ; i++ ){
// if( colum_range.includes( i ))
// items.push_back( column_order[ i ]) ;

// }
// return items ;

// }
/// Slices both rows and columns by position.
///
/// The column range used to be ignored (every column in `column_order` was
/// passed to slice()); it is now resolved to column names through v().
///
/// @param range       Row positions to keep, expanded via Range::to_vector.
/// @param colum_range Column positions to keep, resolved against `column_order`.
/// @return Whatever slice() returns for the selected rows and columns when
///         called with DEFAULT_INPLACE.
DataFrame subset(const Range &range, const Range &colum_range)
{
    const auto row_numbers = range.to_vector<size_t>();
    const auto col_names = v(colum_range);

    return this->slice(row_numbers, col_names, DEFAULT_INPLACE);
}

DataFrame operator()(const Range &range)
{

Expand All @@ -115,6 +198,11 @@ namespace microgradCpp
return this->slice(get_all_row_indices(), column_order, DEFAULT_INPLACE);
}

// DataFrame operator()(const Range &range)
// {
// return this->slice(range.to_vector<size_t>(), column_order, DEFAULT_INPLACE);
// }

DataFrame operator()(const std::vector<size_t> &row_indices)
{
return this->slice(row_indices, column_order, DEFAULT_INPLACE);
Expand All @@ -131,12 +219,6 @@ namespace microgradCpp

return this->slice(get_all_row_indices(), col_names, inplace);
}







DataFrame slice(const std::vector<size_t> &row_indices, const std::vector<std::string> &col_names, bool inplace = DEFAULT_INPLACE)
{
Expand Down
38 changes: 38 additions & 0 deletions include/datasetType.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <iostream>
#include "value.hpp"
#include "types.hpp"
#include "dataframe.hpp"
using namespace microgradCpp;

inline DatasetType convert_to_dataset(const vv_double &data, int target_column = -1)
Expand Down Expand Up @@ -43,6 +44,43 @@ inline DatasetType convert_to_dataset(const vv_double &data, int target_column =
return dataset;
}

/// Builds a dataset from a DataFrame by way of DataFrame::to_vv_double().
///
/// For each non-empty row, the cell at the target position becomes the single
/// target Value and every other cell becomes an input Value.
///
/// @param df            Source frame.
/// @param target_column Position of the target cell; -1 selects the last cell
///                      of each row.
/// @return One (inputs, targets) entry per non-empty row.
inline DatasetType convert_to_dataset(const DataFrame &df, int target_column = -1)
{
    DatasetType samples;
    const vv_double table = df.to_vv_double();

    for (const auto &record : table)
    {
        // Rows without any cells are skipped.
        if (record.empty())
        {
            continue;
        }

        // Resolved per row because -1 means "last cell of this row".
        int label_pos = target_column;
        if (target_column == -1)
        {
            label_pos = static_cast<int>(record.size() - 1);
        }

        std::vector<std::shared_ptr<Value>> features;
        std::vector<std::shared_ptr<Value>> labels;
        for (size_t col = 0; col < record.size(); ++col)
        {
            auto &bucket = (static_cast<int>(col) == label_pos) ? labels : features;
            bucket.push_back(std::make_shared<Value>(record[col]));
        }

        samples.emplace_back(features, labels);
    }

    return samples;
}

#include <fstream>
#include <iostream>
#include <vector>
Expand Down
31 changes: 20 additions & 11 deletions include/loss.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,40 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/


#include "value.hpp"
#include <vector>
#include <memory>
#include <cmath>
#include <iostream>
#include "value.hpp"
#include "console_utils.hpp"
#include "data_utils.hpp"

using namespace microgradCpp ;




class Loss {
class Loss
{
public:
static std::shared_ptr<Value> cross_entropy(
const std::vector<std::shared_ptr<Value>>& predictions,
const std::vector<std::shared_ptr<Value>>& targets
) {
const std::vector<std::shared_ptr<Value>> &predictions,
const std::vector<std::shared_ptr<Value>> &targets)
{
// Assumes:
// 1. predictions are already probabilities (from softmax in MLP forward)
// 2. targets are one-hot encoded: exactly one element is 1, others are 0
// cross entropy = -sum_i t_i * log(p_i)

auto loss = std::make_shared<Value>(0.0);

for (size_t i = 0; i < predictions.size(); ++i) {
// auto XX = one_hot_encode( targets , 3 ) ;

if (predictions.size() != targets.size() || !(predictions.size() > 0) ){

std::cout << predictions.size() << " predictions <== ==> targets " <<targets.size() ;
// stop( "problem") ;
}
auto loss = std::make_shared<Value>(0.0);

for (size_t i = 0; i < predictions.size(); ++i)
{
// log(p_i)
auto logp = predictions[i]->log();
// accumulate t_i * log(p_i)
Expand Down
1 change: 1 addition & 0 deletions include/micrograd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ THE SOFTWARE.
#include "dataframe.hpp"
#include "dataframe_utils.hpp"
#include "sp_testing_utils.hpp"
#include "train_eval.hpp"

#include "value.hpp"
#include "iris.hpp"
Expand Down
Loading

0 comments on commit 7874199

Please sign in to comment.