cosmetic changes to hello world

STEllAR-GROUP · Mar 31, 2024 · bd918e4 · bd918e4
1 parent 102a353
commit bd918e4
Showing 1 changed file with 70 additions and 94 deletions.
diff --git a/examples/hello_world.cc b/examples/hello_world.cc
@@ -2,27 +2,21 @@
 Copyright (c) 2019-2026, Hossein Moein
 All rights reserved.
 
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-* Neither the name of Hossein Moein and/or the DataFrame nor the
-  names of its contributors may be used to endorse or promote products
-  derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL Hossein Moein BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
+conditions are met:
+* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+  disclaimer in the documentation and/or other materials provided with the distribution.
+* Neither the name of Hossein Moein and/or the DataFrame nor the names of its contributors may be used to endorse or promote
+  products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+EVENT SHALL Hossein Moein BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
 */
 
 #include <DataFrame/DataFrame.h>                   // Main DataFrame header
@@ -69,14 +63,13 @@ struct  MyData  {
 //
 int main(int, char *[])  {
 
-    // If you want to fully take advantage of DataFrame parallel computing logic,
-    // it is recommended to call the following at the beginning of your program.
+    // If you want to fully take advantage of DataFrame parallel computing logic, it is recommended to call the following
+    // at the beginning of your program.
     //
-    // NOTE: make sure you read and understand the Multithreading section
-    //       in the documentations (threads could potentially hinder performance).
-    //       This program (hello world) is a perfect example. Since I know this program doesn’t
-    //       deal with large datasets to trigger multithreaded algorithms, populating the thread-pool
-    //       with threads (i.e. calling set_optimum_thread_level()) is a waste of resources.
+    // NOTE: make sure you read and understand the Multithreading section in the documentation (threads could
+    //       potentially hinder performance). This program (hello world) is a perfect example. Since I know this program
+    //       doesn’t deal with large datasets to trigger multithreaded algorithms, populating the thread-pool with threads
+    //       (i.e. calling set_optimum_thread_level()) is a waste of resources.
     //
     ThreadGranularity::set_optimum_thread_level();
 
@@ -87,12 +80,11 @@ int main(int, char *[])  {
 
     ULDataFrame ul_df1;
 
-    // One way to load data into the DataFrame is one column at a time.
-    // A DataFrame column could be at most as long as its index column. So, you must load the indexfirst before
-    // loading any column.
+    // One way to load data into the DataFrame is one column at a time. A DataFrame column could be at most as long as its
+    // index column. So, you must load the index first before loading any column.
     //
-    // Once you load a column or index, the data is moved to DataFrame. The original vectors are now empty.
-    // There are other ways of loading data without the move.
+    // Once you load a column or index, the data is moved to DataFrame. The original vectors are now empty. There are other
+    // ways of loading data without the move.
     //
     ul_df1.load_index(std::move(idx_col1));
     ul_df1.load_column("dbl_col", std::move(dbl_col1));
@@ -107,8 +99,7 @@ int main(int, char *[])  {
 
     ULDataFrame ul_df2;
 
-    // Also, you can load data into a DataFrame all at once. In this case again the data is moved to the
-    // DataFrame.
+    // Also, you can load data into a DataFrame all at once. In this case again the data is moved to the DataFrame.
     //
     ul_df2.load_data(std::move(idx_col2),
                      std::make_pair("string col",  str_col),
@@ -117,18 +108,17 @@ int main(int, char *[])  {
 
     StrDataFrame    ibm_df;
 
-    // Also, you can load data into a DataFrame from a file, supporting a few different formats.
-    // If the file cannot be found, an exception will be thrown.
-    // If the DataFrame root directory is your current directory when running this, it should work fine.
+    // Also, you can load data into a DataFrame from a file, supporting a few different formats. If the file cannot be found,
+    // an exception will be thrown. If the DataFrame root directory is your current directory when running this, it should
+    // work fine.
     //
     ibm_df.read("data/IBM.csv", io_format::csv2);
 
-    // To access a column, you must know its name (or index) and its type.
-    // In case of a "standard" DataFrame (not a view), the columns are returned as a reference to a
-    // std::vector of type of that column.
+    // To access a column, you must know its name (or index) and its type. In case of a "standard" DataFrame (not a view),
+    // the columns are returned as a reference to a std::vector of type of that column.
     //
-    // get_column() involves 1 or sometimes 2 hash-table lookups.
-    // So, you should not call it repeatedly in a loop. Instead get a reference to it and use the reference.
+    // get_column() involves 1 or sometimes 2 hash-table lookups. So, you should not call it repeatedly in a loop. Instead
+    // get a reference to it and use the reference.
     //
     const auto  &cool_col_ref = ul_df2.get_column<std::string>("Cool Column");
     const auto  &str_col_ref = ul_df2.get_column<std::string>("string col");
@@ -139,27 +129,24 @@ int main(int, char *[])  {
         std::cout << str << ", ";
     std::cout << std::endl;
 
-    std::cout << "There are " << ibm_df.get_column<double>("IBM_Close").size()
-              << " IBM close prices" << std::endl;
+    std::cout << "There are " << ibm_df.get_column<double>("IBM_Close").size() << " IBM close prices" << std::endl;
     std::cout << "There are " << ibm_df.get_index().size() << " IBM indices" << std::endl;
 
-    // You can write the data to a file or stdout in a few formats. You must specify all the column types,
-    // but only once. When writing to a file, the file name/path must be create-able.
+    // You can write the data to a file or stdout in a few formats. You must specify all the column types, but only once.
+    // When writing to a file, the file name/path must be create-able.
     //
     ul_df2.write<std::ostream, std::string, double>(std::cout, io_format::csv2);
     ibm_df.write<double, long>("/tmp/test.json", io_format::json);
 
-    // You can convert a DataFrame to a string and from a string back into a DataFrame.
-    // This could be used to transmit a DataFrame from one place to another or store a DataFrame in
-    // databases, caches, …
+    // You can convert a DataFrame to a string and from a string back into a DataFrame. This could be used to transmit a
+    // DataFrame from one place to another or store a DataFrame in databases, caches, ...
     //
     const std::string  ibm_df_as_str = ibm_df.to_string<double, long>();
     StrDataFrame       ibm_df_2;
 
-    // Since we convert from native type to string and back, if you have floating point numbers with
-    // long precisions, you may run into precision mismatches.
-    // to_string() has a precision parameter you can adjust. The default is 12 which is a relatively
-    // high precision.
+    // Since we convert from native type to string and back, if you have floating point numbers with long precisions, you may
+    // run into precision mismatches. to_string() has a precision parameter you can adjust. The default is 12 which is a
+    // relatively high precision.
     //
     ibm_df_2.from_string(ibm_df_as_str.c_str());
     // std::cout << ibm_df_as_str << std::endl;  // Large output
@@ -176,8 +163,7 @@ int main(int, char *[])  {
     // You must specify all the column types, but only once.
     //
     auto    above_150_fun = [](const std::string &, const double &val)-> bool { return (val > 150.0); };
-    auto    above_150_df =
-        ibm_df.get_data_by_sel<double, decltype(above_150_fun), double, long>("IBM_Close", above_150_fun);
+    auto    above_150_df = ibm_df.get_data_by_sel<double, decltype(above_150_fun), double, long>("IBM_Close", above_150_fun);
 
     // Or, you could choose to get a view. See docs for views.
     //
@@ -187,58 +173,52 @@ int main(int, char *[])  {
     // You can get another DataFrame by group-bying on one or multiple columns.
     // You must specify only the type(s) of column(s), you are group-bying.
     //
-    // Group-by column dbl_col, and I am specifying how to summarize the index column and each of the
-    // other columns.
+    // Group-by column dbl_col, and I am specifying how to summarize the index column and each of the other columns.
     //
-    auto    gb_df =
-        ul_df1.groupby1<double>("dbl_col",
-                                LastVisitor<ul_idx_t, ul_idx_t>(),
-                                std::make_tuple("integers",    "sum_int",      SumVisitor<int>()),
-                                std::make_tuple("my_data_col", "last_my_data", LastVisitor<MyData>()));
+    auto    gb_df = ul_df1.groupby1<double>("dbl_col",
+                                            LastVisitor<ul_idx_t, ul_idx_t>(),
+                                            std::make_tuple("integers",    "sum_int",      SumVisitor<int>()),
+                                            std::make_tuple("my_data_col", "last_my_data", LastVisitor<MyData>()));
 
-    // You can run statistical, financial, ML, … algorithms on one or multiple columns by using visitors.
-    // You must specify the column(s) type(s).
-    // The visitor's data column is of type double and its index column is of type std::string.
+    // You can run statistical, financial, ML, ... algorithms on one or multiple columns by using visitors. You must specify
+    // the column(s) type(s). The visitor's data column is of type double and its index column is of type std::string.
     //
     StdVisitor<double, std::string> stdev_v;
 
     ibm_df.visit<double>("IBM_Close", stdev_v);
     std::cout << "Standard deviation of IBM close prices: " << stdev_v.get_result() << std::endl;
 
-    // Now let’s declare two DataFrames with index type of DateTime which is a handy object for
-    // date/time manipulations.
+    // Now let’s declare two DataFrames with index type of DateTime which is a handy object for date/time manipulations.
     //
     DTDataFrame ibm_dt_df;
     DTDataFrame aapl_dt_df;
 
-    // Let’s read the AAPL and IBM market data from their files. The data for these two stocks start and end
-    // at different dates. But there is overlapping data between them.
+    // Let’s read the AAPL and IBM market data from their files. The data for these two stocks start and end at different
+    // dates. But there is overlapping data between them.
     //
     ibm_dt_df.read("data/DT_IBM.csv", io_format::csv2);
     aapl_dt_df.read("data/DT_AAPL.csv", io_format::csv2);
 
     // First let’s make sure if there are missing data in our important columns, we fill them up.
     //
-    ibm_dt_df.fill_missing<double>({ "IBM_Close", "IBM_Open", "IBM_High", "IBM_Low" },
-                                   fill_policy::linear_interpolate);
+    ibm_dt_df.fill_missing<double>({ "IBM_Close", "IBM_Open", "IBM_High", "IBM_Low" }, fill_policy::linear_interpolate);
 
     // Now we join the AAPL and IBM DataFrames using their indices and applying inner-join policy.
     //
-    DTDataFrame aapl_ibm =
-        ibm_dt_df.join_by_index<DTDataFrame, double, long>(aapl_dt_df, join_policy::inner_join);
+    DTDataFrame aapl_ibm = ibm_dt_df.join_by_index<DTDataFrame, double, long>(aapl_dt_df, join_policy::inner_join);
 
-    // Now we calculate the Pearson correlation coefficient between AAPL and IBM close prices.
-    // The visitor's data columns are of type double and its index column is of type DateTime.
+    // Now we calculate the Pearson correlation coefficient between AAPL and IBM close prices. The visitor's data columns are
+    // of type double and its index column is of type DateTime.
     //
     CorrVisitor<double, DateTime>   corrl_v;
 
     std::cout << "Correlation between AAPL and IBM close prices: "
               << aapl_ibm.visit<double, double>("AAPL_Close", "IBM_Close", corrl_v).get_result()
               << std::endl;
 
-    // Now let’s do something more sophisticated and calculate rolling exponentially weighted correlations
-    // between IBM and Apple close prices. Since this is a rolling -- moving -- analysis the result is a
-    // vector of exponentially weighted correlations for each date in the data stream.
+    // Now let’s do something more sophisticated and calculate rolling exponentially weighted correlations between IBM and
+    // Apple close prices. Since this is a rolling -- moving -- analysis the result is a vector of exponentially weighted
+    // correlations for each date in the data stream.
     //
     ewm_corr_v<double>  ewmcorr { exponential_decay_spec::span, 3 };
     const auto          &ewmcorr_result =
@@ -264,8 +244,7 @@ int main(int, char *[])  {
             std::make_tuple("AAPL_Close",  "Median",        MedianVisitor<double, dt_idx_t>()),
             std::make_tuple("AAPL_Close",  "25% Quantile",  QuantileVisitor<double, dt_idx_t>(0.25)),
             std::make_tuple("AAPL_Close",  "Std",           StdVisitor<double, dt_idx_t>()),
-            // "Mode" column is a column of std::array<ModeVisitor::DataItem, 2>'s -- It cannot be printed
-            // by default
+            // "Mode" column is a column of std::array<ModeVisitor::DataItem, 2>'s
             std::make_tuple("AAPL_Close",  "Mode",          ModeVisitor<2, double, dt_idx_t>()),
             std::make_tuple("AAPL_Close",  "MAD",           MADVisitor<double, dt_idx_t>(mad_type::mean_abs_dev_around_mean)),
             // "Z Score" column is a column of std::vector<double>'s
@@ -285,11 +264,9 @@ int main(int, char *[])  {
     // ---------------------------------------------------
     //
     // Now let’s do some stuff that are a little more involved (multi steps). There are a lot of theories,
-    // math, and procedures that I am skipping to explain here.
-    // See docs for more details.
+    // math, and procedures that I am skipping to explain here. See docs for more details.
     //
-    // NOTE: I am applying the following analysis to financial data but it equally applies to other
-    //       scientific fields.
+    // NOTE: I am applying the following analysis to financial data but it equally applies to other scientific fields.
     //
     // ---------------------------------------------------
 
@@ -339,19 +316,18 @@ int main(int, char *[])  {
               << "75% quantile: " << qt75.get_result() << ", "
               << "95% quantile: " << qt95.get_result() << std::endl;
 
-    // Now let’s do another interesting thing. Let’s take the IBM returns curve and split it into
-    // 3 different curves; Trend, Seasonal, and Idiocentric or Residual or Random.
-    // For the sake of this exercise, we assume IBM business goes through 170-day seasonal cycles.
+    // Now let’s do another interesting thing. Let’s take the IBM returns curve and split it into 3 different curves; Trend,
+    // Seasonal, and Idiosyncratic or Residual or Random. For the sake of this exercise, we assume IBM business goes through
+    // 170-day seasonal cycles.
     //
     DecomposeVisitor<double, DateTime>  decom { 170, 0.6, 0.01 };
 
-    // After this call, the 3 curves will be in decom visitor instance. See docs how to get them
-    // and analyze them.
+    // After this call, the 3 curves will be in the decom visitor instance. See docs for how to get them and analyze them.
     //
     ibm_dt_df.single_act_visit<double>("IBM_Return", decom);
 
-    // But what if you don’t know the seasonality of IBM returns which would be most of the time.
-    // No worries, Mr Joseph Fourier comes to the rescue.
+    // But what if you don’t know the seasonality of IBM returns, which would be the case most of the time? No worries,
+    // Mr. Joseph Fourier comes to the rescue.
     //
     FastFourierTransVisitor<double, DateTime>   fft;
 
@@ -360,8 +336,8 @@ int main(int, char *[])  {
     const auto  &magnitudes = fft.get_magnitude();
     double      max_val = 0;
 
-    // The following analysis and conclusion are over simplified and naive. It is more involved
-    // which is behind the scope of Hello World. But this is the basic idea.
+    // The following analysis and conclusion are oversimplified and naive. It is more involved, which is beyond the scope of
+    // Hello World. But this is the basic idea.
     //
     for (std::size_t i = 1; i < magnitudes.size(); ++i)  {
         const double    val = 1.0 / magnitudes[i];