diff --git a/hist/histv7/benchmark/CMakeLists.txt b/hist/histv7/benchmark/CMakeLists.txt index f7cfa93eca030..d367ddcb75dc7 100644 --- a/hist/histv7/benchmark/CMakeLists.txt +++ b/hist/histv7/benchmark/CMakeLists.txt @@ -6,3 +6,6 @@ target_link_libraries(hist_benchmark_engine ROOTHist benchmark::benchmark) add_executable(hist_benchmark_regular hist_benchmark_regular.cxx) target_link_libraries(hist_benchmark_regular ROOTHist benchmark::benchmark) + +add_executable(hist_benchmark_stats hist_benchmark_stats.cxx) +target_link_libraries(hist_benchmark_stats ROOTHist benchmark::benchmark) diff --git a/hist/histv7/benchmark/hist_benchmark_stats.cxx b/hist/histv7/benchmark/hist_benchmark_stats.cxx new file mode 100644 index 0000000000000..dd3ab679b95de --- /dev/null +++ b/hist/histv7/benchmark/hist_benchmark_stats.cxx @@ -0,0 +1,87 @@ +#include +#include + +#include + +#include +#include + +struct RHistStats1 : public benchmark::Fixture { + // The object is stored and constructed in the fixture to avoid compiler optimizations in the benchmark body taking + // advantage of the (constant) constructor parameters. + ROOT::Experimental::RHistStats stats{1}; + std::vector fNumbers; + + // Avoid GCC warning + using benchmark::Fixture::SetUp; + void SetUp(benchmark::State &state) final + { + std::mt19937 gen; + std::uniform_real_distribution<> dis; + fNumbers.resize(state.range(0)); + for (std::size_t i = 0; i < fNumbers.size(); i++) { + fNumbers[i] = dis(gen); + } + } +}; + +BENCHMARK_DEFINE_F(RHistStats1, Fill)(benchmark::State &state) +{ + for (auto _ : state) { + for (double number : fNumbers) { + stats.Fill(number); + } + } +} +BENCHMARK_REGISTER_F(RHistStats1, Fill)->Range(0, 32768); + +BENCHMARK_DEFINE_F(RHistStats1, FillWeight)(benchmark::State &state) +{ + for (auto _ : state) { + for (double number : fNumbers) { + stats.Fill(number, ROOT::Experimental::RWeight(0.8)); + } + } +} +BENCHMARK_REGISTER_F(RHistStats1, FillWeight)->Range(0, 32768); + +struct RHistStats2 : public benchmark::Fixture { + // The object is stored and constructed in the fixture to avoid compiler optimizations in the benchmark body taking + // advantage of the (constant) constructor parameters. + ROOT::Experimental::RHistStats stats{2}; + std::vector fNumbers; + + // Avoid GCC warning + using benchmark::Fixture::SetUp; + void SetUp(benchmark::State &state) final + { + std::mt19937 gen; + std::uniform_real_distribution<> dis; + fNumbers.resize(2 * state.range(0)); + for (std::size_t i = 0; i < fNumbers.size(); i++) { + fNumbers[i] = dis(gen); + } + } +}; + +BENCHMARK_DEFINE_F(RHistStats2, Fill)(benchmark::State &state) +{ + for (auto _ : state) { + for (std::size_t i = 0; i < fNumbers.size(); i += 2) { + stats.Fill(fNumbers[i], fNumbers[i + 1]); + } + } +} +BENCHMARK_REGISTER_F(RHistStats2, Fill)->Range(0, 32768); + +BENCHMARK_DEFINE_F(RHistStats2, FillWeight)(benchmark::State &state) +{ + for (auto _ : state) { + for (std::size_t i = 0; i < fNumbers.size(); i += 2) { + stats.Fill(fNumbers[i], fNumbers[i + 1], ROOT::Experimental::RWeight(0.8)); + } + } +} +BENCHMARK_REGISTER_F(RHistStats2, FillWeight)->Range(0, 32768); + +BENCHMARK_MAIN(); diff --git a/hist/histv7/headers.cmake b/hist/histv7/headers.cmake index 7791a76adbb32..dbc16ccbab484 100644 --- a/hist/histv7/headers.cmake +++ b/hist/histv7/headers.cmake @@ -4,6 +4,7 @@ set(histv7_headers ROOT/RBinIndexRange.hxx ROOT/RBinWithError.hxx ROOT/RHistEngine.hxx + ROOT/RHistStats.hxx ROOT/RHistUtils.hxx ROOT/RLinearizedIndex.hxx ROOT/RRegularAxis.hxx diff --git a/hist/histv7/inc/LinkDef.h b/hist/histv7/inc/LinkDef.h index ff41b88af1065..eec413e8805bd 100644 --- a/hist/histv7/inc/LinkDef.h +++ b/hist/histv7/inc/LinkDef.h @@ -6,6 +6,7 @@ #pragma link C++ class ROOT::Experimental::RHistEngine-; #pragma link C++ class ROOT::Experimental::RHistEngine-; +#pragma link C++ class ROOT::Experimental::RHistStats-; #pragma link C++ class ROOT::Experimental::RRegularAxis-; #pragma link C++ class ROOT::Experimental::RVariableBinAxis-; #pragma link C++ class ROOT::Experimental::Internal::RAxes-; diff --git a/hist/histv7/inc/ROOT/RHistStats.hxx b/hist/histv7/inc/ROOT/RHistStats.hxx new file mode 100644 index 0000000000000..5a80f547bdec4 --- /dev/null +++ b/hist/histv7/inc/ROOT/RHistStats.hxx @@ -0,0 +1,358 @@ +/// \file +/// \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes. +/// Feedback is welcome! + +#ifndef ROOT_RHistStats +#define ROOT_RHistStats + +#include "RHistUtils.hxx" +#include "RLinearizedIndex.hxx" +#include "RWeight.hxx" + +#include +#include +#include +#include +#include + +class TBuffer; + +namespace ROOT { +namespace Experimental { + +/** +Histogram statistics of unbinned values. + +Every call to \ref Fill(const A &... args) "Fill" updates sums to compute the number of effective entries as well as the +arithmetic mean and other statistical quantities per dimension: +\code +ROOT::Experimental::RHistStats stats(1); +stats.Fill(8.5); +stats.Fill(1.5); +// stats.ComputeMean() will return 5.0 +\endcode + +\warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes. +Feedback is welcome! +*/ +class RHistStats final { +public: + /// Statistics for one dimension. + struct RDimensionStats final { + double fSumWX = 0.0; + double fSumWX2 = 0.0; + double fSumWX3 = 0.0; + double fSumWX4 = 0.0; + + void Add(double x) + { + fSumWX += x; + fSumWX2 += x * x; + fSumWX3 += x * x * x; + fSumWX4 += x * x * x * x; + } + + void Add(double x, double w) + { + fSumWX += w * x; + fSumWX2 += w * x * x; + fSumWX3 += w * x * x * x; + fSumWX4 += w * x * x * x * x; + } + }; + +private: + /// The number of entries + std::uint64_t fNEntries = 0; + /// The sum of weights + double fSumW = 0.0; + /// The sum of weights squared + double fSumW2 = 0.0; + /// The sums per dimension + std::vector fDimensionStats; + +public: + /// Construct a statistics object. + /// + /// \param[in] nDimensions the number of dimensions, must be > 0 + explicit RHistStats(std::size_t nDimensions) + { + if (nDimensions == 0) { + throw std::invalid_argument("nDimensions must be > 0"); + } + fDimensionStats.resize(nDimensions); + } + + std::size_t GetNDimensions() const { return fDimensionStats.size(); } + + std::uint64_t GetNEntries() const { return fNEntries; } + double GetSumW() const { return fSumW; } + double GetSumW2() const { return fSumW2; } + + const RDimensionStats &GetDimensionStats(std::size_t dim = 0) const { return fDimensionStats.at(dim); } + + /// Compute the number of effective entries. + /// + /// \f[ + /// \frac{(\sum w_i)^2}{\sum w_i^2} + /// \f] + /// + /// \return the number of effective entries + double ComputeNEffectiveEntries() const + { + if (fSumW2 == 0) { + return 0; + } + return fSumW * fSumW / fSumW2; + } + + /// Compute the arithmetic mean of unbinned values. + /// + /// \f[ + /// \mu = \frac{\sum w_i \cdot x_i}{\sum w_i} + /// \f] + /// + /// \param[in] dim the dimension index, starting at 0 + /// \return the arithmetic mean of unbinned values + double ComputeMean(std::size_t dim = 0) const + { + // First get the statistics, which includes checking the argument. + auto &stats = fDimensionStats.at(dim); + if (fSumW == 0) { + return 0; + } + return stats.fSumWX / fSumW; + } + + /// Compute the variance of unbinned values. + /// + /// This function computes the uncorrected sample variance: + /// \f[ + /// \sigma^2 = \frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2 + /// \f] + /// With some rewriting, this is equivalent to: + /// \f[ + /// \sigma^2 = \frac{\sum w_i \cdot x_i^2}{\sum w_i} - \mu^2 + /// \f] + /// + /// This function does not include Bessel's correction needed for an unbiased estimator of population variance. + /// In other words, the return value is a biased estimation lower than the actual population variance. + /// + /// \param[in] dim the dimension index, starting at 0 + /// \return the variance of unbinned values + double ComputeVariance(std::size_t dim = 0) const + { + // First get the statistics, which includes checking the argument. + auto &stats = fDimensionStats.at(dim); + if (fSumW == 0) { + return 0; + } + double mean = ComputeMean(dim); + return stats.fSumWX2 / fSumW - mean * mean; + } + + /// Compute the standard deviation of unbinned values. + /// + /// This function computes the uncorrected sample standard deviation: + /// \f[ + /// \sigma = \sqrt{\frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2} + /// \f] + /// With some rewriting, this is equivalent to: + /// \f[ + /// \sigma = \sqrt{\frac{\sum w_i \cdot x_i^2}{\sum w_i} - \frac{(\sum w_i \cdot x_i)^2}{(\sum w_i)^2}} + /// \f] + /// + /// This function does not include Bessel's correction needed for an unbiased estimator of population variance. + /// In other words, the return value is a biased estimation lower than the actual population standard deviation. + /// + /// \param[in] dim the dimension index, starting at 0 + /// \return the standard deviation of unbinned values + double ComputeStdDev(std::size_t dim = 0) const { return std::sqrt(ComputeVariance(dim)); } + + // clang-format off + /// Compute the skewness of unbinned values. + /// + /// The skewness is the third standardized moment: + /// \f[ + /// E\left[\left(\frac{X - \mu}{\sigma}\right)^3\right] + /// \f] + /// With support for weighted filling and after some rewriting, it is computed as: + /// \f[ + /// \frac{\frac{\sum w_i \cdot x_i^3}{\sum w_i} - 3 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu + 2 \cdot \mu^3}{\sigma^3} + /// \f] + /// + /// \param[in] dim the dimension index, starting at 0 + /// \return the skewness of unbinned values + // clang-format on + double ComputeSkewness(std::size_t dim = 0) const + { + // First get the statistics, which includes checking the argument. + auto &stats = fDimensionStats.at(dim); + if (fSumW == 0) { + return 0; + } + double mean = ComputeMean(dim); + double var = ComputeVariance(dim); + if (var == 0) { + return 0; + } + double EWX3 = stats.fSumWX3 / fSumW; + double EWX2 = stats.fSumWX2 / fSumW; + return (EWX3 - 3 * EWX2 * mean + 2 * mean * mean * mean) / std::pow(var, 1.5); + } + + // clang-format off + /// Compute the (excess) kurtosis of unbinned values. + /// + /// The kurtosis is based on the fourth standardized moment: + /// \f[ + /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] + /// \f] + /// The excess kurtosis subtracts 3 from the standardized moment to have a value of 0 for a normal distribution: + /// \f[ + /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] - 3 + /// \f] + /// + /// With support for weighted filling and after some rewriting, the (excess kurtosis) is computed as: + /// \f[ + /// \frac{\frac{\sum w_i \cdot x_i^4}{\sum w_i} - 4 \cdot \frac{\sum w_i \cdot x_i^3}{\sum w_i} \cdot \mu + 6 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu^2 - 3 \cdot \mu^4}{\sigma^4} - 3 + /// \f] + /// + /// \param[in] dim the dimension index, starting at 0 + /// \return the (excess) kurtosis of unbinned values + // clang-format on + double ComputeKurtosis(std::size_t dim = 0) const + { + // First get the statistics, which includes checking the argument. + auto &stats = fDimensionStats.at(dim); + if (fSumW == 0) { + return 0; + } + double mean = ComputeMean(dim); + double var = ComputeVariance(dim); + if (var == 0) { + return 0; + } + double EWX4 = stats.fSumWX4 / fSumW; + double EWX3 = stats.fSumWX3 / fSumW; + double EWX2 = stats.fSumWX2 / fSumW; + return (EWX4 - 4 * EWX3 * mean + 6 * EWX2 * mean * mean - 3 * mean * mean * mean * mean) / (var * var) - 3; + } + +private: + template + void FillImpl(const std::tuple &args) + { + fDimensionStats[I].Add(std::get(args)); + if constexpr (I + 1 < sizeof...(A)) { + FillImpl(args); + } + } + + template + void FillImpl(const std::tuple &args, double w) + { + fDimensionStats[I].Add(std::get(args), w); + if constexpr (I + 1 < N) { + FillImpl(args, w); + } + } + +public: + /// Fill an entry into this statistics object. + /// + /// \code + /// ROOT::Experimental::RHistStats stats(2); + /// auto args = std::make_tuple(8.5, 10.5); + /// stats.Fill(args); + /// \endcode + /// + /// Throws an exception if the number of arguments does not match the number of dimensions. + /// + /// \param[in] args the arguments for each dimension + /// \par See also + /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the + /// \ref Fill(const std::tuple &args, RWeight weight) "overload for weighted filling" + template + void Fill(const std::tuple &args) + { + if (sizeof...(A) != fDimensionStats.size()) { + throw std::invalid_argument("invalid number of arguments to Fill"); + } + fNEntries++; + fSumW++; + fSumW2++; + FillImpl<0>(args); + } + + /// Fill an entry into this statistics object with a weight. + /// + /// \code + /// ROOT::Experimental::RHistStats stats(2); + /// auto args = std::make_tuple(8.5, 10.5); + /// stats.Fill(args, ROOT::Experimental::RWeight(0.8)); + /// \endcode + /// + /// \param[in] args the arguments for each dimension + /// \param[in] weight the weight for this entry + /// \par See also + /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the + /// \ref Fill(const std::tuple &args) "overload for unweighted filling" + template + void Fill(const std::tuple &args, RWeight weight) + { + if (sizeof...(A) != fDimensionStats.size()) { + throw std::invalid_argument("invalid number of arguments to Fill"); + } + fNEntries++; + double w = weight.fValue; + fSumW += w; + fSumW2 += w * w; + FillImpl<0, sizeof...(A)>(args, w); + } + + /// Fill an entry into this statistics object. + /// + /// \code + /// ROOT::Experimental::RHistStats stats(2); + /// stats.Fill(8.5, 10.5); + /// \endcode + /// For weighted filling, pass an RWeight as the last argument: + /// \code + /// ROOT::Experimental::RHistStats stats(2); + /// stats.Fill(8.5, 10.5, ROOT::Experimental::RWeight(0.8)); + /// \endcode + /// + /// Throws an exception if the number of arguments does not match the number of dimensions. + /// + /// \param[in] args the arguments for each dimension + /// \par See also + /// the function overloads accepting `std::tuple` \ref Fill(const std::tuple &args) "for unweighted filling" + /// and \ref Fill(const std::tuple &args, RWeight) "for weighted filling" + template + void Fill(const A &...args) + { + auto t = std::forward_as_tuple(args...); + if constexpr (std::is_same_v::type, RWeight>) { + static constexpr std::size_t N = sizeof...(A) - 1; + if (N != fDimensionStats.size()) { + throw std::invalid_argument("invalid number of arguments to Fill"); + } + fNEntries++; + double w = std::get(t).fValue; + fSumW += w; + fSumW2 += w * w; + FillImpl<0, N>(t, w); + } else { + Fill(t); + } + } + + /// %ROOT Streamer function to throw when trying to store an object of this class. + void Streamer(TBuffer &) { throw std::runtime_error("unable to store RHistStats"); } +}; + +} // namespace Experimental +} // namespace ROOT + +#endif diff --git a/hist/histv7/test/CMakeLists.txt b/hist/histv7/test/CMakeLists.txt index 4f75b4f4f13b7..09c17eaeaf07d 100644 --- a/hist/histv7/test/CMakeLists.txt +++ b/hist/histv7/test/CMakeLists.txt @@ -2,6 +2,7 @@ HIST_ADD_GTEST(hist_axes hist_axes.cxx) HIST_ADD_GTEST(hist_engine hist_engine.cxx) HIST_ADD_GTEST(hist_index hist_index.cxx) HIST_ADD_GTEST(hist_regular hist_regular.cxx) +HIST_ADD_GTEST(hist_stats hist_stats.cxx) HIST_ADD_GTEST(hist_variable hist_variable.cxx) if(NOT DEFINED histv7_standalone) diff --git a/hist/histv7/test/hist_io.cxx b/hist/histv7/test/hist_io.cxx index 8e225e2dee5af..5b5d237ac8314 100644 --- a/hist/histv7/test/hist_io.cxx +++ b/hist/histv7/test/hist_io.cxx @@ -76,3 +76,9 @@ TEST(RHistEngine, Streamer) const RHistEngine engineD({axis}); ExpectThrowOnWriteObject(engineD); } + +TEST(RHistStats, Streamer) +{ + const RHistStats stats(1); + ExpectThrowOnWriteObject(stats); +} diff --git a/hist/histv7/test/hist_stats.cxx b/hist/histv7/test/hist_stats.cxx new file mode 100644 index 0000000000000..71d1905118a3e --- /dev/null +++ b/hist/histv7/test/hist_stats.cxx @@ -0,0 +1,392 @@ +#include "hist_test.hxx" + +#include +#include + +TEST(RHistStats, Constructor) +{ + RHistStats stats(1); + EXPECT_EQ(stats.GetNDimensions(), 1); + + stats = RHistStats(2); + EXPECT_EQ(stats.GetNDimensions(), 2); + + EXPECT_THROW(RHistStats(0), std::invalid_argument); +} + +TEST(RHistStats, GetDimensionStats) +{ + RHistStats stats(3); + ASSERT_EQ(stats.GetNEntries(), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 0; i < Entries; i++) { + stats.Fill(i, 2 * i, i * i); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + { + const auto &dimensionStats = stats.GetDimensionStats(/*=0*/); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX, 190); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX2, 2470); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX3, 36100); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX4, 562666); + } + { + const auto &dimensionStats = stats.GetDimensionStats(1); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX, 2 * 190); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX2, 4 * 2470); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX3, 8 * 36100); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX4, 16 * 562666); + } + { + const auto &dimensionStats = stats.GetDimensionStats(2); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX, 2470); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX2, 562666); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX3, 152455810); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX4, 44940730666); + } +} + +TEST(RHistStats, GetDimensionStatsWeighted) +{ + RHistStats stats(3); + ASSERT_EQ(stats.GetNEntries(), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 0; i < Entries; i++) { + stats.Fill(i, 2 * i, i * i, RWeight(0.1 + 0.03 * i)); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + { + const auto &dimensionStats = stats.GetDimensionStats(/*=0*/); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX, 93.1); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX2, 1330.0); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX3, 20489.98); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX4, 330265.6); + } + { + const auto &dimensionStats = stats.GetDimensionStats(1); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX, 2 * 93.1); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX2, 4 * 1330.0); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX3, 8 * 20489.98); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX4, 16 * 330265.6); + } + { + const auto &dimensionStats = stats.GetDimensionStats(2); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX, 1330.0); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX2, 330265.6); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX3, 93164182.0); + EXPECT_DOUBLE_EQ(dimensionStats.fSumWX4, 28108731464.8); + } +} + +TEST(RHistStats, ComputeNEffectiveEntries) +{ + RHistStats stats(1); + ASSERT_EQ(stats.GetNEntries(), 0); + EXPECT_EQ(stats.ComputeNEffectiveEntries(), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 0; i < Entries; i++) { + stats.Fill(1); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + EXPECT_DOUBLE_EQ(stats.GetSumW(), Entries); + EXPECT_DOUBLE_EQ(stats.GetSumW2(), Entries); + EXPECT_DOUBLE_EQ(stats.ComputeNEffectiveEntries(), Entries); +} + +TEST(RHistStats, ComputeNEffectiveEntriesWeighted) +{ + RHistStats stats(1); + ASSERT_EQ(stats.GetNEntries(), 0); + EXPECT_EQ(stats.ComputeNEffectiveEntries(), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 0; i < Entries; i++) { + stats.Fill(1, RWeight(0.1 + 0.03 * i)); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + EXPECT_DOUBLE_EQ(stats.GetSumW(), 7.7); + EXPECT_DOUBLE_EQ(stats.GetSumW2(), 3.563); + // Cross-checked with TH1 + EXPECT_DOUBLE_EQ(stats.ComputeNEffectiveEntries(), 16.640471512770137); +} + +TEST(RHistStats, ComputeMean) +{ + RHistStats stats(3); + ASSERT_EQ(stats.GetNEntries(), 0); + EXPECT_EQ(stats.ComputeMean(/*=0*/), 0); + EXPECT_EQ(stats.ComputeMean(1), 0); + EXPECT_EQ(stats.ComputeMean(2), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 0; i < Entries; i++) { + stats.Fill(i, 2 * i, i * i); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + EXPECT_DOUBLE_EQ(stats.ComputeMean(/*=0*/), 9.5); + EXPECT_DOUBLE_EQ(stats.ComputeMean(1), 19); + EXPECT_DOUBLE_EQ(stats.ComputeMean(2), 123.5); +} + +TEST(RHistStats, ComputeMeanWeighted) +{ + RHistStats stats(3); + ASSERT_EQ(stats.GetNEntries(), 0); + EXPECT_EQ(stats.ComputeMean(/*=0*/), 0); + EXPECT_EQ(stats.ComputeMean(1), 0); + EXPECT_EQ(stats.ComputeMean(2), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 0; i < Entries; i++) { + stats.Fill(i, 2 * i, i * i, RWeight(0.1 + 0.03 * i)); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + // Cross-checked with TH1 + EXPECT_DOUBLE_EQ(stats.ComputeMean(/*=0*/), 12.090909090909090); + EXPECT_DOUBLE_EQ(stats.ComputeMean(1), 24.181818181818180); + EXPECT_DOUBLE_EQ(stats.ComputeMean(2), 172.72727272727272); +} + +TEST(RHistStats, ComputeVariance) +{ + RHistStats stats(3); + ASSERT_EQ(stats.GetNEntries(), 0); + EXPECT_EQ(stats.ComputeVariance(/*=0*/), 0); + EXPECT_EQ(stats.ComputeVariance(1), 0); + EXPECT_EQ(stats.ComputeVariance(2), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 0; i < Entries; i++) { + stats.Fill(i, 2 * i, i * i); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + EXPECT_DOUBLE_EQ(stats.ComputeVariance(/*=0*/), 33.25); + EXPECT_DOUBLE_EQ(stats.ComputeVariance(1), 133); + EXPECT_DOUBLE_EQ(stats.ComputeVariance(2), 12881.05); +} + +TEST(RHistStats, ComputeVarianceWeighted) +{ + RHistStats stats(3); + ASSERT_EQ(stats.GetNEntries(), 0); + EXPECT_EQ(stats.ComputeVariance(/*=0*/), 0); + EXPECT_EQ(stats.ComputeVariance(1), 0); + EXPECT_EQ(stats.ComputeVariance(2), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 0; i < Entries; i++) { + stats.Fill(i, 2 * i, i * i, RWeight(0.1 + 0.03 * i)); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + // Cross-checked with TH1::GetStdDev squared, numerical differences with EXPECT_DOUBLE_EQ + EXPECT_FLOAT_EQ(stats.ComputeVariance(/*=0*/), 26.5371900); + EXPECT_FLOAT_EQ(stats.ComputeVariance(1), 106.148760); + EXPECT_FLOAT_EQ(stats.ComputeVariance(2), 13056.9256); +} + +TEST(RHistStats, ComputeStdDev) +{ + RHistStats stats(3); + ASSERT_EQ(stats.GetNEntries(), 0); + EXPECT_EQ(stats.ComputeStdDev(/*=0*/), 0); + EXPECT_EQ(stats.ComputeStdDev(1), 0); + EXPECT_EQ(stats.ComputeStdDev(2), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 0; i < Entries; i++) { + stats.Fill(i, 2 * i, i * i); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + EXPECT_DOUBLE_EQ(stats.ComputeStdDev(/*=0*/), std::sqrt(33.25)); + EXPECT_DOUBLE_EQ(stats.ComputeStdDev(1), std::sqrt(133)); + EXPECT_DOUBLE_EQ(stats.ComputeStdDev(2), std::sqrt(12881.05)); +} + +TEST(RHistStats, ComputeStdDevWeighted) +{ + RHistStats stats(3); + ASSERT_EQ(stats.GetNEntries(), 0); + EXPECT_EQ(stats.ComputeStdDev(/*=0*/), 0); + EXPECT_EQ(stats.ComputeStdDev(1), 0); + EXPECT_EQ(stats.ComputeStdDev(2), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 0; i < Entries; i++) { + stats.Fill(i, 2 * i, i * i, RWeight(0.1 + 0.03 * i)); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + // Cross-checked with TH1, numerical differences with EXPECT_DOUBLE_EQ + EXPECT_FLOAT_EQ(stats.ComputeStdDev(/*=0*/), 5.15142602); + EXPECT_FLOAT_EQ(stats.ComputeStdDev(1), 10.3028520); + EXPECT_FLOAT_EQ(stats.ComputeStdDev(2), 114.266905); +} + +TEST(RHistStats, ComputeSkewness) +{ + RHistStats stats(3); + ASSERT_EQ(stats.GetNEntries(), 0); + EXPECT_EQ(stats.ComputeSkewness(/*=0*/), 0); + EXPECT_EQ(stats.ComputeSkewness(1), 0); + EXPECT_EQ(stats.ComputeSkewness(2), 0); + + stats.Fill(0, 0, 0); + ASSERT_EQ(stats.GetNEntries(), 1); + // With one entry, the variance is 0 and we define skewness to be 0 as well. + EXPECT_EQ(stats.ComputeSkewness(/*=0*/), 0); + EXPECT_EQ(stats.ComputeSkewness(1), 0); + EXPECT_EQ(stats.ComputeSkewness(2), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 1; i < Entries; i++) { + stats.Fill(i, 2 * i, i * i); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + EXPECT_DOUBLE_EQ(stats.ComputeSkewness(/*=0*/), 0); + EXPECT_DOUBLE_EQ(stats.ComputeSkewness(1), 0); + // Cross-checked with TH1 and SciPy, numerical differences with EXPECT_DOUBLE_EQ + EXPECT_FLOAT_EQ(stats.ComputeSkewness(2), 0.66125456); +} + +TEST(RHistStats, ComputeSkewnessWeighted) +{ + RHistStats stats(3); + ASSERT_EQ(stats.GetNEntries(), 0); + EXPECT_EQ(stats.ComputeSkewness(/*=0*/), 0); + EXPECT_EQ(stats.ComputeSkewness(1), 0); + EXPECT_EQ(stats.ComputeSkewness(2), 0); + + stats.Fill(0, 0, 0, RWeight(0.1)); + ASSERT_EQ(stats.GetNEntries(), 1); + // With one entry, the variance is 0 and we define skewness to be 0 as well. + EXPECT_EQ(stats.ComputeSkewness(/*=0*/), 0); + EXPECT_EQ(stats.ComputeSkewness(1), 0); + EXPECT_EQ(stats.ComputeSkewness(2), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 1; i < Entries; i++) { + stats.Fill(i, 2 * i, i * i, RWeight(0.1 + 0.03 * i)); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + // Cross-checked with TH1, numerical differences with EXPECT_DOUBLE_EQ + EXPECT_FLOAT_EQ(stats.ComputeSkewness(/*=0*/), -0.50554999); + EXPECT_FLOAT_EQ(stats.ComputeSkewness(1), -0.50554999); + EXPECT_FLOAT_EQ(stats.ComputeSkewness(2), 0.12072240); +} + +TEST(RHistStats, ComputeKurtosis) +{ + RHistStats stats(3); + ASSERT_EQ(stats.GetNEntries(), 0); + EXPECT_EQ(stats.ComputeKurtosis(/*=0*/), 0); + EXPECT_EQ(stats.ComputeKurtosis(1), 0); + EXPECT_EQ(stats.ComputeKurtosis(2), 0); + + stats.Fill(0, 0, 0); + ASSERT_EQ(stats.GetNEntries(), 1); + // With one entry, the variance is 0 and we define kurtosis to be 0 as well. + EXPECT_EQ(stats.ComputeKurtosis(/*=0*/), 0); + EXPECT_EQ(stats.ComputeKurtosis(1), 0); + EXPECT_EQ(stats.ComputeKurtosis(2), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 1; i < Entries; i++) { + stats.Fill(i, 2 * i, i * i); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + // Cross-checked with TH1 and SciPy, numerical differences with EXPECT_DOUBLE_EQ + EXPECT_FLOAT_EQ(stats.ComputeKurtosis(/*=0*/), -1.2060150); + EXPECT_FLOAT_EQ(stats.ComputeKurtosis(1), -1.2060150); + EXPECT_FLOAT_EQ(stats.ComputeKurtosis(2), -0.84198253); +} + +TEST(RHistStats, ComputeKurtosisWeighted) +{ + RHistStats stats(3); + ASSERT_EQ(stats.GetNEntries(), 0); + EXPECT_EQ(stats.ComputeKurtosis(/*=0*/), 0); + EXPECT_EQ(stats.ComputeKurtosis(1), 0); + EXPECT_EQ(stats.ComputeKurtosis(2), 0); + + stats.Fill(0, 0, 0, RWeight(0.1)); + ASSERT_EQ(stats.GetNEntries(), 1); + // With one entry, the variance is 0 and we define kurtosis to be 0 as well. + EXPECT_EQ(stats.ComputeKurtosis(/*=0*/), 0); + EXPECT_EQ(stats.ComputeKurtosis(1), 0); + EXPECT_EQ(stats.ComputeKurtosis(2), 0); + + static constexpr std::size_t Entries = 20; + for (std::size_t i = 1; i < Entries; i++) { + stats.Fill(i, 2 * i, i * i, RWeight(0.1 + 0.03 * i)); + } + + ASSERT_EQ(stats.GetNEntries(), Entries); + // Cross-checked with TH1, numerical differences with EXPECT_DOUBLE_EQ + EXPECT_FLOAT_EQ(stats.ComputeKurtosis(/*=0*/), -0.74828797); + EXPECT_FLOAT_EQ(stats.ComputeKurtosis(1), -0.74828797); + EXPECT_FLOAT_EQ(stats.ComputeKurtosis(2), -1.2483086); +} + +TEST(RHistStats, ComputeSkewnessKurtosisVar0) +{ + RHistStats stats(1); + stats.Fill(1, RWeight(0.1)); + ASSERT_EQ(stats.GetNEntries(), 1); + EXPECT_EQ(stats.ComputeVariance(), 0); + EXPECT_EQ(stats.ComputeSkewness(), 0); + EXPECT_EQ(stats.ComputeKurtosis(), 0); +} + +TEST(RHistStats, FillInvalidNumberOfArguments) +{ + RHistStats stats1(1); + RHistStats stats2(2); + + EXPECT_NO_THROW(stats1.Fill(1)); + EXPECT_THROW(stats1.Fill(1, 2), std::invalid_argument); + + EXPECT_THROW(stats2.Fill(1), std::invalid_argument); + EXPECT_NO_THROW(stats2.Fill(1, 2)); + EXPECT_THROW(stats2.Fill(1, 2, 3), std::invalid_argument); +} + +TEST(RHistStats, FillWeightInvalidNumberOfArguments) +{ + RHistStats stats1(1); + RHistStats stats2(2); + + EXPECT_NO_THROW(stats1.Fill(1, RWeight(1))); + EXPECT_THROW(stats1.Fill(1, 2, RWeight(1)), std::invalid_argument); + + EXPECT_THROW(stats2.Fill(1, RWeight(1)), std::invalid_argument); + EXPECT_NO_THROW(stats2.Fill(1, 2, RWeight(1))); + EXPECT_THROW(stats2.Fill(1, 2, 3, RWeight(1)), std::invalid_argument); +} + +TEST(RHistStats, FillTupleWeightInvalidNumberOfArguments) +{ + RHistStats stats1(1); + RHistStats stats2(2); + + EXPECT_NO_THROW(stats1.Fill(std::make_tuple(1), RWeight(1))); + EXPECT_THROW(stats1.Fill(std::make_tuple(1, 2), RWeight(1)), std::invalid_argument); + + EXPECT_THROW(stats2.Fill(std::make_tuple(1), RWeight(1)), std::invalid_argument); + EXPECT_NO_THROW(stats2.Fill(std::make_tuple(1, 2), RWeight(1))); + EXPECT_THROW(stats2.Fill(std::make_tuple(1, 2, 3), RWeight(1)), std::invalid_argument); +} diff --git a/hist/histv7/test/hist_test.hxx b/hist/histv7/test/hist_test.hxx index 6d8da6880404f..2b8d7b2162885 100644 --- a/hist/histv7/test/hist_test.hxx +++ b/hist/histv7/test/hist_test.hxx @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ using ROOT::Experimental::RBinIndex; using ROOT::Experimental::RBinIndexRange; using ROOT::Experimental::RBinWithError; using ROOT::Experimental::RHistEngine; +using ROOT::Experimental::RHistStats; using ROOT::Experimental::RRegularAxis; using ROOT::Experimental::RVariableBinAxis; using ROOT::Experimental::RWeight;