diff --git a/TODO/WIP-COUPLE.cpp b/TODO/WIP-COUPLE.cpp index 2e2702551..6c9ab2507 100644 --- a/TODO/WIP-COUPLE.cpp +++ b/TODO/WIP-COUPLE.cpp @@ -177,19 +177,17 @@ void COUPLE::runOpenMPVariant(VariantID vid) RAJAPERF_DEFAULT_TUNING_DEFINE_BOILERPLATE(COUPLE, OpenMP, Base_OpenMP, RAJA_OpenMP) -void COUPLE::updateChecksum(VariantID vid, size_t tune_idx) +void COUPLE::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { Index_type max_loop_index = m_domain->lrn; - checksum[vid][tune_idx] += calcChecksum(m_t0, max_loop_index, vid); - checksum[vid][tune_idx] += calcChecksum(m_t1, max_loop_index, vid); - checksum[vid][tune_idx] += calcChecksum(m_t2, max_loop_index, vid); + addToChecksum(m_t0, max_loop_index, vid); + addToChecksum(m_t1, max_loop_index, vid); + addToChecksum(m_t2, max_loop_index, vid); } void COUPLE::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_t0, vid); deallocData(m_t1, vid); deallocData(m_t2, vid); diff --git a/docs/sphinx/dev_guide/kernel_class_impl.rst b/docs/sphinx/dev_guide/kernel_class_impl.rst index 144f1f1c4..7f1062f77 100644 --- a/docs/sphinx/dev_guide/kernel_class_impl.rst +++ b/docs/sphinx/dev_guide/kernel_class_impl.rst @@ -57,12 +57,24 @@ The methods in the source file are: kernel execution. * The number of floating point operations (FLOPS) performed for each kernel execution. - * The consistency of the checksums of the kernel. The possible values are - ``Consistent`` where all the variant tunings always get the same checksum, - ``ConsistentPerVariantTuning`` where an individual variant tuning always - gets the same checksum but different variant tunings may differ - slightly, and ``Inconsistent`` where the checksum of a variant tuning - may vary slightly run to run. + * The consistency of the checksums of the kernel. If the kernel + always produces the same checksum value for all variant tunings then the + checksums are ``Consistent``. 
Most kernels get a different but consistent + checksum for each variant tuning so the checksums are + ``ConsistentPerVariantTuning``. On the other hand, some kernels have + variant tunings that get different checksums on each run of that variant + tuning, for example due to the ordering of floating-point atomic add + operations, so the checksums are ``Inconsistent``. + * The tolerance of the checksums of the kernel. A number of predefined + values are available in the ``KernelBase::ChecksumTolerance`` class. If + the kernel consistently produces the same checksums then ``zero`` tolerance + is used. Most kernels use the ``normal`` tolerance. Some kernels are very + simple, for example they have a single floating-point operation per + iteration, so they use the ``tight`` tolerance. + * The scale factor to use with the checksums of the kernel. This is an + arbitrary multiplier on the checksum values used to scale the checksums + to a desired range. It is mostly used for kernels with floating-point + operation complexity that does not scale linearly with problem size. * The operational complexity of the kernel, where N is the *problem size* of the kernel. * Which RAJA features the kernel exercises. 
diff --git a/src/algorithm/ATOMIC.cpp b/src/algorithm/ATOMIC.cpp index bbca956c9..69e45d083 100644 --- a/src/algorithm/ATOMIC.cpp +++ b/src/algorithm/ATOMIC.cpp @@ -35,6 +35,7 @@ ATOMIC::ATOMIC(const RunParams& params) setFLOPsPerRep(getActualProblemSize()); setChecksumConsistency(ChecksumConsistency::Inconsistent); // atomics + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -54,14 +55,14 @@ void ATOMIC::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_final = -static_cast(vid); } -void ATOMIC::updateChecksum(VariantID vid, size_t tune_idx) +void ATOMIC::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += static_cast(m_final); + addToChecksum(m_final); } -void ATOMIC::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +void ATOMIC::tearDown(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; + } } // end namespace algorithm diff --git a/src/algorithm/HISTOGRAM.cpp b/src/algorithm/HISTOGRAM.cpp index 9a51fde48..706b8bc57 100644 --- a/src/algorithm/HISTOGRAM.cpp +++ b/src/algorithm/HISTOGRAM.cpp @@ -41,6 +41,7 @@ HISTOGRAM::HISTOGRAM(const RunParams& params) setFLOPsPerRep( (std::is_floating_point_v ? 
1 : 0) * getActualProblemSize() ); setChecksumConsistency(ChecksumConsistency::Consistent); // integer arithmetic + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N); @@ -119,14 +120,13 @@ void HISTOGRAM::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(DataSpace::Host, m_counts_final, m_num_bins, static_cast(0)); } -void HISTOGRAM::updateChecksum(VariantID vid, size_t tune_idx) +void HISTOGRAM::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(DataSpace::Host, m_counts_final, m_num_bins); + addToChecksum(DataSpace::Host, m_counts_final, m_num_bins); } void HISTOGRAM::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_bins, vid); deallocData(DataSpace::Host, m_counts_init); deallocData(DataSpace::Host, m_counts_final); diff --git a/src/algorithm/MEMCPY.cpp b/src/algorithm/MEMCPY.cpp index c4d86688d..0f12ff692 100644 --- a/src/algorithm/MEMCPY.cpp +++ b/src/algorithm/MEMCPY.cpp @@ -35,6 +35,7 @@ MEMCPY::MEMCPY(const RunParams& params) setFLOPsPerRep(0); setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N); @@ -53,14 +54,13 @@ void MEMCPY::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_y, getActualProblemSize(), -1.234567e89, vid); } -void MEMCPY::updateChecksum(VariantID vid, size_t tune_idx) +void MEMCPY::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid].at(tune_idx) += calcChecksum(m_y, getActualProblemSize(), vid); + addToChecksum(m_y, getActualProblemSize(), vid); } void MEMCPY::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_y, vid); } diff --git a/src/algorithm/MEMSET.cpp b/src/algorithm/MEMSET.cpp index 010f65ec0..6a6816476 100644 --- a/src/algorithm/MEMSET.cpp 
+++ b/src/algorithm/MEMSET.cpp @@ -35,6 +35,7 @@ MEMSET::MEMSET(const RunParams& params) setFLOPsPerRep(0); setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N); @@ -53,14 +54,13 @@ void MEMSET::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_val = 0.0; } -void MEMSET::updateChecksum(VariantID vid, size_t tune_idx) +void MEMSET::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid].at(tune_idx) += calcChecksum(m_x, getActualProblemSize(), vid); + addToChecksum(m_x, getActualProblemSize(), vid); } void MEMSET::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); } diff --git a/src/algorithm/REDUCE_SUM.cpp b/src/algorithm/REDUCE_SUM.cpp index 7f218b53a..4686312f5 100644 --- a/src/algorithm/REDUCE_SUM.cpp +++ b/src/algorithm/REDUCE_SUM.cpp @@ -35,6 +35,7 @@ REDUCE_SUM::REDUCE_SUM(const RunParams& params) setFLOPsPerRep(getActualProblemSize()); setChecksumConsistency(ChecksumConsistency::Inconsistent); // Reduction may use atomics + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -55,14 +56,13 @@ void REDUCE_SUM::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_sum = 0.0; } -void REDUCE_SUM::updateChecksum(VariantID vid, size_t tune_idx) +void REDUCE_SUM::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid].at(tune_idx) += calcChecksum(&m_sum, 1, vid); + addToChecksum(m_sum); } void REDUCE_SUM::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); } diff --git a/src/algorithm/SCAN.cpp b/src/algorithm/SCAN.cpp index e056c83d3..0962e9ac2 100644 --- a/src/algorithm/SCAN.cpp +++ b/src/algorithm/SCAN.cpp @@ -34,12 +34,12 @@ SCAN::SCAN(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(1 * getActualProblemSize()); - 
checksum_scale_factor = 1e-2 * + setChecksumConsistency(ChecksumConsistency::Inconsistent); // could depend on scheduling, this may be overly conservative + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(1e-2 * ( static_cast(getDefaultProblemSize()) / getActualProblemSize() ) / - getActualProblemSize(); - - setChecksumConsistency(ChecksumConsistency::Inconsistent); // could depend on scheduling, this may be overly conservative + getActualProblemSize()); setComplexity(Complexity::N); @@ -58,14 +58,13 @@ void SCAN::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_y, getActualProblemSize(), 0.0, vid); } -void SCAN::updateChecksum(VariantID vid, size_t tune_idx) +void SCAN::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_y, getActualProblemSize(), checksum_scale_factor, vid); + addToChecksum(m_y, getActualProblemSize(), vid); } void SCAN::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_y, vid); } diff --git a/src/algorithm/SORT.cpp b/src/algorithm/SORT.cpp index f12d8faca..98be2c728 100644 --- a/src/algorithm/SORT.cpp +++ b/src/algorithm/SORT.cpp @@ -35,7 +35,8 @@ SORT::SORT(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(0); - setChecksumConsistency(ChecksumConsistency::Consistent); // // sort is not stable but values are equal if equivalent + setChecksumConsistency(ChecksumConsistency::Consistent); // sort is not stable but values are equal if equivalent + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N_logN); @@ -53,14 +54,13 @@ void SORT::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataRandValue(m_x, getActualProblemSize()*getRunReps(), vid); } -void SORT::updateChecksum(VariantID vid, size_t tune_idx) +void SORT::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - 
checksum[vid][tune_idx] += calcChecksum(m_x, getActualProblemSize()*getRunReps(), vid); + addToChecksum(m_x, getActualProblemSize()*getRunReps(), vid); } void SORT::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); } diff --git a/src/algorithm/SORTPAIRS.cpp b/src/algorithm/SORTPAIRS.cpp index 28d20e75e..6b03d38a0 100644 --- a/src/algorithm/SORTPAIRS.cpp +++ b/src/algorithm/SORTPAIRS.cpp @@ -36,6 +36,7 @@ SORTPAIRS::SORTPAIRS(const RunParams& params) setFLOPsPerRep(0); setChecksumConsistency(ChecksumConsistency::Inconsistent); // sort is not stable and could depend on scheduling + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N_logN); @@ -54,15 +55,14 @@ void SORTPAIRS::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataRandValue(m_i, getActualProblemSize()*getRunReps(), vid); } -void SORTPAIRS::updateChecksum(VariantID vid, size_t tune_idx) +void SORTPAIRS::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_x, getActualProblemSize()*getRunReps(), vid); - checksum[vid][tune_idx] += calcChecksum(m_i, getActualProblemSize()*getRunReps(), vid); + addToChecksum(m_x, getActualProblemSize()*getRunReps(), vid); + addToChecksum(m_i, getActualProblemSize()*getRunReps(), vid); } void SORTPAIRS::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_i, vid); } diff --git a/src/apps/CONVECTION3DPA.cpp b/src/apps/CONVECTION3DPA.cpp index b53671c22..c92db5004 100644 --- a/src/apps/CONVECTION3DPA.cpp +++ b/src/apps/CONVECTION3DPA.cpp @@ -53,6 +53,7 @@ CONVECTION3DPA::CONVECTION3DPA(const RunParams& params) )); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -76,15 +77,13 @@ void CONVECTION3DPA::setUp(VariantID vid, size_t 
RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_Y, Index_type(CPA_D1D*CPA_D1D*CPA_D1D*m_NE), Real_type(0.0), vid); } -void CONVECTION3DPA::updateChecksum(VariantID vid, size_t tune_idx) +void CONVECTION3DPA::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_Y, CPA_D1D*CPA_D1D*CPA_D1D*m_NE, vid); + addToChecksum(m_Y, CPA_D1D*CPA_D1D*CPA_D1D*m_NE, vid); } void CONVECTION3DPA::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_B, vid); deallocData(m_Bt, vid); deallocData(m_G, vid); diff --git a/src/apps/DEL_DOT_VEC_2D.cpp b/src/apps/DEL_DOT_VEC_2D.cpp index 412b99f53..24ce8532f 100644 --- a/src/apps/DEL_DOT_VEC_2D.cpp +++ b/src/apps/DEL_DOT_VEC_2D.cpp @@ -45,6 +45,7 @@ DEL_DOT_VEC_2D::DEL_DOT_VEC_2D(const RunParams& params) setFLOPsPerRep(54 * m_domain->n_real_zones); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -79,15 +80,13 @@ void DEL_DOT_VEC_2D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_half = 0.5; } -void DEL_DOT_VEC_2D::updateChecksum(VariantID vid, size_t tune_idx) +void DEL_DOT_VEC_2D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_div, m_array_length, vid); + addToChecksum(m_div, m_array_length, vid); } void DEL_DOT_VEC_2D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_x, vid); deallocData(m_y, vid); deallocData(m_real_zones, vid); diff --git a/src/apps/DIFFUSION3DPA.cpp b/src/apps/DIFFUSION3DPA.cpp index 55977f325..118245fa2 100644 --- a/src/apps/DIFFUSION3DPA.cpp +++ b/src/apps/DIFFUSION3DPA.cpp @@ -54,6 +54,7 @@ DIFFUSION3DPA::DIFFUSION3DPA(const RunParams& params) 3 * DPA_D1D * DPA_D1D * DPA_D1D)); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + 
setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -76,15 +77,13 @@ void DIFFUSION3DPA::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_Y, Index_type(DPA_D1D*DPA_D1D*DPA_D1D*m_NE), Real_type(0.0), vid); } -void DIFFUSION3DPA::updateChecksum(VariantID vid, size_t tune_idx) +void DIFFUSION3DPA::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_Y, DPA_D1D*DPA_D1D*DPA_D1D*m_NE, vid); + addToChecksum(m_Y, DPA_D1D*DPA_D1D*DPA_D1D*m_NE, vid); } void DIFFUSION3DPA::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_B, vid); deallocData(m_G, vid); deallocData(m_D, vid); diff --git a/src/apps/EDGE3D.cpp b/src/apps/EDGE3D.cpp index 944a47ea4..066d8a4a3 100644 --- a/src/apps/EDGE3D.cpp +++ b/src/apps/EDGE3D.cpp @@ -59,11 +59,11 @@ EDGE3D::EDGE3D(const RunParams& params) setFLOPsPerRep(number_of_elements * flops_per_element); - m_checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -91,9 +91,9 @@ void EDGE3D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_sum, m_array_length, Real_type(0.0), vid); } -void EDGE3D::updateChecksum(VariantID vid, size_t tune_idx) +void EDGE3D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_sum, m_array_length, m_checksum_scale_factor, vid ); + addToChecksum(m_sum, m_array_length, vid); } void EDGE3D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) diff --git a/src/apps/EDGE3D.hpp b/src/apps/EDGE3D.hpp index 10dc47bd3..51d2b269a 100644 --- 
a/src/apps/EDGE3D.hpp +++ b/src/apps/EDGE3D.hpp @@ -441,8 +441,6 @@ class EDGE3D : public KernelBase ADomain* m_domain; Index_type m_array_length; - - Real_type m_checksum_scale_factor; }; } // end namespace apps diff --git a/src/apps/ENERGY.cpp b/src/apps/ENERGY.cpp index dd567d245..91c5207a9 100644 --- a/src/apps/ENERGY.cpp +++ b/src/apps/ENERGY.cpp @@ -61,6 +61,7 @@ ENERGY::ENERGY(const RunParams& params) ) * getActualProblemSize()); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -97,16 +98,14 @@ void ENERGY::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) initData(m_q_cut, vid); } -void ENERGY::updateChecksum(VariantID vid, size_t tune_idx) +void ENERGY::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_e_new, getActualProblemSize(), vid); - checksum[vid][tune_idx] += calcChecksum(m_q_new, getActualProblemSize(), vid); + addToChecksum(m_e_new, getActualProblemSize(), vid); + addToChecksum(m_q_new, getActualProblemSize(), vid); } void ENERGY::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_e_new, vid); deallocData(m_e_old, vid); deallocData(m_delvc, vid); diff --git a/src/apps/FEMSWEEP.cpp b/src/apps/FEMSWEEP.cpp index e2dea215c..24535d532 100644 --- a/src/apps/FEMSWEEP.cpp +++ b/src/apps/FEMSWEEP.cpp @@ -65,9 +65,9 @@ FEMSWEEP::FEMSWEEP(const RunParams& params) m_ne * m_na * m_ng ); // for all elements, angles, and groups // The checksum is inaccurate starting at the 10's digit for: AMD CPU and older clang versions on NVIDIA GPUs. 
- checksum_scale_factor = 0.0000000001; - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.0000000001); setComplexity(Complexity::N); @@ -104,15 +104,13 @@ void FEMSWEEP::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndCopyHostData(m_idx2 , g_idx2 , 37800 , vid); } -void FEMSWEEP::updateChecksum(VariantID vid, size_t tune_idx) +void FEMSWEEP::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_Xdat, m_Xlen, checksum_scale_factor , vid); + addToChecksum(m_Xdat, m_Xlen, vid); } void FEMSWEEP::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_Bdat, vid); deallocData(m_Adat, vid); deallocData(m_Fdat, vid); diff --git a/src/apps/FIR.cpp b/src/apps/FIR.cpp index 6c507e7b0..e03a0871e 100644 --- a/src/apps/FIR.cpp +++ b/src/apps/FIR.cpp @@ -37,11 +37,11 @@ FIR::FIR(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep((2 * m_coefflen) * getActualProblemSize()); - checksum_scale_factor = 0.0001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.0001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -60,15 +60,13 @@ void FIR::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_out, getActualProblemSize(), 0.0, vid); } -void FIR::updateChecksum(VariantID vid, size_t tune_idx) +void FIR::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_out, getActualProblemSize(), checksum_scale_factor , vid); + addToChecksum(m_out, getActualProblemSize(), vid); } void FIR::tearDown(VariantID vid, size_t 
RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_in, vid); deallocData(m_out, vid); } diff --git a/src/apps/INTSC_HEXHEX.cpp b/src/apps/INTSC_HEXHEX.cpp index dec325e44..247364694 100644 --- a/src/apps/INTSC_HEXHEX.cpp +++ b/src/apps/INTSC_HEXHEX.cpp @@ -74,6 +74,7 @@ INTSC_HEXHEX::INTSC_HEXHEX(const RunParams& params) setFLOPsPerRep(n_std_intsc * flops_per_intsc); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -137,7 +138,7 @@ void INTSC_HEXHEX::setUp(VariantID vid, allocAndInitDataConst ( m_vv_out, nvals_per_pair * n_subz_intsc, 0.0, vid ) ; // output volumes and moments on the host - allocData ( m_vv, nvals_per_pair * n_subz_intsc, Base_Seq ) ; + allocData ( DataSpace::Host, m_vv, nvals_per_pair * n_subz_intsc ) ; } @@ -246,7 +247,7 @@ void INTSC_HEXHEX::check_intsc_volume_moments void INTSC_HEXHEX::updateChecksum(VariantID vid, - size_t tune_idx) + size_t RAJAPERF_UNUSED_ARG(tune_idx)) { // One standard intersection is 8 subzone intersections. 
Index_type n_std_intsc = getActualProblemSize() ; @@ -257,8 +258,7 @@ void INTSC_HEXHEX::updateChecksum(VariantID vid, check_intsc_volume_moments ( n_subz_intsc, m_vv, vid ) ; - checksum[vid][tune_idx] += calcChecksum - (m_vv_out, nvals_per_pair*n_subz_intsc, vid ); + addToChecksum(m_vv_out, nvals_per_pair*n_subz_intsc, vid); } void INTSC_HEXHEX::tearDown(VariantID vid, @@ -268,7 +268,7 @@ void INTSC_HEXHEX::tearDown(VariantID vid, deallocData ( m_tsubz, vid ) ; deallocData ( m_vv_int, vid ) ; deallocData ( m_vv_out, vid ) ; - deallocData ( m_vv, Base_Seq ) ; + deallocData ( DataSpace::Host, m_vv ) ; } } // end namespace apps diff --git a/src/apps/INTSC_HEXRECT.cpp b/src/apps/INTSC_HEXRECT.cpp index 3a9e0eec6..5ddfdaab7 100644 --- a/src/apps/INTSC_HEXRECT.cpp +++ b/src/apps/INTSC_HEXRECT.cpp @@ -82,6 +82,7 @@ INTSC_HEXRECT::INTSC_HEXRECT(const RunParams& params) setFLOPsPerRep(n_intsc * flops_per_intsc); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -159,7 +160,7 @@ void INTSC_HEXRECT::setupTargetPlanes Int_type nty = ndy + 1 ; Int_type ntz = ndz + 1 ; - allocData ( planes[0], (nx+ny+nz), Base_Seq ) ; + allocData ( DataSpace::Host, planes[0], (nx+ny+nz) ) ; planes[1] = planes[0] + nz ; planes[2] = planes[1] + ny ; @@ -338,9 +339,9 @@ void INTSC_HEXRECT::setUp(VariantID vid, allocAndInitDataConst ( m_records, 4L*m_nrecords, 0.0, vid ) ; // Output records copied to the host. 
- allocData ( m_records_h, 4L*m_nrecords, Base_Seq ) ; + allocData ( DataSpace::Host, m_records_h, 4L*m_nrecords ) ; - deallocData ( planes[0], Base_Seq ) ; + deallocData ( DataSpace::Host, planes[0] ) ; } @@ -380,12 +381,12 @@ void INTSC_HEXRECT::checkMoments } Real_ptr zca, zcb, yca, ycb, xca, xcb ; - allocData ( zca, ndz, Base_Seq ) ; - allocData ( zcb, ndz, Base_Seq ) ; - allocData ( yca, ndy, Base_Seq ) ; - allocData ( ycb, ndy, Base_Seq ) ; - allocData ( xca, ndx, Base_Seq ) ; - allocData ( xcb, ndx, Base_Seq ) ; + allocData ( DataSpace::Host, zca, ndz ) ; + allocData ( DataSpace::Host, zcb, ndz ) ; + allocData ( DataSpace::Host, yca, ndy ) ; + allocData ( DataSpace::Host, ycb, ndy ) ; + allocData ( DataSpace::Host, xca, ndx ) ; + allocData ( DataSpace::Host, xcb, ndx ) ; for ( Index_type jz = 0 ; jz < ndz ; ++jz ) { Real_type za = zd0 + jz * sep1z ; @@ -495,12 +496,12 @@ void INTSC_HEXRECT::checkMoments } } - deallocData ( xca, Base_Seq ) ; - deallocData ( xcb, Base_Seq ) ; - deallocData ( yca, Base_Seq ) ; - deallocData ( ycb, Base_Seq ) ; - deallocData ( zca, Base_Seq ) ; - deallocData ( zcb, Base_Seq ) ; + deallocData ( DataSpace::Host, xca ) ; + deallocData ( DataSpace::Host, xcb ) ; + deallocData ( DataSpace::Host, yca ) ; + deallocData ( DataSpace::Host, ycb ) ; + deallocData ( DataSpace::Host, zca ) ; + deallocData ( DataSpace::Host, zcb ) ; } } @@ -617,7 +618,7 @@ void INTSC_HEXRECT::checkScaledVolumes -void INTSC_HEXRECT::updateChecksum(VariantID vid, size_t tune_idx) +void INTSC_HEXRECT::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { copyData ( DataSpace::Host, m_records_h, getDataSpace(vid), m_records, 4L*m_nrecords ) ; @@ -631,7 +632,7 @@ void INTSC_HEXRECT::updateChecksum(VariantID vid, size_t tune_idx) ( m_records_h, m_x_scl_offs, m_y_scl_offs, m_z_scl_offs, m_sep, vid ) ; - checksum[vid][tune_idx] += calcChecksum(m_records, 4L*m_nrecords, vid ); + addToChecksum(m_records, 4L*m_nrecords, vid); } void 
INTSC_HEXRECT::tearDown(VariantID vid, @@ -644,7 +645,7 @@ void INTSC_HEXRECT::tearDown(VariantID vid, deallocData ( m_xdnode, vid ) ; deallocData ( m_ydnode, vid ) ; deallocData ( m_zdnode, vid ) ; - deallocData ( m_records_h, Base_Seq ) ; + deallocData ( DataSpace::Host, m_records_h ) ; } } // end namespace apps diff --git a/src/apps/LTIMES.cpp b/src/apps/LTIMES.cpp index 0c6110ce8..1eb752f47 100644 --- a/src/apps/LTIMES.cpp +++ b/src/apps/LTIMES.cpp @@ -49,11 +49,11 @@ LTIMES::LTIMES(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(2 * m_num_z * m_num_g * m_num_m * m_num_d); - checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); // Change to Inconsistent if internal reductions use atomics + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -75,15 +75,13 @@ void LTIMES::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_psidat, int(m_psilen), vid); } -void LTIMES::updateChecksum(VariantID vid, size_t tune_idx) +void LTIMES::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_phidat, m_philen, checksum_scale_factor , vid); + addToChecksum(m_phidat, m_philen, vid); } void LTIMES::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_phidat, vid); deallocData(m_elldat, vid); deallocData(m_psidat, vid); diff --git a/src/apps/LTIMES_NOVIEW.cpp b/src/apps/LTIMES_NOVIEW.cpp index 670d8ddb5..5ead04481 100644 --- a/src/apps/LTIMES_NOVIEW.cpp +++ b/src/apps/LTIMES_NOVIEW.cpp @@ -49,11 +49,11 @@ LTIMES_NOVIEW::LTIMES_NOVIEW(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(2 * m_num_z * m_num_g * m_num_m * m_num_d); - 
checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); // Change to Inconsistent if internal reductions use atomics + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -74,15 +74,13 @@ void LTIMES_NOVIEW::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_psidat, int(m_psilen), vid); } -void LTIMES_NOVIEW::updateChecksum(VariantID vid, size_t tune_idx) +void LTIMES_NOVIEW::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_phidat, m_philen, checksum_scale_factor , vid); + addToChecksum(m_phidat, m_philen, vid); } void LTIMES_NOVIEW::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_phidat, vid); deallocData(m_elldat, vid); deallocData(m_psidat, vid); diff --git a/src/apps/MASS3DEA.cpp b/src/apps/MASS3DEA.cpp index 90c6fbb30..ba2e0832d 100644 --- a/src/apps/MASS3DEA.cpp +++ b/src/apps/MASS3DEA.cpp @@ -46,6 +46,7 @@ MASS3DEA::MASS3DEA(const RunParams& params) setFLOPsPerRep(m_NE * 7 * ea_mat_entries); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -67,16 +68,14 @@ void MASS3DEA::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) MEA_D1D*MEA_D1D*MEA_D1D*m_NE), Real_type(0.0), vid); } -void MASS3DEA::updateChecksum(VariantID vid, size_t tune_idx) +void MASS3DEA::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_M, MEA_D1D*MEA_D1D*MEA_D1D* + addToChecksum(m_M, MEA_D1D*MEA_D1D*MEA_D1D* MEA_D1D*MEA_D1D*MEA_D1D*m_NE, vid); } void MASS3DEA::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - 
(void) vid; - deallocData(m_B, vid); deallocData(m_D, vid); deallocData(m_M, vid); diff --git a/src/apps/MASS3DPA.cpp b/src/apps/MASS3DPA.cpp index 8838d7741..a256a7187 100644 --- a/src/apps/MASS3DPA.cpp +++ b/src/apps/MASS3DPA.cpp @@ -50,6 +50,7 @@ MASS3DPA::MASS3DPA(const RunParams& params) 2 * MPA_Q1D * MPA_D1D * MPA_D1D * MPA_D1D + MPA_D1D * MPA_D1D * MPA_D1D)); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -72,15 +73,13 @@ void MASS3DPA::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_Y, Index_type(MPA_D1D*MPA_D1D*MPA_D1D*m_NE), Real_type(0.0), vid); } -void MASS3DPA::updateChecksum(VariantID vid, size_t tune_idx) +void MASS3DPA::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_Y, MPA_D1D*MPA_D1D*MPA_D1D*m_NE, vid); + addToChecksum(m_Y, MPA_D1D*MPA_D1D*MPA_D1D*m_NE, vid); } void MASS3DPA::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_B, vid); deallocData(m_Bt, vid); deallocData(m_D, vid); diff --git a/src/apps/MASSVEC3DPA.cpp b/src/apps/MASSVEC3DPA.cpp index 3d41a9915..5e5716cd1 100644 --- a/src/apps/MASSVEC3DPA.cpp +++ b/src/apps/MASSVEC3DPA.cpp @@ -57,6 +57,7 @@ MASSVEC3DPA::MASSVEC3DPA(const RunParams ¶ms) 2 * MVPA_Q1D * MVPA_D1D * MVPA_D1D * MVPA_D1D)); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -80,17 +81,14 @@ void MASSVEC3DPA::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) } -void MASSVEC3DPA::updateChecksum(VariantID vid, size_t tune_idx) +void MASSVEC3DPA::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += - calcChecksum(m_Y, MVPA_DIM * MVPA_D1D * MVPA_D1D * MVPA_D1D * m_NE, vid); + addToChecksum(m_Y, MVPA_DIM * MVPA_D1D * 
MVPA_D1D * MVPA_D1D * m_NE, vid); } void MASSVEC3DPA::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void)vid; - deallocData(m_B, vid); deallocData(m_Bt, vid); deallocData(m_D, vid); diff --git a/src/apps/MATVEC_3D_STENCIL.cpp b/src/apps/MATVEC_3D_STENCIL.cpp index 7bc5d4e83..c2c2af792 100644 --- a/src/apps/MATVEC_3D_STENCIL.cpp +++ b/src/apps/MATVEC_3D_STENCIL.cpp @@ -80,11 +80,11 @@ MATVEC_3D_STENCIL::MATVEC_3D_STENCIL(const RunParams& params) const size_t adds = 26; setFLOPsPerRep((multiplies + adds) * getItsPerRep()); - checksum_scale_factor = 1.0 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(1.0 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -137,15 +137,13 @@ void MATVEC_3D_STENCIL::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx setRealZones_3d(m_real_zones, *m_domain); } -void MATVEC_3D_STENCIL::updateChecksum(VariantID vid, size_t tune_idx) +void MATVEC_3D_STENCIL::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid].at(tune_idx) += calcChecksum(m_b, m_zonal_array_length, checksum_scale_factor , vid); + addToChecksum(m_b, m_zonal_array_length, vid); } void MATVEC_3D_STENCIL::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_b, vid); deallocData(m_x, vid); diff --git a/src/apps/NODAL_ACCUMULATION_3D.cpp b/src/apps/NODAL_ACCUMULATION_3D.cpp index 5e77a72b8..3a5d56e7e 100644 --- a/src/apps/NODAL_ACCUMULATION_3D.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D.cpp @@ -46,11 +46,11 @@ NODAL_ACCUMULATION_3D::NODAL_ACCUMULATION_3D(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 1*sizeof(Real_type) * m_domain->n_real_nodes ); // x (3d nodal stencil pattern: 8 touches per iterate) setFLOPsPerRep(9 * getItsPerRep()); - 
checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::Inconsistent); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -76,15 +76,13 @@ void NODAL_ACCUMULATION_3D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune setRealZones_3d(m_real_zones, *m_domain); } -void NODAL_ACCUMULATION_3D::updateChecksum(VariantID vid, size_t tune_idx) +void NODAL_ACCUMULATION_3D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid].at(tune_idx) += calcChecksum(m_x, m_nodal_array_length, checksum_scale_factor , vid); + addToChecksum(m_x, m_nodal_array_length, vid); } void NODAL_ACCUMULATION_3D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_x, vid); deallocData(m_vol, vid); deallocData(m_real_zones, vid); diff --git a/src/apps/PRESSURE.cpp b/src/apps/PRESSURE.cpp index 5d485b6ff..2e68d5aa8 100644 --- a/src/apps/PRESSURE.cpp +++ b/src/apps/PRESSURE.cpp @@ -39,6 +39,7 @@ PRESSURE::PRESSURE(const RunParams& params) ) * getActualProblemSize()); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -65,15 +66,13 @@ void PRESSURE::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) initData(m_eosvmax, vid); } -void PRESSURE::updateChecksum(VariantID vid, size_t tune_idx) +void PRESSURE::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_p_new, getActualProblemSize(), vid); + addToChecksum(m_p_new, getActualProblemSize(), vid); } void PRESSURE::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_compression, vid); deallocData(m_bvc, vid); deallocData(m_p_new, vid); diff 
--git a/src/apps/VOL3D.cpp b/src/apps/VOL3D.cpp index 7f65abd58..f25ecccdb 100644 --- a/src/apps/VOL3D.cpp +++ b/src/apps/VOL3D.cpp @@ -44,11 +44,11 @@ VOL3D::VOL3D(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(72 * (m_domain->lpz+1 - m_domain->fpz)); - checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -78,15 +78,13 @@ void VOL3D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_vnormq = 0.083333333333333333; /* vnormq = 1/12 */ } -void VOL3D::updateChecksum(VariantID vid, size_t tune_idx) +void VOL3D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_vol, m_array_length, checksum_scale_factor , vid); + addToChecksum(m_vol, m_array_length, vid); } void VOL3D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_x, vid); deallocData(m_y, vid); deallocData(m_z, vid); diff --git a/src/apps/ZONAL_ACCUMULATION_3D.cpp b/src/apps/ZONAL_ACCUMULATION_3D.cpp index 8a87731cd..2175df5c1 100644 --- a/src/apps/ZONAL_ACCUMULATION_3D.cpp +++ b/src/apps/ZONAL_ACCUMULATION_3D.cpp @@ -46,11 +46,11 @@ ZONAL_ACCUMULATION_3D::ZONAL_ACCUMULATION_3D(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(8 * getItsPerRep()); - checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -75,15 +75,13 @@ 
void ZONAL_ACCUMULATION_3D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune setRealZones_3d(m_real_zones, *m_domain); } -void ZONAL_ACCUMULATION_3D::updateChecksum(VariantID vid, size_t tune_idx) +void ZONAL_ACCUMULATION_3D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid].at(tune_idx) += calcChecksum(m_vol, m_zonal_array_length, checksum_scale_factor , vid); + addToChecksum(m_vol, m_zonal_array_length, vid); } void ZONAL_ACCUMULATION_3D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; - deallocData(m_x, vid); deallocData(m_vol, vid); deallocData(m_real_zones, vid); diff --git a/src/basic/ARRAY_OF_PTRS.cpp b/src/basic/ARRAY_OF_PTRS.cpp index ac64245d8..792a73528 100644 --- a/src/basic/ARRAY_OF_PTRS.cpp +++ b/src/basic/ARRAY_OF_PTRS.cpp @@ -37,6 +37,7 @@ ARRAY_OF_PTRS::ARRAY_OF_PTRS(const RunParams& params) setFLOPsPerRep(m_array_size * getActualProblemSize()); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -55,9 +56,9 @@ void ARRAY_OF_PTRS::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_x, m_array_size*getActualProblemSize(), vid); } -void ARRAY_OF_PTRS::updateChecksum(VariantID vid, size_t tune_idx) +void ARRAY_OF_PTRS::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid].at(tune_idx) += calcChecksum(m_y, getActualProblemSize(), vid); + addToChecksum(m_y, getActualProblemSize(), vid); } void ARRAY_OF_PTRS::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) diff --git a/src/basic/COPY8.cpp b/src/basic/COPY8.cpp index c75794667..e6546a424 100644 --- a/src/basic/COPY8.cpp +++ b/src/basic/COPY8.cpp @@ -35,6 +35,7 @@ COPY8::COPY8(const RunParams& params) setFLOPsPerRep(0); setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N); @@ -67,21 
+68,20 @@ void COPY8::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_x7, getActualProblemSize(), vid); } -void COPY8::updateChecksum(VariantID vid, size_t tune_idx) +void COPY8::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid].at(tune_idx) += calcChecksum(m_y0, getActualProblemSize(), vid); - checksum[vid].at(tune_idx) += calcChecksum(m_y1, getActualProblemSize(), vid); - checksum[vid].at(tune_idx) += calcChecksum(m_y2, getActualProblemSize(), vid); - checksum[vid].at(tune_idx) += calcChecksum(m_y3, getActualProblemSize(), vid); - checksum[vid].at(tune_idx) += calcChecksum(m_y4, getActualProblemSize(), vid); - checksum[vid].at(tune_idx) += calcChecksum(m_y5, getActualProblemSize(), vid); - checksum[vid].at(tune_idx) += calcChecksum(m_y6, getActualProblemSize(), vid); - checksum[vid].at(tune_idx) += calcChecksum(m_y7, getActualProblemSize(), vid); + addToChecksum(m_y0, getActualProblemSize(), vid); + addToChecksum(m_y1, getActualProblemSize(), vid); + addToChecksum(m_y2, getActualProblemSize(), vid); + addToChecksum(m_y3, getActualProblemSize(), vid); + addToChecksum(m_y4, getActualProblemSize(), vid); + addToChecksum(m_y5, getActualProblemSize(), vid); + addToChecksum(m_y6, getActualProblemSize(), vid); + addToChecksum(m_y7, getActualProblemSize(), vid); } void COPY8::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x0, vid); deallocData(m_x1, vid); deallocData(m_x2, vid); diff --git a/src/basic/DAXPY.cpp b/src/basic/DAXPY.cpp index 277ab567e..3bf3e079d 100644 --- a/src/basic/DAXPY.cpp +++ b/src/basic/DAXPY.cpp @@ -35,6 +35,7 @@ DAXPY::DAXPY(const RunParams& params) setFLOPsPerRep(2 * getActualProblemSize()); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -54,14 +55,13 @@ void DAXPY::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) 
initData(m_a, vid); } -void DAXPY::updateChecksum(VariantID vid, size_t tune_idx) +void DAXPY::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid].at(tune_idx) += calcChecksum(m_y, getActualProblemSize(), vid); + addToChecksum(m_y, getActualProblemSize(), vid); } void DAXPY::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_y, vid); } diff --git a/src/basic/DAXPY_ATOMIC.cpp b/src/basic/DAXPY_ATOMIC.cpp index d9e3dfed6..1c7d9af79 100644 --- a/src/basic/DAXPY_ATOMIC.cpp +++ b/src/basic/DAXPY_ATOMIC.cpp @@ -35,6 +35,7 @@ DAXPY_ATOMIC::DAXPY_ATOMIC(const RunParams& params) setFLOPsPerRep(2 * getActualProblemSize()); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -55,14 +56,13 @@ void DAXPY_ATOMIC::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) initData(m_a, vid); } -void DAXPY_ATOMIC::updateChecksum(VariantID vid, size_t tune_idx) +void DAXPY_ATOMIC::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid].at(tune_idx) += calcChecksum(m_y, getActualProblemSize(), vid); + addToChecksum(m_y, getActualProblemSize(), vid); } void DAXPY_ATOMIC::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_y, vid); } diff --git a/src/basic/EMPTY.cpp b/src/basic/EMPTY.cpp index edb8b3497..6534988d0 100644 --- a/src/basic/EMPTY.cpp +++ b/src/basic/EMPTY.cpp @@ -35,6 +35,7 @@ EMPTY::EMPTY(const RunParams& params) setFLOPsPerRep( 0 ); setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N); diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp index feab1d994..031ed81be 100644 --- a/src/basic/IF_QUAD.cpp +++ b/src/basic/IF_QUAD.cpp @@ -36,11 +36,11 @@ IF_QUAD::IF_QUAD(const RunParams& params) 
setFLOPsPerRep(4 * getActualProblemSize() + 7 * getActualProblemSize() / 2); - checksum_scale_factor = 0.0001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.0001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -62,15 +62,14 @@ void IF_QUAD::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_x2, getActualProblemSize(), 0.0, vid); } -void IF_QUAD::updateChecksum(VariantID vid, size_t tune_idx) +void IF_QUAD::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_x1, getActualProblemSize(), checksum_scale_factor , vid); - checksum[vid][tune_idx] += calcChecksum(m_x2, getActualProblemSize(), checksum_scale_factor , vid); + addToChecksum(m_x1, getActualProblemSize(), vid); + addToChecksum(m_x2, getActualProblemSize(), vid); } void IF_QUAD::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_a, vid); deallocData(m_b, vid); deallocData(m_c, vid); diff --git a/src/basic/INDEXLIST.cpp b/src/basic/INDEXLIST.cpp index 354e274b2..57c0e8a54 100644 --- a/src/basic/INDEXLIST.cpp +++ b/src/basic/INDEXLIST.cpp @@ -35,6 +35,7 @@ INDEXLIST::INDEXLIST(const RunParams& params) setFLOPsPerRep(0); setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N); @@ -59,15 +60,14 @@ void INDEXLIST::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_len = -1; } -void INDEXLIST::updateChecksum(VariantID vid, size_t tune_idx) +void INDEXLIST::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_list, getActualProblemSize(), vid); - checksum[vid][tune_idx] += Checksum_type(m_len); + 
addToChecksum(m_list, getActualProblemSize(), vid); + addToChecksum(m_len); } void INDEXLIST::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_list, vid); } diff --git a/src/basic/INDEXLIST_3LOOP.cpp b/src/basic/INDEXLIST_3LOOP.cpp index 8fb19321e..a074343f2 100644 --- a/src/basic/INDEXLIST_3LOOP.cpp +++ b/src/basic/INDEXLIST_3LOOP.cpp @@ -41,6 +41,7 @@ INDEXLIST_3LOOP::INDEXLIST_3LOOP(const RunParams& params) setFLOPsPerRep(0); setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N); @@ -61,15 +62,14 @@ void INDEXLIST_3LOOP::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_len = -1; } -void INDEXLIST_3LOOP::updateChecksum(VariantID vid, size_t tune_idx) +void INDEXLIST_3LOOP::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_list, getActualProblemSize(), vid); - checksum[vid][tune_idx] += Checksum_type(m_len); + addToChecksum(m_list, getActualProblemSize(), vid); + addToChecksum(m_len); } void INDEXLIST_3LOOP::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_list, vid); } diff --git a/src/basic/INIT3.cpp b/src/basic/INIT3.cpp index e60c70f2c..999797e2e 100644 --- a/src/basic/INIT3.cpp +++ b/src/basic/INIT3.cpp @@ -35,6 +35,7 @@ INIT3::INIT3(const RunParams& params) setFLOPsPerRep(1 * getActualProblemSize()); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -56,16 +57,15 @@ void INIT3::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_in2, getActualProblemSize(), vid); } -void INIT3::updateChecksum(VariantID vid, size_t tune_idx) +void INIT3::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += 
calcChecksum(m_out1, getActualProblemSize(), vid); - checksum[vid][tune_idx] += calcChecksum(m_out2, getActualProblemSize(), vid); - checksum[vid][tune_idx] += calcChecksum(m_out3, getActualProblemSize(), vid); + addToChecksum(m_out1, getActualProblemSize(), vid); + addToChecksum(m_out2, getActualProblemSize(), vid); + addToChecksum(m_out3, getActualProblemSize(), vid); } void INIT3::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_out1, vid); deallocData(m_out2, vid); deallocData(m_out3, vid); diff --git a/src/basic/INIT_VIEW1D.cpp b/src/basic/INIT_VIEW1D.cpp index ebf2d1964..8bd5e81a6 100644 --- a/src/basic/INIT_VIEW1D.cpp +++ b/src/basic/INIT_VIEW1D.cpp @@ -34,7 +34,8 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(1 * getActualProblemSize()); - setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::tight); setComplexity(Complexity::N); @@ -54,14 +55,13 @@ void INIT_VIEW1D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_val = 0.00000123; } -void INIT_VIEW1D::updateChecksum(VariantID vid, size_t tune_idx) +void INIT_VIEW1D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_a, getActualProblemSize(), vid); + addToChecksum(m_a, getActualProblemSize(), vid); } void INIT_VIEW1D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_a, vid); } diff --git a/src/basic/INIT_VIEW1D_OFFSET.cpp b/src/basic/INIT_VIEW1D_OFFSET.cpp index 75e6452db..4b12dec20 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET.cpp @@ -34,7 +34,8 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(1 * getActualProblemSize()); - 
setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::tight); setComplexity(Complexity::N); @@ -54,14 +55,13 @@ void INIT_VIEW1D_OFFSET::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_id m_val = 0.00000123; } -void INIT_VIEW1D_OFFSET::updateChecksum(VariantID vid, size_t tune_idx) +void INIT_VIEW1D_OFFSET::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_a, getActualProblemSize(), vid); + addToChecksum(m_a, getActualProblemSize(), vid); } void INIT_VIEW1D_OFFSET::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_a, vid); } diff --git a/src/basic/MAT_MAT_SHARED.cpp b/src/basic/MAT_MAT_SHARED.cpp index 179eb2715..0fe0ed3dd 100644 --- a/src/basic/MAT_MAT_SHARED.cpp +++ b/src/basic/MAT_MAT_SHARED.cpp @@ -39,11 +39,11 @@ MAT_MAT_SHARED::MAT_MAT_SHARED(const RunParams &params) setFLOPsPerRep(2 * TL_SZ * TL_SZ * TL_SZ * num_tiles * num_tiles * num_tiles); - checksum_scale_factor = 1e-6 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); // Change to Inconsistent if internal reductions use atomics + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(1e-6 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N_to_the_three_halves); @@ -62,12 +62,11 @@ void MAT_MAT_SHARED::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_C, NN, 0.0, vid); } -void MAT_MAT_SHARED::updateChecksum(VariantID vid, size_t tune_idx) { - checksum[vid][tune_idx] += calcChecksum(m_C, m_N*m_N, checksum_scale_factor , vid); +void MAT_MAT_SHARED::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { + addToChecksum(m_C, m_N*m_N, vid); } void
MAT_MAT_SHARED::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void)vid; deallocData(m_A, vid); deallocData(m_B, vid); deallocData(m_C, vid); diff --git a/src/basic/MULADDSUB.cpp b/src/basic/MULADDSUB.cpp index 4c6d92ef2..91258c0c7 100644 --- a/src/basic/MULADDSUB.cpp +++ b/src/basic/MULADDSUB.cpp @@ -34,7 +34,8 @@ MULADDSUB::MULADDSUB(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(3 * getActualProblemSize()); - setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::tight); setComplexity(Complexity::N); @@ -56,16 +57,15 @@ void MULADDSUB::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_in2, getActualProblemSize(), vid); } -void MULADDSUB::updateChecksum(VariantID vid, size_t tune_idx) +void MULADDSUB::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_out1, getActualProblemSize(), vid); - checksum[vid][tune_idx] += calcChecksum(m_out2, getActualProblemSize(), vid); - checksum[vid][tune_idx] += calcChecksum(m_out3, getActualProblemSize(), vid); + addToChecksum(m_out1, getActualProblemSize(), vid); + addToChecksum(m_out2, getActualProblemSize(), vid); + addToChecksum(m_out3, getActualProblemSize(), vid); } void MULADDSUB::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_out1, vid); deallocData(m_out2, vid); deallocData(m_out3, vid); diff --git a/src/basic/MULTI_REDUCE.cpp b/src/basic/MULTI_REDUCE.cpp index 2f8680648..963765793 100644 --- a/src/basic/MULTI_REDUCE.cpp +++ b/src/basic/MULTI_REDUCE.cpp @@ -42,6 +42,7 @@ MULTI_REDUCE::MULTI_REDUCE(const RunParams& params) setFLOPsPerRep(1 * getActualProblemSize()); setChecksumConsistency(ChecksumConsistency::Inconsistent); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -72,15 +73,15 
@@ void MULTI_REDUCE::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) if (init_even_sizes || init_random_sizes || init_all_one) { Real_ptr data = nullptr; if (init_even_sizes) { - allocData(data, m_num_bins, Base_Seq); + allocData(DataSpace::Host, data, m_num_bins); for (Index_type b = 0; b < m_num_bins; ++b) { data[b] = static_cast(b+1) / m_num_bins; } } else if (init_random_sizes) { - allocAndInitDataRandValue(data, m_num_bins, Base_Seq); + allocAndInitDataRandValue(DataSpace::Host, data, m_num_bins); std::sort(data, data+m_num_bins); } else if (init_all_one) { - allocData(data, m_num_bins, Base_Seq); + allocData(DataSpace::Host, data, m_num_bins); for (Index_type b = 0; b < m_num_bins; ++b) { data[b] = static_cast(0); } @@ -96,11 +97,11 @@ void MULTI_REDUCE::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_bins[i] = bin; } - deallocData(data, Base_Seq); + deallocData(DataSpace::Host, data); } else if (init_random_per_iterate) { Real_ptr data; - allocAndInitDataRandValue(data, getActualProblemSize(), Base_Seq); + allocAndInitDataRandValue(DataSpace::Host, data, getActualProblemSize()); for (Index_type i = 0; i < getActualProblemSize(); ++i) { m_bins[i] = static_cast(data[i] * m_num_bins); @@ -112,7 +113,7 @@ void MULTI_REDUCE::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) } } - deallocData(data, Base_Seq); + deallocData(DataSpace::Host, data); } else { throw 1; } @@ -121,14 +122,13 @@ void MULTI_REDUCE::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(DataSpace::Host, m_values_final, m_num_bins, 0.0); } -void MULTI_REDUCE::updateChecksum(VariantID vid, size_t tune_idx) +void MULTI_REDUCE::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(DataSpace::Host, m_values_final, m_num_bins); + addToChecksum(DataSpace::Host, m_values_final, m_num_bins); } void MULTI_REDUCE::tearDown(VariantID vid, size_t 
RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_bins, vid); deallocData(m_data, vid); deallocData(DataSpace::Host, m_values_init); diff --git a/src/basic/NESTED_INIT.cpp b/src/basic/NESTED_INIT.cpp index 486be7ee2..2dbb5cc00 100644 --- a/src/basic/NESTED_INIT.cpp +++ b/src/basic/NESTED_INIT.cpp @@ -45,7 +45,8 @@ NESTED_INIT::NESTED_INIT(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(3 * getActualProblemSize()); - setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::tight); setComplexity(Complexity::N); @@ -63,14 +64,13 @@ void NESTED_INIT::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_array, m_array_length, 0.0, vid); } -void NESTED_INIT::updateChecksum(VariantID vid, size_t tune_idx) +void NESTED_INIT::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_array, m_array_length, vid); + addToChecksum(m_array, m_array_length, vid); } void NESTED_INIT::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_array, vid); } diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 852099179..d0325bd14 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -35,6 +35,7 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setFLOPsPerRep(6 * getActualProblemSize() + 1); setChecksumConsistency(ChecksumConsistency::Inconsistent); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -55,14 +56,14 @@ void PI_ATOMIC::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_pi_final = -static_cast(vid); } -void PI_ATOMIC::updateChecksum(VariantID vid, size_t tune_idx) +void PI_ATOMIC::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += static_cast(m_pi_final); + 
addToChecksum(m_pi_final); } -void PI_ATOMIC::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +void PI_ATOMIC::tearDown(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; + } } // end namespace basic diff --git a/src/basic/PI_REDUCE.cpp b/src/basic/PI_REDUCE.cpp index 56f35ae5f..57f4e18cb 100644 --- a/src/basic/PI_REDUCE.cpp +++ b/src/basic/PI_REDUCE.cpp @@ -35,6 +35,7 @@ PI_REDUCE::PI_REDUCE(const RunParams& params) setFLOPsPerRep(6 * getActualProblemSize() + 1); setChecksumConsistency(ChecksumConsistency::Inconsistent); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -48,22 +49,21 @@ PI_REDUCE::~PI_REDUCE() { } -void PI_REDUCE::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +void PI_REDUCE::setUp(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; m_dx = 1.0 / double(getActualProblemSize()); m_pi_init = 0.0; m_pi = 0.0; } -void PI_REDUCE::updateChecksum(VariantID vid, size_t tune_idx) +void PI_REDUCE::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += Checksum_type(m_pi); + addToChecksum(m_pi); } -void PI_REDUCE::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +void PI_REDUCE::tearDown(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; + } } // end namespace basic diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp index cd7a2926b..61c1acbf4 100644 --- a/src/basic/REDUCE3_INT.cpp +++ b/src/basic/REDUCE3_INT.cpp @@ -40,6 +40,7 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params) setFLOPsPerRep(0); setChecksumConsistency(ChecksumConsistency::Inconsistent); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -65,16 +66,15 @@ void REDUCE3_INT::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_vmax_init = std::numeric_limits::min(); } -void 
REDUCE3_INT::updateChecksum(VariantID vid, size_t tune_idx) +void REDUCE3_INT::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += m_vsum; - checksum[vid][tune_idx] += m_vmin; - checksum[vid][tune_idx] += m_vmax; + addToChecksum(m_vsum); + addToChecksum(m_vmin); + addToChecksum(m_vmax); } void REDUCE3_INT::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_vec, vid); } diff --git a/src/basic/REDUCE_STRUCT.cpp b/src/basic/REDUCE_STRUCT.cpp index 949a351ef..1ff0a7d9d 100644 --- a/src/basic/REDUCE_STRUCT.cpp +++ b/src/basic/REDUCE_STRUCT.cpp @@ -40,6 +40,7 @@ REDUCE_STRUCT::REDUCE_STRUCT(const RunParams& params) setFLOPsPerRep(2 * getActualProblemSize() + 2); setChecksumConsistency(ChecksumConsistency::Inconsistent); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -70,21 +71,20 @@ void REDUCE_STRUCT::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) } } -void REDUCE_STRUCT::updateChecksum(VariantID vid, size_t tune_idx) +void REDUCE_STRUCT::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += m_points.GetCenter()[0]; - checksum[vid][tune_idx] += m_points.GetXMin(); - checksum[vid][tune_idx] += m_points.GetXMax(); - checksum[vid][tune_idx] += m_points.GetCenter()[1]; - checksum[vid][tune_idx] += m_points.GetYMin(); - checksum[vid][tune_idx] += m_points.GetYMax(); + addToChecksum(m_points.GetCenter()[0]); + addToChecksum(m_points.GetXMin()); + addToChecksum(m_points.GetXMax()); + addToChecksum(m_points.GetCenter()[1]); + addToChecksum(m_points.GetYMin()); + addToChecksum(m_points.GetYMax()); return; } void REDUCE_STRUCT::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_y, vid); } diff --git a/src/basic/TRAP_INT.cpp b/src/basic/TRAP_INT.cpp index b708d381f..ba93222f2 100644 --- 
a/src/basic/TRAP_INT.cpp +++ b/src/basic/TRAP_INT.cpp @@ -35,6 +35,7 @@ TRAP_INT::TRAP_INT(const RunParams& params) setFLOPsPerRep(10 * getActualProblemSize()); // 1 sqrt setChecksumConsistency(ChecksumConsistency::Inconsistent); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -65,14 +66,14 @@ void TRAP_INT::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_sumx = 0; } -void TRAP_INT::updateChecksum(VariantID vid, size_t tune_idx) +void TRAP_INT::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += m_sumx; + addToChecksum(m_sumx); } -void TRAP_INT::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +void TRAP_INT::tearDown(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; + } } // end namespace basic diff --git a/src/comm/HALO_EXCHANGE.cpp b/src/comm/HALO_EXCHANGE.cpp index 6a91ce2ab..67d19a08c 100644 --- a/src/comm/HALO_EXCHANGE.cpp +++ b/src/comm/HALO_EXCHANGE.cpp @@ -49,6 +49,7 @@ HALO_EXCHANGE::HALO_EXCHANGE(const RunParams& params) setFLOPsPerRep(0); setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N_to_the_two_thirds); @@ -80,10 +81,10 @@ void HALO_EXCHANGE::setUp(VariantID vid, size_t tune_idx) } } -void HALO_EXCHANGE::updateChecksum(VariantID vid, size_t tune_idx) +void HALO_EXCHANGE::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { for (Index_type v = 0; v < m_num_vars; ++v) { - checksum[vid][tune_idx] += calcChecksum(m_vars[v], m_var_size, vid); + addToChecksum(m_vars[v], m_var_size, vid); } } diff --git a/src/comm/HALO_EXCHANGE_FUSED.cpp b/src/comm/HALO_EXCHANGE_FUSED.cpp index 227a10bda..0c47a0130 100644 --- a/src/comm/HALO_EXCHANGE_FUSED.cpp +++ b/src/comm/HALO_EXCHANGE_FUSED.cpp @@ -49,6 +49,7 @@ HALO_EXCHANGE_FUSED::HALO_EXCHANGE_FUSED(const RunParams& params) setFLOPsPerRep(0); 
setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N_to_the_two_thirds); @@ -80,10 +81,10 @@ void HALO_EXCHANGE_FUSED::setUp(VariantID vid, size_t tune_idx) } } -void HALO_EXCHANGE_FUSED::updateChecksum(VariantID vid, size_t tune_idx) +void HALO_EXCHANGE_FUSED::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { for (Index_type v = 0; v < m_num_vars; ++v) { - checksum[vid][tune_idx] += calcChecksum(m_vars[v], m_var_size, vid); + addToChecksum(m_vars[v], m_var_size, vid); } } diff --git a/src/comm/HALO_PACKING.cpp b/src/comm/HALO_PACKING.cpp index 6ed165613..1b7984e88 100644 --- a/src/comm/HALO_PACKING.cpp +++ b/src/comm/HALO_PACKING.cpp @@ -39,6 +39,7 @@ HALO_PACKING::HALO_PACKING(const RunParams& params) setFLOPsPerRep(0); setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N_to_the_two_thirds); @@ -69,10 +70,10 @@ void HALO_PACKING::setUp(VariantID vid, size_t tune_idx) } } -void HALO_PACKING::updateChecksum(VariantID vid, size_t tune_idx) +void HALO_PACKING::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { for (Index_type v = 0; v < m_num_vars; ++v) { - checksum[vid][tune_idx] += calcChecksum(m_vars[v], m_var_size, vid); + addToChecksum(m_vars[v], m_var_size, vid); } const bool separate_buffers = (getMPIDataSpace(vid) == DataSpace::Copy); @@ -80,9 +81,9 @@ void HALO_PACKING::updateChecksum(VariantID vid, size_t tune_idx) for (Index_type l = 0; l < s_num_neighbors; ++l) { Index_type buffer_len = m_num_vars * m_pack_index_list_lengths[l]; if (separate_buffers) { - checksum[vid][tune_idx] += calcChecksum(DataSpace::Host, m_send_buffers[l], buffer_len); + addToChecksum(DataSpace::Host, m_send_buffers[l], buffer_len); } else { - checksum[vid][tune_idx] += calcChecksum(getMPIDataSpace(vid), m_send_buffers[l], buffer_len); + addToChecksum(getMPIDataSpace(vid), 
m_send_buffers[l], buffer_len); } } } diff --git a/src/comm/HALO_PACKING_FUSED.cpp b/src/comm/HALO_PACKING_FUSED.cpp index f503c0951..534ce994b 100644 --- a/src/comm/HALO_PACKING_FUSED.cpp +++ b/src/comm/HALO_PACKING_FUSED.cpp @@ -39,6 +39,7 @@ HALO_PACKING_FUSED::HALO_PACKING_FUSED(const RunParams& params) setFLOPsPerRep(0); setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N_to_the_two_thirds); @@ -69,10 +70,10 @@ void HALO_PACKING_FUSED::setUp(VariantID vid, size_t tune_idx) } } -void HALO_PACKING_FUSED::updateChecksum(VariantID vid, size_t tune_idx) +void HALO_PACKING_FUSED::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { for (Index_type v = 0; v < m_num_vars; ++v) { - checksum[vid][tune_idx] += calcChecksum(m_vars[v], m_var_size, vid); + addToChecksum(m_vars[v], m_var_size, vid); } const bool separate_buffers = (getMPIDataSpace(vid) == DataSpace::Copy); @@ -80,9 +81,9 @@ void HALO_PACKING_FUSED::updateChecksum(VariantID vid, size_t tune_idx) for (Index_type l = 0; l < s_num_neighbors; ++l) { Index_type buffer_len = m_num_vars * m_pack_index_list_lengths[l]; if (separate_buffers) { - checksum[vid][tune_idx] += calcChecksum(DataSpace::Host, m_send_buffers[l], buffer_len); + addToChecksum(DataSpace::Host, m_send_buffers[l], buffer_len); } else { - checksum[vid][tune_idx] += calcChecksum(getMPIDataSpace(vid), m_send_buffers[l], buffer_len); + addToChecksum(getMPIDataSpace(vid), m_send_buffers[l], buffer_len); } } } diff --git a/src/comm/HALO_SENDRECV.cpp b/src/comm/HALO_SENDRECV.cpp index 0c7cfc574..1a6e1bd18 100644 --- a/src/comm/HALO_SENDRECV.cpp +++ b/src/comm/HALO_SENDRECV.cpp @@ -39,6 +39,7 @@ HALO_SENDRECV::HALO_SENDRECV(const RunParams& params) setFLOPsPerRep(0); setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N_to_the_two_thirds); @@ -59,16 +60,16 @@ void 
HALO_SENDRECV::setUp(VariantID vid, size_t tune_idx) setUp_base(m_my_mpi_rank, m_mpi_dims.data(), m_num_vars, vid, tune_idx); } -void HALO_SENDRECV::updateChecksum(VariantID vid, size_t tune_idx) +void HALO_SENDRECV::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const bool separate_buffers = (getMPIDataSpace(vid) == DataSpace::Copy); for (Index_type l = 0; l < s_num_neighbors; ++l) { Index_type buffer_len = m_num_vars * m_unpack_index_list_lengths[l]; if (separate_buffers) { - checksum[vid][tune_idx] += calcChecksum(DataSpace::Host, m_recv_buffers[l], buffer_len); + addToChecksum(DataSpace::Host, m_recv_buffers[l], buffer_len); } else { - checksum[vid][tune_idx] += calcChecksum(getMPIDataSpace(vid), m_recv_buffers[l], buffer_len); + addToChecksum(getMPIDataSpace(vid), m_recv_buffers[l], buffer_len); } } } diff --git a/src/common/DataUtils.cpp b/src/common/DataUtils.cpp index c0674a37e..87362bf4b 100644 --- a/src/common/DataUtils.cpp +++ b/src/common/DataUtils.cpp @@ -14,6 +14,7 @@ #include "KernelBase.hpp" +#include "RAJA/util/reduce.hpp" #include "RAJA/internal/MemUtils_CPU.hpp" #include @@ -575,58 +576,42 @@ void initData(Real_type& d) * Calculate and return checksum for data arrays. 
*/ template < typename Data_getter > -long double calcChecksumImpl(Data_getter data, Size_type len, - Real_type scale_factor) +Checksum_type calcChecksumImpl(Data_getter data, Size_type len) { - long double tchk = 0.0; - long double ckahan = 0.0; + RAJA::KahanSum chk(0.0); + for (Size_type j = 0; j < len; ++j) { - long double x = (std::abs(std::sin(j+1.0))+0.5) * data(j); - long double y = x - ckahan; - volatile long double t = tchk + y; - volatile long double z = t - tchk; - ckahan = z - y; - tchk = t; -#if 0 // RDH DEBUG - if ( (j % 10000000) == 0 ) { - getCout() << "j : tchk = " << std::setprecision(std::numeric_limits::max_digits10) << j << " : " << tchk << std::endl; - } -#endif + chk += (std::abs(std::sin(j+1.0))+0.5) * data(j); } - tchk *= scale_factor; - return tchk; + return chk.get(); } -long double calcChecksum(Int_ptr ptr, Size_type len, - Real_type scale_factor) +Checksum_type calcChecksum(Int_ptr ptr, Size_type len) { return calcChecksumImpl([=](Size_type j) { - return static_cast(ptr[j]); - }, len, scale_factor); + return static_cast(ptr[j]); + }, len); } -long double calcChecksum(unsigned long long* ptr, Size_type len, - Real_type scale_factor) +Checksum_type calcChecksum(unsigned long long* ptr, Size_type len) { return calcChecksumImpl([=](Size_type j) { - return static_cast(ptr[j]); - }, len, scale_factor); + return static_cast(ptr[j]); + }, len); } -long double calcChecksum(Real_ptr ptr, Size_type len, - Real_type scale_factor) +Checksum_type calcChecksum(Real_ptr ptr, Size_type len) { return calcChecksumImpl([=](Size_type j) { - return static_cast(ptr[j]); - }, len, scale_factor); + return static_cast(ptr[j]); + }, len); } -long double calcChecksum(Complex_ptr ptr, Size_type len, - Real_type scale_factor) +Checksum_type calcChecksum(Complex_ptr ptr, Size_type len) { return calcChecksumImpl([=](Size_type j) { - return static_cast(real(ptr[j])+imag(ptr[j])); - }, len, scale_factor); + return static_cast(real(ptr[j])+imag(ptr[j])); + }, len); } } 
// closing brace for detail namespace diff --git a/src/common/DataUtils.hpp b/src/common/DataUtils.hpp index bfa34efa9..fe805c262 100644 --- a/src/common/DataUtils.hpp +++ b/src/common/DataUtils.hpp @@ -151,17 +151,13 @@ void initData(Real_type& d); * * Checksumn is multiplied by given scale factor. */ -long double calcChecksum(Int_ptr d, Size_type len, - Real_type scale_factor); +Checksum_type calcChecksum(Int_ptr d, Size_type len); /// -long double calcChecksum(unsigned long long* d, Size_type len, - Real_type scale_factor); +Checksum_type calcChecksum(unsigned long long* d, Size_type len); /// -long double calcChecksum(Real_ptr d, Size_type len, - Real_type scale_factor); +Checksum_type calcChecksum(Real_ptr d, Size_type len); /// -long double calcChecksum(Complex_ptr d, Size_type len, - Real_type scale_factor); +Checksum_type calcChecksum(Complex_ptr d, Size_type len); } // closing brace for detail namespace @@ -385,8 +381,7 @@ inline void allocAndInitDataRandValue(DataSpace dataSpace, T*& ptr, Size_type le * Calculate and return checksum for arrays. 
*/ template -inline long double calcChecksum(DataSpace dataSpace, T* ptr, Size_type len, Size_type align, - Real_type scale_factor) +inline Checksum_type calcChecksum(DataSpace dataSpace, T* ptr, Size_type len, Size_type align) { T* check_ptr = ptr; T* copied_ptr = nullptr; @@ -400,7 +395,7 @@ inline long double calcChecksum(DataSpace dataSpace, T* ptr, Size_type len, Size check_ptr = copied_ptr; } - auto val = detail::calcChecksum(check_ptr, len, scale_factor); + Checksum_type val = detail::calcChecksum(check_ptr, len); if (check_dataSpace != dataSpace) { deallocData(check_dataSpace, copied_ptr); diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp index 656c2e771..b7576936c 100644 --- a/src/common/Executor.cpp +++ b/src/common/Executor.cpp @@ -762,23 +762,27 @@ void Executor::runKernel(KernelBase* kernel, bool print_kernel_name) kernel->execute(vid, tune_idx); // Execute kernel if ( run_params.showProgress() ) { - getCout() << " -- " << kernel->getLastTime() / kernel->getRunReps() << " sec. 
x " << kernel->getRunReps() << " rep."; - - size_t prec = 20; - const auto default_precision = getCout().precision(); - Checksum_type checksum = kernel->getChecksum(vid, tune_idx); + Checksum_type cksum_tol = kernel->getChecksumTolerance(); + Checksum_type cksum_ref = kernel->getReferenceChecksum(); + Checksum_type cksum = kernel->getLastChecksum(); + Checksum_type cksum_diff = std::abs(cksum_ref - cksum); #if defined(RAJA_PERFSUITE_ENABLE_MPI) { - Checksum_type checksum_sum = 0; - Allreduce(&checksum, &checksum_sum, 1, MPI_SUM, MPI_COMM_WORLD); - checksum = checksum_sum / num_ranks; + Checksum_type cksum_diff_max = 1e80; + Allreduce(&cksum_diff, &cksum_diff_max, 1, MPI_MAX, MPI_COMM_WORLD); + cksum_diff = cksum_diff_max; } - getCout() << " checksum_avg "; -#else - getCout() << " checksum "; #endif - getCout() << setprecision(prec) << checksum - << setprecision(default_precision) << endl; + const char* cksum_result = "FAILED"; + if (cksum_diff <= cksum_tol) { + cksum_result = "PASSED"; + } + + getCout() << " -- " + << kernel->getLastTime() / kernel->getRunReps() << " sec." + << " x " << kernel->getRunReps() << " rep." + << " " << cksum_result << " checksum" + << endl; } } else { @@ -1403,6 +1407,7 @@ void Executor::writeChecksumReport(ostream& file) } namecol_width++; + size_t resultcol_width = 6+2; // // Print title. @@ -1416,11 +1421,15 @@ void Executor::writeChecksumReport(ostream& file) file << equal_line << endl; // - // Print column title line. + // Print column title lines. 
// file <getName() << endl; + file << dot_line << endl; - Checksum_type cksum_ref = 0.0; - size_t ivck = 0; - bool found_ref = false; - while ( ivck < variant_ids.size() && !found_ref ) { - VariantID vid = variant_ids[ivck]; - size_t num_tunings = kern->getNumVariantTunings(vid); - for (size_t tune_idx = 0; tune_idx < num_tunings; ++tune_idx) { - if ( kern->wasVariantTuningRun(vid, tune_idx) ) { - cksum_ref = kern->getChecksum(vid, tune_idx); - found_ref = true; - break; - } - } - ++ivck; - } + Checksum_type cksum_tol = kern->getChecksumTolerance(); // get vector of checksums and diffs std::vector> checksums(variant_ids.size()); - std::vector> checksums_diff(variant_ids.size()); + std::vector> checksums_abs_diff(variant_ids.size()); for (size_t iv = 0; iv < variant_ids.size(); ++iv) { VariantID vid = variant_ids[iv]; size_t num_tunings = kernels[ik]->getNumVariantTunings(variant_ids[iv]); checksums[iv].resize(num_tunings, 0.0); - checksums_diff[iv].resize(num_tunings, 0.0); + checksums_abs_diff[iv].resize(num_tunings, 0.0); for (size_t tune_idx = 0; tune_idx < num_tunings; ++tune_idx) { if ( kern->wasVariantTuningRun(vid, tune_idx) ) { - checksums[iv][tune_idx] = kern->getChecksum(vid, tune_idx); - checksums_diff[iv][tune_idx] = cksum_ref - kern->getChecksum(vid, tune_idx); + checksums[iv][tune_idx] = kern->getChecksumAverage(vid, tune_idx); + checksums_abs_diff[iv][tune_idx] = kern->getChecksumMaxDifference(vid, tune_idx); } } } @@ -1501,16 +1501,6 @@ void Executor::writeChecksumReport(ostream& file) } } - // get stats for checksums_abs_diff - std::vector> checksums_abs_diff(variant_ids.size()); - for (size_t iv = 0; iv < variant_ids.size(); ++iv) { - size_t num_tunings = kernels[ik]->getNumVariantTunings(variant_ids[iv]); - checksums_abs_diff[iv].resize(num_tunings, 0.0); - for (size_t tune_idx = 0; tune_idx < num_tunings; ++tune_idx) { - checksums_abs_diff[iv][tune_idx] = std::abs(checksums_diff[iv][tune_idx]); - } - } - std::vector> 
checksums_abs_diff_min(variant_ids.size()); std::vector> checksums_abs_diff_max(variant_ids.size()); std::vector> checksums_abs_diff_sum(variant_ids.size()); @@ -1569,18 +1559,32 @@ void Executor::writeChecksumReport(ostream& file) const string& tuning_name = kern->getVariantTuningName(vid, tune_idx); if ( kern->wasVariantTuningRun(vid, tune_idx) ) { + const char* result = "FAILED"; + if ( +#if defined(RAJA_PERFSUITE_ENABLE_MPI) + checksums_abs_diff_max[iv][tune_idx] +#else + checksums_abs_diff[iv][tune_idx] +#endif + <= cksum_tol ) { + result = "PASSED"; + } file <::max()); + checksum_max[vid].emplace_back(-std::numeric_limits::max()); + checksum_sum[vid].emplace_back(0.0); num_exec[vid].emplace_back(0); min_time[vid].emplace_back(std::numeric_limits::max()); max_time[vid].emplace_back(-std::numeric_limits::max()); @@ -325,7 +333,20 @@ void KernelBase::execute(VariantID vid, size_t tune_idx) this->runKernel(vid, tune_idx); + checksum.reset(); this->updateChecksum(vid, tune_idx); + Checksum_type new_checksum = getLastChecksum(); + + checksum_min[vid].at(tune_idx) = std::min(new_checksum, checksum_min[vid].at(tune_idx)); + checksum_max[vid].at(tune_idx) = std::max(new_checksum, checksum_max[vid].at(tune_idx)); + checksum_sum[vid].at(tune_idx) += new_checksum; + + if (checksum_reference_variant == NumVariants) { + // use first run variant tuning as checksum reference + checksum_reference = new_checksum; + checksum_reference_variant = vid; + checksum_reference_tuning = tune_idx; + } this->tearDown(vid, tune_idx); @@ -428,12 +449,31 @@ void KernelBase::print(std::ostream& os) const os << "\t\t\t\t\t" << tot_time[j][t] << std::endl; } } - os << "\t\t\t checksum: " << std::endl; + os << "\t\t\t checksum_reference_variant = " << getVariantName(checksum_reference_variant) << std::endl; + os << "\t\t\t checksum_reference_tuning = " << checksum_reference_tuning << std::endl; + os << "\t\t\t checksum_reference = " << checksum_reference << std::endl; + os << "\t\t\t 
checksum_min: " << std::endl; + for (unsigned j = 0; j < NumVariants; ++j) { + os << "\t\t\t\t" << getVariantName(static_cast(j)) + << " :" << std::endl; + for (size_t t = 0; t < checksum_min[j].size(); ++t) { + os << "\t\t\t\t\t" << checksum_min[j][t] << std::endl; + } + } + os << "\t\t\t checksum_max: " << std::endl; + for (unsigned j = 0; j < NumVariants; ++j) { + os << "\t\t\t\t" << getVariantName(static_cast(j)) + << " :" << std::endl; + for (size_t t = 0; t < checksum_max[j].size(); ++t) { + os << "\t\t\t\t\t" << checksum_max[j][t] << std::endl; + } + } + os << "\t\t\t checksum_sum: " << std::endl; for (unsigned j = 0; j < NumVariants; ++j) { os << "\t\t\t\t" << getVariantName(static_cast(j)) << " :" << std::endl; - for (size_t t = 0; t < checksum[j].size(); ++t) { - os << "\t\t\t\t\t" << checksum[j][t] << std::endl; + for (size_t t = 0; t < checksum_sum[j].size(); ++t) { + os << "\t\t\t\t\t" << checksum_sum[j][t].get() << std::endl; } } os << std::endl; diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index 7cd4df60c..2b7145b36 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -16,6 +16,7 @@ #include "common/GPUUtils.hpp" #include "RAJA/util/Timer.hpp" +#include "RAJA/util/reduce.hpp" #if defined(RAJA_PERFSUITE_ENABLE_MPI) #include #endif @@ -123,6 +124,8 @@ class KernelBase void setFLOPsPerRep(Index_type FLOPs) { FLOPs_per_rep = FLOPs; } void setBlockSize(Index_type size) { kernel_block_size = size; } void setChecksumConsistency(ChecksumConsistency cc) { checksum_consistency = cc; } + void setChecksumTolerance(Checksum_type ct) { checksum_tolerance = ct; } + void setChecksumScaleFactor(Checksum_type csf) { checksum_scale_factor = csf; } void setComplexity(Complexity ac) { complexity = ac; } void setUsesFeature(FeatureID fid) { uses_feature[fid] = true; } @@ -210,6 +213,7 @@ class KernelBase Index_type getFLOPsPerRep() const { return FLOPs_per_rep; } double getBlockSize() const { return kernel_block_size; } 
ChecksumConsistency getChecksumConsistency() const { return checksum_consistency; }; + Checksum_type getChecksumTolerance() const { return checksum_tolerance; } Complexity getComplexity() const { return complexity; }; Index_type getTargetProblemSize() const; @@ -279,8 +283,39 @@ class KernelBase double getTotTime(VariantID vid, size_t tune_idx) const { return tot_time[vid].at(tune_idx); } - Checksum_type getChecksum(VariantID vid, size_t tune_idx) const - { return checksum[vid].at(tune_idx); } + // Get reference checksum (first variant tuning run) + Checksum_type getReferenceChecksum() const + { + if (checksum_reference_variant == NumVariants) { + throw std::runtime_error("Can't get reference checksum average if kernel was not run"); + } + return checksum_reference; + } + Checksum_type getLastChecksum() const + { + return checksum.get() * checksum_scale_factor; + } + Checksum_type getChecksumAverage(VariantID vid, size_t tune_idx) const + { + if (num_exec[vid].at(tune_idx) <= 0) { + throw std::runtime_error("Can't get checksum average if variant tuning was not run"); + } + return checksum_sum[vid].at(tune_idx).get() / num_exec[vid].at(tune_idx); + } + Checksum_type getChecksumMaxDifference(VariantID vid, size_t tune_idx) const + { + if (num_exec[vid].at(tune_idx) <= 0) { + throw std::runtime_error("Can't get checksum max diff if variant tuning was not run"); + } + + Checksum_type reference_checksum = getReferenceChecksum(); + + Checksum_type cksum_max_diff = std::abs(reference_checksum - checksum_min[vid].at(tune_idx)); + cksum_max_diff = std::max(cksum_max_diff, + std::abs(reference_checksum - checksum_max[vid].at(tune_idx))); + + return cksum_max_diff; + } void execute(VariantID vid, size_t tune_idx); @@ -521,23 +556,21 @@ class KernelBase } template - long double calcChecksum(DataSpace dataSpace, T* ptr, Size_type len, - Real_type scale_factor = 1.0) + void addToChecksum(T val) { - return rajaperf::calcChecksum(dataSpace, - ptr, len, getDataAlignment(), 
scale_factor); + checksum += static_cast(val); } template - long double calcChecksum(T* ptr, Size_type len, VariantID vid) + void addToChecksum(T* ptr, Size_type len, VariantID vid) { - return calcChecksum(getDataSpace(vid), ptr, len); + addToChecksum(getDataSpace(vid), ptr, len); } template - long double calcChecksum(T* ptr, Size_type len, Real_type scale_factor, VariantID vid) + void addToChecksum(DataSpace dataSpace, T* ptr, Size_type len) { - return calcChecksum(getDataSpace(vid), ptr, len, scale_factor); + checksum += rajaperf::calcChecksum(dataSpace, ptr, len, getDataAlignment()); } void startTimer() @@ -613,8 +646,13 @@ class KernelBase protected: const RunParams& run_params; - std::vector checksum[NumVariants]; - Checksum_type checksum_scale_factor; + struct ChecksumTolerance + { + static constexpr inline Checksum_type zero = 0.0; + static constexpr inline Checksum_type tight = 1e-12; + static constexpr inline Checksum_type normal = 1e-7; + static constexpr inline Checksum_type loose = 5e-6; + }; #if defined(RAJA_ENABLE_TARGET_OPENMP) int did; @@ -662,6 +700,14 @@ class KernelBase bool uses_feature[NumFeatures]; ChecksumConsistency checksum_consistency; + Checksum_type checksum_tolerance; + Checksum_type checksum_scale_factor; + + RAJA::KahanSum checksum; + + std::vector checksum_min[NumVariants]; + std::vector checksum_max[NumVariants]; + std::vector> checksum_sum[NumVariants]; Complexity complexity; @@ -683,6 +729,10 @@ class KernelBase VariantID running_variant; size_t running_tuning; + Checksum_type checksum_reference; + VariantID checksum_reference_variant; + size_t checksum_reference_tuning; + std::vector num_exec[NumVariants]; RAJA::Timer timer; diff --git a/src/lcals/DIFF_PREDICT.cpp b/src/lcals/DIFF_PREDICT.cpp index f858cc988..2dcc7fb6b 100644 --- a/src/lcals/DIFF_PREDICT.cpp +++ b/src/lcals/DIFF_PREDICT.cpp @@ -35,6 +35,7 @@ DIFF_PREDICT::DIFF_PREDICT(const RunParams& params) setFLOPsPerRep(9 * getActualProblemSize()); 
setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -56,14 +57,13 @@ void DIFF_PREDICT::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_cx, m_array_length, vid); } -void DIFF_PREDICT::updateChecksum(VariantID vid, size_t tune_idx) +void DIFF_PREDICT::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_px, m_array_length, vid); + addToChecksum(m_px, m_array_length, vid); } void DIFF_PREDICT::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_px, vid); deallocData(m_cx, vid); } diff --git a/src/lcals/EOS.cpp b/src/lcals/EOS.cpp index 5ac4acc05..c88c7025b 100644 --- a/src/lcals/EOS.cpp +++ b/src/lcals/EOS.cpp @@ -38,11 +38,11 @@ EOS::EOS(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(16 * getActualProblemSize()); - checksum_scale_factor = 0.0001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.0001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -67,14 +67,13 @@ void EOS::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) initData(m_t, vid); } -void EOS::updateChecksum(VariantID vid, size_t tune_idx) +void EOS::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_x, getActualProblemSize(), checksum_scale_factor , vid); + addToChecksum(m_x, getActualProblemSize(), vid); } void EOS::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_y, vid); deallocData(m_z, vid); diff --git a/src/lcals/FIRST_DIFF.cpp b/src/lcals/FIRST_DIFF.cpp index 
d9f303bb2..70519b923 100644 --- a/src/lcals/FIRST_DIFF.cpp +++ b/src/lcals/FIRST_DIFF.cpp @@ -36,7 +36,8 @@ FIRST_DIFF::FIRST_DIFF(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(1 * getActualProblemSize()); - setChecksumConsistency(ChecksumConsistency::Consistent); // assumes FP ops get the same answer across platforms + setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::tight); setComplexity(Complexity::N); @@ -55,14 +56,13 @@ void FIRST_DIFF::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_y, m_N, vid); } -void FIRST_DIFF::updateChecksum(VariantID vid, size_t tune_idx) +void FIRST_DIFF::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_x, getActualProblemSize(), vid); + addToChecksum(m_x, getActualProblemSize(), vid); } void FIRST_DIFF::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_y, vid); } diff --git a/src/lcals/FIRST_MIN.cpp b/src/lcals/FIRST_MIN.cpp index 8a2c0ba09..0d481f75c 100644 --- a/src/lcals/FIRST_MIN.cpp +++ b/src/lcals/FIRST_MIN.cpp @@ -39,7 +39,8 @@ FIRST_MIN::FIRST_MIN(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(0); - setChecksumConsistency(ChecksumConsistency::Consistent); // assumes that the loc returned is always from the first of equivalent mins + setChecksumConsistency(ChecksumConsistency::Consistent); // The loc returned is always the first of equivalent mins + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N); @@ -64,14 +65,13 @@ void FIRST_MIN::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_minloc = -1; } -void FIRST_MIN::updateChecksum(VariantID vid, size_t tune_idx) +void FIRST_MIN::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += 
static_cast(m_minloc); + addToChecksum(m_minloc); } void FIRST_MIN::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); } diff --git a/src/lcals/FIRST_SUM.cpp b/src/lcals/FIRST_SUM.cpp index a4d223d95..bcfe7604a 100644 --- a/src/lcals/FIRST_SUM.cpp +++ b/src/lcals/FIRST_SUM.cpp @@ -36,7 +36,8 @@ FIRST_SUM::FIRST_SUM(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(1 * (m_N-1)); - setChecksumConsistency(ChecksumConsistency::Consistent); // assumes FP ops get the same answer across platforms + setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::tight); setComplexity(Complexity::N); @@ -55,14 +56,13 @@ void FIRST_SUM::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_y, m_N, vid); } -void FIRST_SUM::updateChecksum(VariantID vid, size_t tune_idx) +void FIRST_SUM::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_x, getActualProblemSize(), vid); + addToChecksum(m_x, getActualProblemSize(), vid); } void FIRST_SUM::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_y, vid); } diff --git a/src/lcals/GEN_LIN_RECUR.cpp b/src/lcals/GEN_LIN_RECUR.cpp index 54bd21011..6833bc6e4 100644 --- a/src/lcals/GEN_LIN_RECUR.cpp +++ b/src/lcals/GEN_LIN_RECUR.cpp @@ -40,11 +40,11 @@ GEN_LIN_RECUR::GEN_LIN_RECUR(const RunParams& params) setFLOPsPerRep((3 + 3 ) * m_N); - checksum_scale_factor = 0.01 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.01 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -67,14 +67,13 @@ void GEN_LIN_RECUR::setUp(VariantID vid, size_t 
RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_sb, m_N, vid); } -void GEN_LIN_RECUR::updateChecksum(VariantID vid, size_t tune_idx) +void GEN_LIN_RECUR::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_b5, getActualProblemSize(), checksum_scale_factor , vid); + addToChecksum(m_b5, getActualProblemSize(), vid); } void GEN_LIN_RECUR::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_b5, vid); deallocData(m_stb5, vid); deallocData(m_sa, vid); diff --git a/src/lcals/HYDRO_1D.cpp b/src/lcals/HYDRO_1D.cpp index 0b1bb98e9..55abecd53 100644 --- a/src/lcals/HYDRO_1D.cpp +++ b/src/lcals/HYDRO_1D.cpp @@ -37,11 +37,11 @@ HYDRO_1D::HYDRO_1D(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(5 * getActualProblemSize()); - checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -65,14 +65,13 @@ void HYDRO_1D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) initData(m_t, vid); } -void HYDRO_1D::updateChecksum(VariantID vid, size_t tune_idx) +void HYDRO_1D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_x, getActualProblemSize(), checksum_scale_factor , vid); + addToChecksum(m_x, getActualProblemSize(), vid); } void HYDRO_1D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_y, vid); deallocData(m_z, vid); diff --git a/src/lcals/HYDRO_2D.cpp b/src/lcals/HYDRO_2D.cpp index 29058c557..09594285b 100644 --- a/src/lcals/HYDRO_2D.cpp +++ b/src/lcals/HYDRO_2D.cpp @@ -62,11 +62,11 @@ HYDRO_2D::HYDRO_2D(const 
RunParams& params) 26 + 4 ) * (m_jn-2)*(m_kn-2)); - checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -94,15 +94,14 @@ void HYDRO_2D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_zz, m_array_length, vid); } -void HYDRO_2D::updateChecksum(VariantID vid, size_t tune_idx) +void HYDRO_2D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_zzout, m_array_length, checksum_scale_factor , vid); - checksum[vid][tune_idx] += calcChecksum(m_zrout, m_array_length, checksum_scale_factor , vid); + addToChecksum(m_zzout, m_array_length, vid); + addToChecksum(m_zrout, m_array_length, vid); } void HYDRO_2D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_zrout, vid); deallocData(m_zzout, vid); deallocData(m_za, vid); diff --git a/src/lcals/INT_PREDICT.cpp b/src/lcals/INT_PREDICT.cpp index 0d5beaa64..38594a912 100644 --- a/src/lcals/INT_PREDICT.cpp +++ b/src/lcals/INT_PREDICT.cpp @@ -35,6 +35,7 @@ INT_PREDICT::INT_PREDICT(const RunParams& params) setFLOPsPerRep(17 * getActualProblemSize()); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -65,7 +66,7 @@ void INT_PREDICT::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) initData(m_c0, vid); } -void INT_PREDICT::updateChecksum(VariantID vid, size_t tune_idx) +void INT_PREDICT::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { Real_ptr px_host = m_px; @@ -80,7 +81,7 @@ void INT_PREDICT::updateChecksum(VariantID vid, size_t tune_idx) px_host[i] -= 
m_px_initval; } - checksum[vid][tune_idx] += calcChecksum(px_host, getActualProblemSize(), vid); + addToChecksum(px_host, getActualProblemSize(), vid); if (ds != hds) { copyData(ds, m_px, hds, px_host, m_array_length); @@ -90,7 +91,6 @@ void INT_PREDICT::updateChecksum(VariantID vid, size_t tune_idx) void INT_PREDICT::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_px, vid); } diff --git a/src/lcals/PLANCKIAN.cpp b/src/lcals/PLANCKIAN.cpp index 76f7fbedd..aaf77f5a1 100644 --- a/src/lcals/PLANCKIAN.cpp +++ b/src/lcals/PLANCKIAN.cpp @@ -35,6 +35,7 @@ PLANCKIAN::PLANCKIAN(const RunParams& params) setFLOPsPerRep(4 * getActualProblemSize()); // 1 exp setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -56,14 +57,13 @@ void PLANCKIAN::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_w, getActualProblemSize(), 0.0, vid); } -void PLANCKIAN::updateChecksum(VariantID vid, size_t tune_idx) +void PLANCKIAN::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_w, getActualProblemSize(), vid); + addToChecksum(m_w, getActualProblemSize(), vid); } void PLANCKIAN::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_y, vid); deallocData(m_u, vid); diff --git a/src/lcals/TRIDIAG_ELIM.cpp b/src/lcals/TRIDIAG_ELIM.cpp index 9fd6855b2..d4aa8eced 100644 --- a/src/lcals/TRIDIAG_ELIM.cpp +++ b/src/lcals/TRIDIAG_ELIM.cpp @@ -37,6 +37,7 @@ TRIDIAG_ELIM::TRIDIAG_ELIM(const RunParams& params) setFLOPsPerRep(2 * (m_N-1)); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -57,14 +58,13 @@ void TRIDIAG_ELIM::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_z, 
m_N, vid); } -void TRIDIAG_ELIM::updateChecksum(VariantID vid, size_t tune_idx) +void TRIDIAG_ELIM::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_xout, m_N, vid); + addToChecksum(m_xout, m_N, vid); } void TRIDIAG_ELIM::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_xout, vid); deallocData(m_xin, vid); deallocData(m_y, vid); diff --git a/src/polybench/POLYBENCH_2MM.cpp b/src/polybench/POLYBENCH_2MM.cpp index 90738cc96..aa07da40f 100644 --- a/src/polybench/POLYBENCH_2MM.cpp +++ b/src/polybench/POLYBENCH_2MM.cpp @@ -57,11 +57,11 @@ POLYBENCH_2MM::POLYBENCH_2MM(const RunParams& params) setFLOPsPerRep(3 * m_ni*m_nj*m_nk + 2 * m_ni*m_nj*m_nl ); - checksum_scale_factor = 0.000001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); // Change to Inconsistent if internal reductions use atomics + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.000001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N_to_the_three_halves); @@ -76,7 +76,6 @@ POLYBENCH_2MM::~POLYBENCH_2MM() void POLYBENCH_2MM::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; allocAndInitData(m_tmp, m_ni * m_nj, vid); allocAndInitData(m_A, m_ni * m_nk, vid); allocAndInitData(m_B, m_nk * m_nj, vid); @@ -84,14 +83,13 @@ void POLYBENCH_2MM::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_D, m_ni * m_nl, 0.0, vid); } -void POLYBENCH_2MM::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_2MM::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_D, m_ni * m_nl, checksum_scale_factor , vid); + addToChecksum(m_D, m_ni * m_nl, vid); } void POLYBENCH_2MM::tearDown(VariantID vid, size_t 
RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_tmp, vid); deallocData(m_A, vid); deallocData(m_B, vid); diff --git a/src/polybench/POLYBENCH_3MM.cpp b/src/polybench/POLYBENCH_3MM.cpp index f17e4a999..4149afaf5 100644 --- a/src/polybench/POLYBENCH_3MM.cpp +++ b/src/polybench/POLYBENCH_3MM.cpp @@ -66,11 +66,11 @@ POLYBENCH_3MM::POLYBENCH_3MM(const RunParams& params) 2 * m_nj*m_nl*m_nm + 2 * m_ni*m_nj*m_nl ); - checksum_scale_factor = 0.000000001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); // Change to Inconsistent if internal reductions use atomics + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.000000001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N_to_the_three_halves); @@ -85,7 +85,6 @@ POLYBENCH_3MM::~POLYBENCH_3MM() void POLYBENCH_3MM::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; allocAndInitData(m_A, m_ni * m_nk, vid); allocAndInitData(m_B, m_nk * m_nj, vid); allocAndInitData(m_C, m_nj * m_nm, vid); @@ -95,14 +94,13 @@ void POLYBENCH_3MM::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_G, m_ni * m_nl, 0.0, vid); } -void POLYBENCH_3MM::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_3MM::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_G, m_ni * m_nl, checksum_scale_factor , vid); + addToChecksum(m_G, m_ni * m_nl, vid); } void POLYBENCH_3MM::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_A, vid); deallocData(m_B, vid); deallocData(m_C, vid); diff --git a/src/polybench/POLYBENCH_ADI.cpp b/src/polybench/POLYBENCH_ADI.cpp index 3da785dee..f86628caf 100644 --- a/src/polybench/POLYBENCH_ADI.cpp +++ b/src/polybench/POLYBENCH_ADI.cpp @@ -49,11 +49,11 @@ 
POLYBENCH_ADI::POLYBENCH_ADI(const RunParams& params) setFLOPsPerRep( (13 + 2) * (m_n-2)*(m_n-2) + (13 + 2) * (m_n-2)*(m_n-2) ); - checksum_scale_factor = 0.0000001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.0000001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -74,14 +74,13 @@ void POLYBENCH_ADI::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitData(m_Q, m_n * m_n, vid); } -void POLYBENCH_ADI::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_ADI::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_U, m_n * m_n, checksum_scale_factor , vid); + addToChecksum(m_U, m_n * m_n, vid); } void POLYBENCH_ADI::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_U, vid); deallocData(m_V, vid); deallocData(m_P, vid); diff --git a/src/polybench/POLYBENCH_ATAX.cpp b/src/polybench/POLYBENCH_ATAX.cpp index 1d28e2d14..ab37d6c0f 100644 --- a/src/polybench/POLYBENCH_ATAX.cpp +++ b/src/polybench/POLYBENCH_ATAX.cpp @@ -48,11 +48,11 @@ POLYBENCH_ATAX::POLYBENCH_ATAX(const RunParams& params) setFLOPsPerRep(2 * m_N*m_N + 2 * m_N*m_N ); - checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); // Change to Inconsistent if internal reductions use atomics + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -67,21 +67,19 @@ POLYBENCH_ATAX::~POLYBENCH_ATAX() void POLYBENCH_ATAX::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; 
allocAndInitData(m_tmp, m_N, vid); allocAndInitData(m_x, m_N, vid); allocAndInitData(m_A, m_N * m_N, vid); allocAndInitDataConst(m_y, m_N, 0.0, vid); } -void POLYBENCH_ATAX::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_ATAX::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_y, m_N, checksum_scale_factor , vid); + addToChecksum(m_y, m_N, vid); } void POLYBENCH_ATAX::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_tmp, vid); deallocData(m_x, vid); deallocData(m_y, vid); diff --git a/src/polybench/POLYBENCH_FDTD_2D.cpp b/src/polybench/POLYBENCH_FDTD_2D.cpp index fe3929ba2..103e03d86 100644 --- a/src/polybench/POLYBENCH_FDTD_2D.cpp +++ b/src/polybench/POLYBENCH_FDTD_2D.cpp @@ -71,11 +71,11 @@ POLYBENCH_FDTD_2D::POLYBENCH_FDTD_2D(const RunParams& params) 3 * m_nx*(m_ny-1) + 5 * (m_nx-1)*(m_ny-1) ); - checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -96,14 +96,13 @@ void POLYBENCH_FDTD_2D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx allocAndInitData(m_fict, m_tsteps, vid); } -void POLYBENCH_FDTD_2D::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_FDTD_2D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_hz, m_nx * m_ny, checksum_scale_factor, vid); + addToChecksum(m_hz, m_nx * m_ny, vid); } void POLYBENCH_FDTD_2D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_fict, vid); deallocData(m_ex, vid); deallocData(m_ey, vid); diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp 
b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp index 0f01afdea..8d17eb974 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp @@ -39,11 +39,11 @@ POLYBENCH_FLOYD_WARSHALL::POLYBENCH_FLOYD_WARSHALL(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep( m_N*m_N*m_N * 3 / 2 ); // conditional is true about half of the time - checksum_scale_factor = 1.0 * + setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::tight); + setChecksumScaleFactor(1.0 * ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - - setChecksumConsistency(ChecksumConsistency::Consistent); // assumes FP ops get the same answer across platforms + getActualProblemSize() )); setComplexity(Complexity::N_to_the_three_halves); @@ -58,19 +58,17 @@ POLYBENCH_FLOYD_WARSHALL::~POLYBENCH_FLOYD_WARSHALL() void POLYBENCH_FLOYD_WARSHALL::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; allocAndInitDataRandSign(m_pin, m_N*m_N, vid); allocAndInitDataConst(m_pout, m_N*m_N, 0.0, vid); } -void POLYBENCH_FLOYD_WARSHALL::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_FLOYD_WARSHALL::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_pout, m_N*m_N, checksum_scale_factor , vid); + addToChecksum(m_pout, m_N*m_N, vid); } void POLYBENCH_FLOYD_WARSHALL::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_pin, vid); deallocData(m_pout, vid); } diff --git a/src/polybench/POLYBENCH_GEMM.cpp b/src/polybench/POLYBENCH_GEMM.cpp index 9ef30c647..bb0cb7382 100644 --- a/src/polybench/POLYBENCH_GEMM.cpp +++ b/src/polybench/POLYBENCH_GEMM.cpp @@ -48,11 +48,11 @@ POLYBENCH_GEMM::POLYBENCH_GEMM(const RunParams& params) setFLOPsPerRep((1 + 3 * m_nk) * m_ni*m_nj); - checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - 
getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); // Change to Inconsistent if internal reductions use atomics + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N_to_the_three_halves); @@ -67,20 +67,18 @@ POLYBENCH_GEMM::~POLYBENCH_GEMM() void POLYBENCH_GEMM::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; allocAndInitData(m_A, m_ni * m_nk, vid); allocAndInitData(m_B, m_nk * m_nj, vid); allocAndInitDataConst(m_C, m_ni * m_nj, 0.0, vid); } -void POLYBENCH_GEMM::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_GEMM::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_C, m_ni * m_nj, checksum_scale_factor , vid); + addToChecksum(m_C, m_ni * m_nj, vid); } void POLYBENCH_GEMM::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_A, vid); deallocData(m_B, vid); deallocData(m_C, vid); diff --git a/src/polybench/POLYBENCH_GEMVER.cpp b/src/polybench/POLYBENCH_GEMVER.cpp index 82d584c0e..a08d78621 100644 --- a/src/polybench/POLYBENCH_GEMVER.cpp +++ b/src/polybench/POLYBENCH_GEMVER.cpp @@ -68,11 +68,11 @@ POLYBENCH_GEMVER::POLYBENCH_GEMVER(const RunParams& params) 1 * m_n + 3 * m_n*m_n ); - checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); // Change to Inconsistent if internal reductions use atomics + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -88,8 +88,6 @@ POLYBENCH_GEMVER::~POLYBENCH_GEMVER() void POLYBENCH_GEMVER::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) 
vid; - allocAndInitData(m_A, m_n * m_n, vid); allocAndInitData(m_u1, m_n, vid); allocAndInitData(m_v1, m_n, vid); @@ -101,14 +99,13 @@ void POLYBENCH_GEMVER::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx) allocAndInitData(m_z, m_n, vid); } -void POLYBENCH_GEMVER::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_GEMVER::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_w, m_n, checksum_scale_factor , vid); + addToChecksum(m_w, m_n, vid); } void POLYBENCH_GEMVER::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_A, vid); deallocData(m_u1, vid); deallocData(m_v1, vid); diff --git a/src/polybench/POLYBENCH_GESUMMV.cpp b/src/polybench/POLYBENCH_GESUMMV.cpp index 690f950f8..5cd28bc1b 100644 --- a/src/polybench/POLYBENCH_GESUMMV.cpp +++ b/src/polybench/POLYBENCH_GESUMMV.cpp @@ -45,6 +45,7 @@ POLYBENCH_GESUMMV::POLYBENCH_GESUMMV(const RunParams& params) 3 ) * m_N ); setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); // Change to Inconsistent if internal reductions use atomics + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -59,21 +60,19 @@ POLYBENCH_GESUMMV::~POLYBENCH_GESUMMV() void POLYBENCH_GESUMMV::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; allocAndInitData(m_x, m_N, vid); allocAndInitDataConst(m_y, m_N, 0.0, vid); allocAndInitData(m_A, m_N * m_N, vid); allocAndInitData(m_B, m_N * m_N, vid); } -void POLYBENCH_GESUMMV::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_GESUMMV::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_y, m_N, vid); + addToChecksum(m_y, m_N, vid); } void POLYBENCH_GESUMMV::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x, vid); deallocData(m_y, vid); deallocData(m_A, vid); diff --git 
a/src/polybench/POLYBENCH_HEAT_3D.cpp b/src/polybench/POLYBENCH_HEAT_3D.cpp index cf7da4db6..4abebae5f 100644 --- a/src/polybench/POLYBENCH_HEAT_3D.cpp +++ b/src/polybench/POLYBENCH_HEAT_3D.cpp @@ -45,11 +45,11 @@ POLYBENCH_HEAT_3D::POLYBENCH_HEAT_3D(const RunParams& params) setFLOPsPerRep( 15 * (m_N-2) * (m_N-2) * (m_N-2) + 15 * (m_N-2) * (m_N-2) * (m_N-2) ); - checksum_scale_factor = 0.0001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(0.0001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -64,22 +64,20 @@ POLYBENCH_HEAT_3D::~POLYBENCH_HEAT_3D() void POLYBENCH_HEAT_3D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; allocAndInitData(m_Ainit, m_N*m_N*m_N, vid); allocAndInitData(m_Binit, m_N*m_N*m_N, vid); allocData(m_A, m_N*m_N*m_N, vid); allocData(m_B, m_N*m_N*m_N, vid); } -void POLYBENCH_HEAT_3D::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_HEAT_3D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_A, m_N*m_N*m_N, checksum_scale_factor , vid); - checksum[vid][tune_idx] += calcChecksum(m_B, m_N*m_N*m_N, checksum_scale_factor , vid); + addToChecksum(m_A, m_N*m_N*m_N, vid); + addToChecksum(m_B, m_N*m_N*m_N, vid); } void POLYBENCH_HEAT_3D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_A, vid); deallocData(m_B, vid); deallocData(m_Ainit, vid); diff --git a/src/polybench/POLYBENCH_JACOBI_1D.cpp b/src/polybench/POLYBENCH_JACOBI_1D.cpp index ce587a6d7..68a99a0e9 100644 --- a/src/polybench/POLYBENCH_JACOBI_1D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_1D.cpp @@ -44,11 +44,16 @@ POLYBENCH_JACOBI_1D::POLYBENCH_JACOBI_1D(const RunParams& params) setFLOPsPerRep( 3 * (m_N-2) + 3 * (m_N-2) ); - 
checksum_scale_factor = 0.0001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); +#if defined(RAJA_ENABLE_TARGET_OPENMP) + // TODO: base omp target variant result is off + setChecksumTolerance(ChecksumTolerance::loose); +#else + setChecksumTolerance(ChecksumTolerance::normal); +#endif + setChecksumScaleFactor(0.0001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -63,22 +68,20 @@ POLYBENCH_JACOBI_1D::~POLYBENCH_JACOBI_1D() void POLYBENCH_JACOBI_1D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; allocAndInitData(m_Ainit, m_N, vid); allocAndInitData(m_Binit, m_N, vid); allocData(m_A, m_N, vid); allocData(m_B, m_N, vid); } -void POLYBENCH_JACOBI_1D::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_JACOBI_1D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_A, m_N, checksum_scale_factor , vid); - checksum[vid][tune_idx] += calcChecksum(m_B, m_N, checksum_scale_factor , vid); + addToChecksum(m_A, m_N, vid); + addToChecksum(m_B, m_N, vid); } void POLYBENCH_JACOBI_1D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_A, vid); deallocData(m_B, vid); deallocData(m_Ainit, vid); diff --git a/src/polybench/POLYBENCH_JACOBI_2D.cpp b/src/polybench/POLYBENCH_JACOBI_2D.cpp index 3630d765b..6c4ba84d3 100644 --- a/src/polybench/POLYBENCH_JACOBI_2D.cpp +++ b/src/polybench/POLYBENCH_JACOBI_2D.cpp @@ -44,11 +44,11 @@ POLYBENCH_JACOBI_2D::POLYBENCH_JACOBI_2D(const RunParams& params) setFLOPsPerRep( 5 * (m_N-2)*(m_N-2) + 5 * (m_N-2)*(m_N-2) ); - checksum_scale_factor = 0.0001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::normal); + 
setChecksumScaleFactor(0.0001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -63,22 +63,20 @@ POLYBENCH_JACOBI_2D::~POLYBENCH_JACOBI_2D() void POLYBENCH_JACOBI_2D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; allocAndInitData(m_Ainit, m_N*m_N, vid); allocAndInitData(m_Binit, m_N*m_N, vid); allocData(m_A, m_N*m_N, vid); allocData(m_B, m_N*m_N, vid); } -void POLYBENCH_JACOBI_2D::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_JACOBI_2D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_A, m_N*m_N, checksum_scale_factor , vid); - checksum[vid][tune_idx] += calcChecksum(m_B, m_N*m_N, checksum_scale_factor , vid); + addToChecksum(m_A, m_N*m_N, vid); + addToChecksum(m_B, m_N*m_N, vid); } void POLYBENCH_JACOBI_2D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_A, vid); deallocData(m_B, vid); deallocData(m_Ainit, vid); diff --git a/src/polybench/POLYBENCH_MVT.cpp b/src/polybench/POLYBENCH_MVT.cpp index 2292f6bc3..6ff9d816e 100644 --- a/src/polybench/POLYBENCH_MVT.cpp +++ b/src/polybench/POLYBENCH_MVT.cpp @@ -46,11 +46,11 @@ POLYBENCH_MVT::POLYBENCH_MVT(const RunParams& params) setFLOPsPerRep(2 * m_N*m_N + 2 * m_N*m_N ); - checksum_scale_factor = 1.0 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); // Change to Inconsistent if internal reductions use atomics + setChecksumTolerance(ChecksumTolerance::normal); + setChecksumScaleFactor(1.0 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -65,7 +65,6 @@ POLYBENCH_MVT::~POLYBENCH_MVT() void POLYBENCH_MVT::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; allocAndInitData(m_y1, m_N, vid); allocAndInitData(m_y2, m_N, vid); 
allocAndInitData(m_A, m_N * m_N, vid); @@ -73,15 +72,14 @@ void POLYBENCH_MVT::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_x2, m_N, 0.0, vid); } -void POLYBENCH_MVT::updateChecksum(VariantID vid, size_t tune_idx) +void POLYBENCH_MVT::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_x1, m_N, checksum_scale_factor , vid); - checksum[vid][tune_idx] += calcChecksum(m_x2, m_N, checksum_scale_factor , vid); + addToChecksum(m_x1, m_N, vid); + addToChecksum(m_x2, m_N, vid); } void POLYBENCH_MVT::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_x1, vid); deallocData(m_x2, vid); deallocData(m_y1, vid); diff --git a/src/stream/ADD.cpp b/src/stream/ADD.cpp index bec04786c..73e1bc51f 100644 --- a/src/stream/ADD.cpp +++ b/src/stream/ADD.cpp @@ -34,7 +34,8 @@ ADD::ADD(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(1 * getActualProblemSize()); - setChecksumConsistency(ChecksumConsistency::Consistent); // assumes FP ops get the same answer across platforms + setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::tight); setComplexity(Complexity::N); @@ -54,14 +55,13 @@ void ADD::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_c, getActualProblemSize(), 0.0, vid); } -void ADD::updateChecksum(VariantID vid, size_t tune_idx) +void ADD::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_c, getActualProblemSize(), vid); + addToChecksum(m_c, getActualProblemSize(), vid); } void ADD::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_a, vid); deallocData(m_b, vid); deallocData(m_c, vid); diff --git a/src/stream/COPY.cpp b/src/stream/COPY.cpp index 71037f6cf..9cad4fb2e 100644 --- a/src/stream/COPY.cpp +++ 
b/src/stream/COPY.cpp @@ -35,6 +35,7 @@ COPY::COPY(const RunParams& params) setFLOPsPerRep(0); setChecksumConsistency(ChecksumConsistency::Consistent); + setChecksumTolerance(ChecksumTolerance::zero); setComplexity(Complexity::N); @@ -53,14 +54,13 @@ void COPY::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) allocAndInitDataConst(m_c, getActualProblemSize(), 0.0, vid); } -void COPY::updateChecksum(VariantID vid, size_t tune_idx) +void COPY::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_c, getActualProblemSize(), vid); + addToChecksum(m_c, getActualProblemSize(), vid); } void COPY::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_a, vid); deallocData(m_c, vid); } diff --git a/src/stream/DOT.cpp b/src/stream/DOT.cpp index 34a0ca5ef..fe367c692 100644 --- a/src/stream/DOT.cpp +++ b/src/stream/DOT.cpp @@ -35,6 +35,7 @@ DOT::DOT(const RunParams& params) setFLOPsPerRep(2 * getActualProblemSize()); setChecksumConsistency(ChecksumConsistency::Inconsistent); + setChecksumTolerance(ChecksumTolerance::normal); setComplexity(Complexity::N); @@ -57,14 +58,13 @@ void DOT::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) m_dot_init = 0.0; } -void DOT::updateChecksum(VariantID vid, size_t tune_idx) +void DOT::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += m_dot; + addToChecksum(m_dot); } void DOT::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_a, vid); deallocData(m_b, vid); } diff --git a/src/stream/MUL.cpp b/src/stream/MUL.cpp index 5ccac63c0..710827fd1 100644 --- a/src/stream/MUL.cpp +++ b/src/stream/MUL.cpp @@ -34,7 +34,8 @@ MUL::MUL(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(1 * getActualProblemSize()); - setChecksumConsistency(ChecksumConsistency::Consistent); // assumes FP ops get the same 
answer across platforms + setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::tight); setComplexity(Complexity::N); @@ -54,14 +55,13 @@ void MUL::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) initData(m_alpha, vid); } -void MUL::updateChecksum(VariantID vid, size_t tune_idx) +void MUL::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_b, getActualProblemSize(), vid); + addToChecksum(m_b, getActualProblemSize(), vid); } void MUL::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_b, vid); deallocData(m_c, vid); } diff --git a/src/stream/TRIAD.cpp b/src/stream/TRIAD.cpp index e66f5188e..2baeeda57 100644 --- a/src/stream/TRIAD.cpp +++ b/src/stream/TRIAD.cpp @@ -34,11 +34,11 @@ TRIAD::TRIAD(const RunParams& params) setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(2 * getActualProblemSize()); - checksum_scale_factor = 0.001 * - ( static_cast(getDefaultProblemSize()) / - getActualProblemSize() ); - setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning); + setChecksumTolerance(ChecksumTolerance::tight); + setChecksumScaleFactor(0.001 * + ( static_cast(getDefaultProblemSize()) / + getActualProblemSize() )); setComplexity(Complexity::N); @@ -59,14 +59,13 @@ void TRIAD::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) initData(m_alpha, vid); } -void TRIAD::updateChecksum(VariantID vid, size_t tune_idx) +void TRIAD::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - checksum[vid][tune_idx] += calcChecksum(m_a, getActualProblemSize(), checksum_scale_factor , vid); + addToChecksum(m_a, getActualProblemSize(), vid); } void TRIAD::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { - (void) vid; deallocData(m_a, vid); deallocData(m_b, vid); deallocData(m_c, vid); diff --git a/test/test-raja-perf-suite.cpp b/test/test-raja-perf-suite.cpp 
index 1ad403a8a..f96960dd7 100644 --- a/test/test-raja-perf-suite.cpp +++ b/test/test-raja-perf-suite.cpp @@ -50,9 +50,6 @@ int main( int argc, char** argv ) TEST(ShortSuiteTest, Basic) { - // default checksum tolerance for test pass/fail - rajaperf::Checksum_type chksum_tol = 1e-7; - // Assemble command line args for basic test std::vector< std::string > sargv{}; @@ -76,10 +73,6 @@ TEST(ShortSuiteTest, Basic) #if !defined(_WIN32) #if defined(RAJA_ENABLE_TARGET_OPENMP) - // checksum tolerance reduced b/c bas omp target variant of JACOBI_1D - // kernel result is off - chksum_tol = 5e-6; - sargv.emplace_back(std::string("--exclude-kernels")); sargv.emplace_back(std::string("Comm")); sargv.emplace_back(std::string("EDGE3D")); @@ -134,27 +127,7 @@ TEST(ShortSuiteTest, Basic) rajaperf::KernelBase* kernel = kernels[ik]; - // - // Get reference checksum (first kernel variant run) - // - rajaperf::Checksum_type cksum_ref = 0.0; - size_t ivck = 0; - bool found_ref = false; - while ( ivck < variant_ids.size() && !found_ref ) { - - rajaperf::VariantID vid = variant_ids[ivck]; - size_t num_tunings = kernel->getNumVariantTunings(vid); - for (size_t tune_idx = 0; tune_idx < num_tunings; ++tune_idx) { - if ( kernel->wasVariantTuningRun(vid, tune_idx) ) { - cksum_ref = kernel->getChecksum(vid, tune_idx); - found_ref = true; - break; - } - } - ++ivck; - - } // while loop over variants until reference checksum found - + rajaperf::Checksum_type cksum_tol = kernel->getChecksumTolerance(); // // Check execution time is greater than zero and checksum diff is @@ -170,8 +143,7 @@ TEST(ShortSuiteTest, Basic) double rtime = kernel->getTotTime(vid, tune_idx); - rajaperf::Checksum_type cksum = kernel->getChecksum(vid, tune_idx); - rajaperf::Checksum_type cksum_diff = std::abs(cksum_ref - cksum); + rajaperf::Checksum_type cksum_diff = kernel->getChecksumMaxDifference(vid, tune_idx); // Print kernel information when running test manually std::cout << "Check kernel, variant, tuning : " @@ -180,7 
+152,7 @@ TEST(ShortSuiteTest, Basic) << kernel->getVariantTuningName(vid, tune_idx) << std::endl; EXPECT_GT(rtime, 0.0); - EXPECT_LT(cksum_diff, chksum_tol); + EXPECT_LE(cksum_diff, cksum_tol); } } diff --git a/tpl/RAJA b/tpl/RAJA index 725dcc1e5..ecc4047f1 160000 --- a/tpl/RAJA +++ b/tpl/RAJA @@ -1 +1 @@ -Subproject commit 725dcc1e5f74c6c316b1614d3afae7e89526c764 +Subproject commit ecc4047f197f5ad45aa41f8a47b11d5e34ba0978