Merged

Changes from all commits (23 commits)
610c16c
Add checksum tolerance to KernelBase
MrBurmark Dec 19, 2025
e1fad9a
Use checksum tolerance in outputs
MrBurmark Dec 19, 2025
cbdaf94
Merge branch 'develop' of github.com:LLNL/RAJAPerf into feature/burma…
MrBurmark Dec 19, 2025
eaa7635
Use a setter for checksum_tolerance
MrBurmark Dec 26, 2025
e45f8a9
Fix use of local checksum_scale_factor in EDGE3D
MrBurmark Dec 26, 2025
9e13e4b
Use setChecksumScaleFactor
MrBurmark Dec 26, 2025
5acfc23
Hide checksum_scale_factor in KernelBase
MrBurmark Dec 26, 2025
bfe597c
Update checksum documentation
MrBurmark Dec 26, 2025
f837902
Unremove POLYBENCH_FLOYD_WARSHALL checksum scale factor
MrBurmark Dec 26, 2025
e38359e
Print checksum tolerance to checksum output file
MrBurmark Dec 26, 2025
d018d0d
Only print pass/fail to -sp output
MrBurmark Dec 26, 2025
a3494c6
Merge branch 'develop' of github.com:LLNL/RAJAPerf into feature/burma…
MrBurmark Jan 5, 2026
e71b6fe
divide checksum by number of execs
MrBurmark Jan 5, 2026
e4e85cd
Merge branch 'develop' of github.com:LLNL/RAJAPerf into feature/burma…
MrBurmark Jan 5, 2026
110971e
Change how we add to checksum and get checksum
MrBurmark Jan 6, 2026
32db89f
Use DataSpace::Host instead of Base_Seq to get host memory
MrBurmark Jan 6, 2026
0376855
Fix wrong space used in addToChecksum
MrBurmark Jan 6, 2026
342bff4
Use RAJAPERF_UNUSED_ARG instead of casting to void
MrBurmark Jan 6, 2026
a71aa17
Merge branch 'develop' of github.com:LLNL/RAJAPerf into feature/burma…
MrBurmark Jan 6, 2026
5abcdc3
Use RAJA::KahanSum in calcChecksumImpl
MrBurmark Jan 6, 2026
641673e
Use the first pass for the reference checksum
MrBurmark Jan 6, 2026
407a462
Use RAJA develop
MrBurmark Jan 6, 2026
7f7508c
Merge branch 'develop' into feature/burmark1/correctness
MrBurmark Jan 7, 2026
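Commit 5abcdc3 above switches calcChecksumImpl to RAJA::KahanSum. For readers unfamiliar with the technique, the following is a minimal standalone sketch of compensated (Kahan) summation; it is not the RAJA implementation, only an illustration of why a compensated accumulation makes checksums less sensitive to rounding in long sums.

```cpp
#include <cstdio>
#include <vector>

// Minimal standalone sketch of compensated (Kahan) summation, the technique
// referenced by commit 5abcdc3 ("Use RAJA::KahanSum in calcChecksumImpl").
// Not RAJAPerf/RAJA code: it only shows how a running compensation term
// recovers low-order bits that a naive accumulation would discard.
double kahan_sum(const std::vector<double>& values)
{
  double sum = 0.0;
  double compensation = 0.0;       // accumulated lost low-order bits
  for (double v : values) {
    double y = v - compensation;   // re-inject previously lost bits
    double t = sum + y;            // low-order bits of y may be lost here
    compensation = (t - sum) - y;  // measure exactly what was lost
    sum = t;
  }
  return sum;
}

int main()
{
  std::vector<double> data(1000000, 0.1);
  std::printf("kahan sum = %.15f\n", kahan_sum(data));
  return 0;
}
```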
10 changes: 4 additions & 6 deletions TODO/WIP-COUPLE.cpp
@@ -177,19 +177,17 @@ void COUPLE::runOpenMPVariant(VariantID vid)
RAJAPERF_DEFAULT_TUNING_DEFINE_BOILERPLATE(COUPLE, OpenMP, Base_OpenMP, RAJA_OpenMP)


void COUPLE::updateChecksum(VariantID vid, size_t tune_idx)
void COUPLE::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
Index_type max_loop_index = m_domain->lrn;

checksum[vid][tune_idx] += calcChecksum(m_t0, max_loop_index, vid);
checksum[vid][tune_idx] += calcChecksum(m_t1, max_loop_index, vid);
checksum[vid][tune_idx] += calcChecksum(m_t2, max_loop_index, vid);
addToChecksum(m_t0, max_loop_index, vid);
addToChecksum(m_t1, max_loop_index, vid);
addToChecksum(m_t2, max_loop_index, vid);
}

void COUPLE::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
(void) vid;

deallocData(m_t0, vid);
deallocData(m_t1, vid);
deallocData(m_t2, vid);
24 changes: 18 additions & 6 deletions docs/sphinx/dev_guide/kernel_class_impl.rst
@@ -57,12 +57,24 @@ The methods in the source file are:
kernel execution.
* The number of floating point operations (FLOPS) performed for each
kernel execution.
* The consistency of the checksums of the kernel. The possible values are
``Consistent`` where all the variant tunings always get the same checksum,
``ConsistentPerVariantTuning`` where an individual variant tuning always
gets the same checksum but different variant tunings may differ
slightly, and ``Inconsistent`` where the checksum of a variant tuning
may vary slightly run to run.
* The consistency of the checksums of the kernel. If the kernel always
produces the same checksum value for all variant tunings, the checksums
are ``Consistent``. Most kernels produce a different but consistent
checksum for each variant tuning, so their checksums are
``ConsistentPerVariantTuning``. Some kernels, however, have variant
tunings that produce a different checksum on each run, for example due
to the ordering of floating-point atomic add operations, so their
checksums are ``Inconsistent``.
* The tolerance of the checksums of the kernel. A number of predefined
values are available in the ``KernelBase::ChecksumTolerance`` class. If
the kernel consistently produces the same checksums, the ``zero``
tolerance is used. Most kernels use the ``normal`` tolerance. Some
kernels are very simple, for example performing a single floating-point
operation per iteration, and use the ``tight`` tolerance.
* The scale factor to use with the checksums of the kernel. This is an
arbitrary multiplier applied to the checksum values to scale them into
a desired range. It is mostly used for kernels whose floating-point
operation complexity does not scale linearly with problem size.
* The operational complexity of the kernel, where N is the *problem size*
of the kernel.
* Which RAJA features the kernel exercises.
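To make the checksum-related settings documented above concrete, here is a small self-contained mock in the style of the kernel constructors changed by this PR. The setter names and enum values (setChecksumConsistency, setChecksumTolerance, setChecksumScaleFactor, ChecksumTolerance::zero/tight/normal) mirror the ones appearing in the diffs below; MockKernelBase, MockKernel, and the numeric values are invented for illustration and are not RAJAPerf code.

```cpp
#include <cstddef>
#include <cstdio>

enum class ChecksumConsistency { Consistent, ConsistentPerVariantTuning, Inconsistent };
enum class ChecksumTolerance { zero, tight, normal };
using Checksum_type = long double;

// Mock of the KernelBase setters; the real RAJAPerf KernelBase is more involved.
class MockKernelBase {
public:
  void setChecksumConsistency(ChecksumConsistency c) { m_consistency = c; }
  void setChecksumTolerance(ChecksumTolerance t) { m_tolerance = t; }
  void setChecksumScaleFactor(Checksum_type s) { m_scale_factor = s; }
protected:
  ChecksumConsistency m_consistency = ChecksumConsistency::Consistent;
  ChecksumTolerance m_tolerance = ChecksumTolerance::normal;
  Checksum_type m_scale_factor = 1.0L;
};

// A kernel constructor sets the three checksum properties alongside its
// other per-kernel metadata.
class MockKernel : public MockKernelBase {
public:
  MockKernel(std::size_t default_size, std::size_t actual_size)
  {
    // Checksums are reproducible per variant tuning, but tunings differ.
    setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning);
    // Allow small floating-point differences against the reference checksum.
    setChecksumTolerance(ChecksumTolerance::normal);
    // Scale-factor pattern borrowed from the SCAN/EDGE3D diffs: keep the
    // checksum in a comparable range when work does not scale linearly.
    setChecksumScaleFactor(static_cast<Checksum_type>(1e-2) *
        ( static_cast<Checksum_type>(default_size) / actual_size ));
  }
};

int main()
{
  MockKernel k(1000000, 250000);
  std::printf("mock kernel checksum settings configured\n");
  return 0;
}
```

In the real kernels these calls sit in the constructor next to setFLOPsPerRep and setComplexity, as the file diffs below show.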
9 changes: 5 additions & 4 deletions src/algorithm/ATOMIC.cpp
@@ -35,6 +35,7 @@ ATOMIC::ATOMIC(const RunParams& params)
setFLOPsPerRep(getActualProblemSize());

setChecksumConsistency(ChecksumConsistency::Inconsistent); // atomics
setChecksumTolerance(ChecksumTolerance::normal);

setComplexity(Complexity::N);

@@ -54,14 +55,14 @@ void ATOMIC::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
m_final = -static_cast<int>(vid);
}

void ATOMIC::updateChecksum(VariantID vid, size_t tune_idx)
void ATOMIC::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
checksum[vid][tune_idx] += static_cast<Checksum_type>(m_final);
addToChecksum(m_final);
}

void ATOMIC::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
void ATOMIC::tearDown(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
(void) vid;

}

} // end namespace algorithm
6 changes: 3 additions & 3 deletions src/algorithm/HISTOGRAM.cpp
@@ -41,6 +41,7 @@ HISTOGRAM::HISTOGRAM(const RunParams& params)
setFLOPsPerRep( (std::is_floating_point_v<Data_type> ? 1 : 0) * getActualProblemSize() );

setChecksumConsistency(ChecksumConsistency::Consistent); // integer arithmetic
setChecksumTolerance(ChecksumTolerance::zero);

setComplexity(Complexity::N);

@@ -119,14 +120,13 @@ void HISTOGRAM::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
allocAndInitDataConst(DataSpace::Host, m_counts_final, m_num_bins, static_cast<Data_type>(0));
}

void HISTOGRAM::updateChecksum(VariantID vid, size_t tune_idx)
void HISTOGRAM::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
checksum[vid][tune_idx] += calcChecksum(DataSpace::Host, m_counts_final, m_num_bins);
addToChecksum(DataSpace::Host, m_counts_final, m_num_bins);
}

void HISTOGRAM::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
(void) vid;
deallocData(m_bins, vid);
deallocData(DataSpace::Host, m_counts_init);
deallocData(DataSpace::Host, m_counts_final);
6 changes: 3 additions & 3 deletions src/algorithm/MEMCPY.cpp
@@ -35,6 +35,7 @@ MEMCPY::MEMCPY(const RunParams& params)
setFLOPsPerRep(0);

setChecksumConsistency(ChecksumConsistency::Consistent);
setChecksumTolerance(ChecksumTolerance::zero);

setComplexity(Complexity::N);

@@ -53,14 +54,13 @@ void MEMCPY::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
allocAndInitDataConst(m_y, getActualProblemSize(), -1.234567e89, vid);
}

void MEMCPY::updateChecksum(VariantID vid, size_t tune_idx)
void MEMCPY::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
checksum[vid].at(tune_idx) += calcChecksum(m_y, getActualProblemSize(), vid);
addToChecksum(m_y, getActualProblemSize(), vid);
}

void MEMCPY::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
(void) vid;
deallocData(m_x, vid);
deallocData(m_y, vid);
}
6 changes: 3 additions & 3 deletions src/algorithm/MEMSET.cpp
@@ -35,6 +35,7 @@ MEMSET::MEMSET(const RunParams& params)
setFLOPsPerRep(0);

setChecksumConsistency(ChecksumConsistency::Consistent);
setChecksumTolerance(ChecksumTolerance::zero);

setComplexity(Complexity::N);

@@ -53,14 +54,13 @@ void MEMSET::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
m_val = 0.0;
}

void MEMSET::updateChecksum(VariantID vid, size_t tune_idx)
void MEMSET::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
checksum[vid].at(tune_idx) += calcChecksum(m_x, getActualProblemSize(), vid);
addToChecksum(m_x, getActualProblemSize(), vid);
}

void MEMSET::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
(void) vid;
deallocData(m_x, vid);
}

6 changes: 3 additions & 3 deletions src/algorithm/REDUCE_SUM.cpp
@@ -35,6 +35,7 @@ REDUCE_SUM::REDUCE_SUM(const RunParams& params)
setFLOPsPerRep(getActualProblemSize());

setChecksumConsistency(ChecksumConsistency::Inconsistent); // Reduction may use atomics
setChecksumTolerance(ChecksumTolerance::normal);

setComplexity(Complexity::N);

@@ -55,14 +56,13 @@ void REDUCE_SUM::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
m_sum = 0.0;
}

void REDUCE_SUM::updateChecksum(VariantID vid, size_t tune_idx)
void REDUCE_SUM::updateChecksum(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
checksum[vid].at(tune_idx) += calcChecksum(&m_sum, 1, vid);
addToChecksum(m_sum);
}

void REDUCE_SUM::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
(void) vid;
deallocData(m_x, vid);
}

13 changes: 6 additions & 7 deletions src/algorithm/SCAN.cpp
@@ -34,12 +34,12 @@ SCAN::SCAN(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(1 * getActualProblemSize());

checksum_scale_factor = 1e-2 *
setChecksumConsistency(ChecksumConsistency::Inconsistent); // could depend on scheduling, this may be overly conservative
setChecksumTolerance(ChecksumTolerance::normal);
setChecksumScaleFactor(1e-2 *
( static_cast<Checksum_type>(getDefaultProblemSize()) /
getActualProblemSize() ) /
getActualProblemSize();

setChecksumConsistency(ChecksumConsistency::Inconsistent); // could depend on scheduling, this may be overly conservative
getActualProblemSize());

setComplexity(Complexity::N);

@@ -58,14 +58,13 @@ void SCAN::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
allocAndInitDataConst(m_y, getActualProblemSize(), 0.0, vid);
}

void SCAN::updateChecksum(VariantID vid, size_t tune_idx)
void SCAN::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
checksum[vid][tune_idx] += calcChecksum(m_y, getActualProblemSize(), checksum_scale_factor, vid);
addToChecksum(m_y, getActualProblemSize(), vid);
}

void SCAN::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
(void) vid;
deallocData(m_x, vid);
deallocData(m_y, vid);
}
8 changes: 4 additions & 4 deletions src/algorithm/SORT.cpp
@@ -35,7 +35,8 @@ SORT::SORT(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(0);

setChecksumConsistency(ChecksumConsistency::Consistent); // // sort is not stable but values are equal if equivalent
setChecksumConsistency(ChecksumConsistency::Consistent); // sort is not stable but values are equal if equivalent
setChecksumTolerance(ChecksumTolerance::zero);

setComplexity(Complexity::N_logN);

@@ -53,14 +54,13 @@ void SORT::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
allocAndInitDataRandValue(m_x, getActualProblemSize()*getRunReps(), vid);
}

void SORT::updateChecksum(VariantID vid, size_t tune_idx)
void SORT::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
checksum[vid][tune_idx] += calcChecksum(m_x, getActualProblemSize()*getRunReps(), vid);
addToChecksum(m_x, getActualProblemSize()*getRunReps(), vid);
}

void SORT::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
(void) vid;
deallocData(m_x, vid);
}

8 changes: 4 additions & 4 deletions src/algorithm/SORTPAIRS.cpp
@@ -36,6 +36,7 @@ SORTPAIRS::SORTPAIRS(const RunParams& params)
setFLOPsPerRep(0);

setChecksumConsistency(ChecksumConsistency::Inconsistent); // sort is not stable and could depend on scheduling
setChecksumTolerance(ChecksumTolerance::normal);

setComplexity(Complexity::N_logN);

@@ -54,15 +55,14 @@ void SORTPAIRS::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
allocAndInitDataRandValue(m_i, getActualProblemSize()*getRunReps(), vid);
}

void SORTPAIRS::updateChecksum(VariantID vid, size_t tune_idx)
void SORTPAIRS::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
checksum[vid][tune_idx] += calcChecksum(m_x, getActualProblemSize()*getRunReps(), vid);
checksum[vid][tune_idx] += calcChecksum(m_i, getActualProblemSize()*getRunReps(), vid);
addToChecksum(m_x, getActualProblemSize()*getRunReps(), vid);
addToChecksum(m_i, getActualProblemSize()*getRunReps(), vid);
}

void SORTPAIRS::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
(void) vid;
deallocData(m_x, vid);
deallocData(m_i, vid);
}
7 changes: 3 additions & 4 deletions src/apps/CONVECTION3DPA.cpp
@@ -53,6 +53,7 @@ CONVECTION3DPA::CONVECTION3DPA(const RunParams& params)
));

setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning);
setChecksumTolerance(ChecksumTolerance::normal);

setComplexity(Complexity::N);

@@ -76,15 +77,13 @@ void CONVECTION3DPA::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
allocAndInitDataConst(m_Y, Index_type(CPA_D1D*CPA_D1D*CPA_D1D*m_NE), Real_type(0.0), vid);
}

void CONVECTION3DPA::updateChecksum(VariantID vid, size_t tune_idx)
void CONVECTION3DPA::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
checksum[vid][tune_idx] += calcChecksum(m_Y, CPA_D1D*CPA_D1D*CPA_D1D*m_NE, vid);
addToChecksum(m_Y, CPA_D1D*CPA_D1D*CPA_D1D*m_NE, vid);
}

void CONVECTION3DPA::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
(void) vid;

deallocData(m_B, vid);
deallocData(m_Bt, vid);
deallocData(m_G, vid);
7 changes: 3 additions & 4 deletions src/apps/DEL_DOT_VEC_2D.cpp
@@ -45,6 +45,7 @@ DEL_DOT_VEC_2D::DEL_DOT_VEC_2D(const RunParams& params)
setFLOPsPerRep(54 * m_domain->n_real_zones);

setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning);
setChecksumTolerance(ChecksumTolerance::normal);

setComplexity(Complexity::N);

@@ -79,15 +80,13 @@ void DEL_DOT_VEC_2D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
m_half = 0.5;
}

void DEL_DOT_VEC_2D::updateChecksum(VariantID vid, size_t tune_idx)
void DEL_DOT_VEC_2D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
checksum[vid][tune_idx] += calcChecksum(m_div, m_array_length, vid);
addToChecksum(m_div, m_array_length, vid);
}

void DEL_DOT_VEC_2D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
(void) vid;

deallocData(m_x, vid);
deallocData(m_y, vid);
deallocData(m_real_zones, vid);
7 changes: 3 additions & 4 deletions src/apps/DIFFUSION3DPA.cpp
@@ -54,6 +54,7 @@ DIFFUSION3DPA::DIFFUSION3DPA(const RunParams& params)
3 * DPA_D1D * DPA_D1D * DPA_D1D));

setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning);
setChecksumTolerance(ChecksumTolerance::normal);

setComplexity(Complexity::N);

@@ -76,15 +77,13 @@ void DIFFUSION3DPA::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
allocAndInitDataConst(m_Y, Index_type(DPA_D1D*DPA_D1D*DPA_D1D*m_NE), Real_type(0.0), vid);
}

void DIFFUSION3DPA::updateChecksum(VariantID vid, size_t tune_idx)
void DIFFUSION3DPA::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
checksum[vid][tune_idx] += calcChecksum(m_Y, DPA_D1D*DPA_D1D*DPA_D1D*m_NE, vid);
addToChecksum(m_Y, DPA_D1D*DPA_D1D*DPA_D1D*m_NE, vid);
}

void DIFFUSION3DPA::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
(void) vid;

deallocData(m_B, vid);
deallocData(m_G, vid);
deallocData(m_D, vid);
12 changes: 6 additions & 6 deletions src/apps/EDGE3D.cpp
@@ -59,11 +59,11 @@ EDGE3D::EDGE3D(const RunParams& params)

setFLOPsPerRep(number_of_elements * flops_per_element);

m_checksum_scale_factor = 0.001 *
( static_cast<Checksum_type>(getDefaultProblemSize()) /
getActualProblemSize() );

setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning);
setChecksumTolerance(ChecksumTolerance::normal);
setChecksumScaleFactor(0.001 *
( static_cast<Checksum_type>(getDefaultProblemSize()) /
getActualProblemSize() ));

setComplexity(Complexity::N);

@@ -91,9 +91,9 @@ void EDGE3D::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
allocAndInitDataConst(m_sum, m_array_length, Real_type(0.0), vid);
}

void EDGE3D::updateChecksum(VariantID vid, size_t tune_idx)
void EDGE3D::updateChecksum(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
checksum[vid][tune_idx] += calcChecksum(m_sum, m_array_length, m_checksum_scale_factor, vid );
addToChecksum(m_sum, m_array_length, vid);
}

void EDGE3D::tearDown(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
2 changes: 0 additions & 2 deletions src/apps/EDGE3D.hpp
@@ -441,8 +441,6 @@ class EDGE3D : public KernelBase

ADomain* m_domain;
Index_type m_array_length;

Real_type m_checksum_scale_factor;
};

} // end namespace apps