Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
610c16c
Add checksum tolerance to KernelBase
MrBurmark Dec 19, 2025
e1fad9a
Use checksum tolerance in outputs
MrBurmark Dec 19, 2025
cbdaf94
Merge branch 'develop' of github.com:LLNL/RAJAPerf into feature/burmark1/correctness
MrBurmark Dec 19, 2025
eaa7635
Use a setter for checksum_tolerance
MrBurmark Dec 26, 2025
e45f8a9
Fix use of local checksum_scale_factor in EDGE3D
MrBurmark Dec 26, 2025
9e13e4b
Use setChecksumScaleFactor
MrBurmark Dec 26, 2025
5acfc23
Hide checksum_scale_factor in KernelBase
MrBurmark Dec 26, 2025
bfe597c
Update checksum documentation
MrBurmark Dec 26, 2025
f837902
Unremove POLYBENCH_FLOYD_WARSHALL checksum scale factor
MrBurmark Dec 26, 2025
e38359e
Print checksum tolerance to checksum output file
MrBurmark Dec 26, 2025
d018d0d
Only print pass/fail to -sp output
MrBurmark Dec 26, 2025
a3494c6
Merge branch 'develop' of github.com:LLNL/RAJAPerf into feature/burmark1/correctness
MrBurmark Jan 5, 2026
e71b6fe
divide checksum by number of execs
MrBurmark Jan 5, 2026
e4e85cd
Merge branch 'develop' of github.com:LLNL/RAJAPerf into feature/burmark1/correctness
MrBurmark Jan 5, 2026
110971e
Change how we add to checksum and get checksum
MrBurmark Jan 6, 2026
32db89f
Use DataSpace::Host instead of Base_Seq to get host memory
MrBurmark Jan 6, 2026
0376855
Fix wrong space used in addToChecksum
MrBurmark Jan 6, 2026
342bff4
Use RAJAPERF_UNUSED_ARG instead of casting to void
MrBurmark Jan 6, 2026
a71aa17
Merge branch 'develop' of github.com:LLNL/RAJAPerf into feature/burmark1/correctness
MrBurmark Jan 6, 2026
5abcdc3
Use RAJA::KahanSum in calcChecksumImpl
MrBurmark Jan 6, 2026
641673e
Use the first pass for the reference checksum
MrBurmark Jan 6, 2026
407a462
Use RAJA develop
MrBurmark Jan 6, 2026
7f7508c
Merge branch 'develop' into feature/burmark1/correctness
MrBurmark Jan 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/algorithm/HISTOGRAM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ HISTOGRAM::HISTOGRAM(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep( (std::is_floating_point_v<Data_type> ? 1 : 0) * getActualProblemSize() );

checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent); // integer arithmetic

setComplexity(Complexity::N);
Expand Down
2 changes: 2 additions & 0 deletions src/algorithm/MEMCPY.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ MEMCPY::MEMCPY(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(0);

checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent);

setComplexity(Complexity::N);
Expand Down
2 changes: 2 additions & 0 deletions src/algorithm/MEMSET.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ MEMSET::MEMSET(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(0);

checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent);

setComplexity(Complexity::N);
Expand Down
4 changes: 3 additions & 1 deletion src/algorithm/SORT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ SORT::SORT(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(0);

setChecksumConsistency(ChecksumConsistency::Consistent); // // sort is not stable but values are equal if equivalent
checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent); // sort is not stable but values are equal if equivalent

setComplexity(Complexity::N_logN);

Expand Down
2 changes: 2 additions & 0 deletions src/basic/COPY8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ COPY8::COPY8(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(0);

checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent);

setComplexity(Complexity::N);
Expand Down
2 changes: 2 additions & 0 deletions src/basic/EMPTY.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ EMPTY::EMPTY(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep( 0 );

checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent);

setComplexity(Complexity::N);
Expand Down
2 changes: 2 additions & 0 deletions src/basic/INDEXLIST.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ INDEXLIST::INDEXLIST(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(0);

checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent);

setComplexity(Complexity::N);
Expand Down
2 changes: 2 additions & 0 deletions src/basic/INDEXLIST_3LOOP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ INDEXLIST_3LOOP::INDEXLIST_3LOOP(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(0);

checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent);

setComplexity(Complexity::N);
Expand Down
4 changes: 3 additions & 1 deletion src/basic/INIT_VIEW1D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(1 * getActualProblemSize());

setChecksumConsistency(ChecksumConsistency::Consistent);
checksum_tolerance = very_tight_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning);

setComplexity(Complexity::N);

Expand Down
4 changes: 3 additions & 1 deletion src/basic/INIT_VIEW1D_OFFSET.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(1 * getActualProblemSize());

setChecksumConsistency(ChecksumConsistency::Consistent);
checksum_tolerance = very_tight_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning);

setComplexity(Complexity::N);

Expand Down
4 changes: 3 additions & 1 deletion src/basic/MULADDSUB.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ MULADDSUB::MULADDSUB(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(3 * getActualProblemSize());

setChecksumConsistency(ChecksumConsistency::Consistent);
checksum_tolerance = tight_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning);

setComplexity(Complexity::N);

Expand Down
4 changes: 3 additions & 1 deletion src/basic/NESTED_INIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ NESTED_INIT::NESTED_INIT(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(3 * getActualProblemSize());

setChecksumConsistency(ChecksumConsistency::Consistent);
checksum_tolerance = very_tight_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::ConsistentPerVariantTuning);

setComplexity(Complexity::N);

Expand Down
2 changes: 2 additions & 0 deletions src/comm/HALO_EXCHANGE.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ HALO_EXCHANGE::HALO_EXCHANGE(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(0);

checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent);

setComplexity(Complexity::N_to_the_two_thirds);
Expand Down
2 changes: 2 additions & 0 deletions src/comm/HALO_EXCHANGE_FUSED.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ HALO_EXCHANGE_FUSED::HALO_EXCHANGE_FUSED(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(0);

checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent);

setComplexity(Complexity::N_to_the_two_thirds);
Expand Down
2 changes: 2 additions & 0 deletions src/comm/HALO_PACKING.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ HALO_PACKING::HALO_PACKING(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(0);

checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent);

setComplexity(Complexity::N_to_the_two_thirds);
Expand Down
2 changes: 2 additions & 0 deletions src/comm/HALO_PACKING_FUSED.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ HALO_PACKING_FUSED::HALO_PACKING_FUSED(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(0);

checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent);

setComplexity(Complexity::N_to_the_two_thirds);
Expand Down
2 changes: 2 additions & 0 deletions src/comm/HALO_SENDRECV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ HALO_SENDRECV::HALO_SENDRECV(const RunParams& params)
setBytesAtomicModifyWrittenPerRep( 0 );
setFLOPsPerRep(0);

checksum_tolerance = zero_checksum_tolerance;

setChecksumConsistency(ChecksumConsistency::Consistent);

setComplexity(Complexity::N_to_the_two_thirds);
Expand Down
85 changes: 48 additions & 37 deletions src/common/Executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -727,18 +727,31 @@ void Executor::runKernel(KernelBase* kernel, bool print_kernel_name)

size_t prec = 20;
const auto default_precision = getCout().precision();
Checksum_type checksum = kernel->getChecksum(vid, tune_idx);

Checksum_type cksum_tol = kernel->getChecksumTolerance();
Checksum_type cksum_ref = kernel->getReferenceChecksum();
Checksum_type cksum = kernel->getChecksum(vid, tune_idx);
Checksum_type cksum_diff = std::abs(cksum_ref - cksum);
#if defined(RAJA_PERFSUITE_ENABLE_MPI)
{
Checksum_type checksum_sum = 0;
Allreduce(&checksum, &checksum_sum, 1, MPI_SUM, MPI_COMM_WORLD);
checksum = checksum_sum / num_ranks;
Checksum_type cksum_sum = 0;
Allreduce(&cksum, &cksum_sum, 1, MPI_SUM, MPI_COMM_WORLD);
cksum = cksum_sum / num_ranks;

Checksum_type cksum_diff_max = 1e80;
Allreduce(&cksum_diff, &cksum_diff_max, 1, MPI_MAX, MPI_COMM_WORLD);
cksum_diff = cksum_diff_max;
}
getCout() << " checksum_avg ";
const char* cksum_name = "cksum_avg";
#else
getCout() << " checksum ";
const char* cksum_name = "checksum";
#endif
getCout() << setprecision(prec) << checksum
const char* cksum_result = "FAILED";
if (cksum_diff <= cksum_tol) {
cksum_result = "PASSED";
}
getCout() << " " << cksum_result << " " << cksum_name << " ";
getCout() << setprecision(prec) << cksum
<< setprecision(default_precision) << endl;
}

Expand Down Expand Up @@ -1364,6 +1377,7 @@ void Executor::writeChecksumReport(ostream& file)
}
namecol_width++;

size_t resultcol_width = 6+2;

//
// Print title.
Expand All @@ -1377,11 +1391,14 @@ void Executor::writeChecksumReport(ostream& file)
file << equal_line << endl;

//
// Print column title line.
// Print column title lines.
//
file <<left<< setw(namecol_width) << "Kernel " << endl;

file << dot_line << endl;

file <<left<< setw(namecol_width) << "Variants "
<<left<< setw(resultcol_width) << "Result "
#if defined(RAJA_PERFSUITE_ENABLE_MPI)
<<left<< setw(checksum_width) << "Average Checksum "
<<left<< setw(checksum_width) << "Max Checksum Diff "
Expand All @@ -1391,13 +1408,16 @@ void Executor::writeChecksumReport(ostream& file)
<<left<< setw(checksum_width) << "Checksum Diff "
#endif
<< endl;

file <<left<< setw(namecol_width) << " "
<<left<< setw(resultcol_width) << " "
<<left<< setw(checksum_width) << " "
<<left<< setw(checksum_width) << "(vs. first variant listed) "
#if defined(RAJA_PERFSUITE_ENABLE_MPI)
<<left<< setw(checksum_width) << ""
#endif
<< endl;

file << dash_line << endl;

//
Expand All @@ -1407,37 +1427,25 @@ void Executor::writeChecksumReport(ostream& file)
KernelBase* kern = kernels[ik];

file <<left<< setw(namecol_width) << kern->getName() << endl;

file << dot_line << endl;

Checksum_type cksum_ref = 0.0;
size_t ivck = 0;
bool found_ref = false;
while ( ivck < variant_ids.size() && !found_ref ) {
VariantID vid = variant_ids[ivck];
size_t num_tunings = kern->getNumVariantTunings(vid);
for (size_t tune_idx = 0; tune_idx < num_tunings; ++tune_idx) {
if ( kern->wasVariantTuningRun(vid, tune_idx) ) {
cksum_ref = kern->getChecksum(vid, tune_idx);
found_ref = true;
break;
}
}
++ivck;
}
Checksum_type cksum_tol = kern->getChecksumTolerance();
Checksum_type cksum_ref = kern->getReferenceChecksum();

// get vector of checksums and diffs
std::vector<std::vector<Checksum_type>> checksums(variant_ids.size());
std::vector<std::vector<Checksum_type>> checksums_diff(variant_ids.size());
std::vector<std::vector<Checksum_type>> checksums_abs_diff(variant_ids.size());
for (size_t iv = 0; iv < variant_ids.size(); ++iv) {
VariantID vid = variant_ids[iv];
size_t num_tunings = kernels[ik]->getNumVariantTunings(variant_ids[iv]);

checksums[iv].resize(num_tunings, 0.0);
checksums_diff[iv].resize(num_tunings, 0.0);
checksums_abs_diff[iv].resize(num_tunings, 0.0);
for (size_t tune_idx = 0; tune_idx < num_tunings; ++tune_idx) {
if ( kern->wasVariantTuningRun(vid, tune_idx) ) {
checksums[iv][tune_idx] = kern->getChecksum(vid, tune_idx);
checksums_diff[iv][tune_idx] = cksum_ref - kern->getChecksum(vid, tune_idx);
checksums_abs_diff[iv][tune_idx] = std::abs(cksum_ref - kern->getChecksum(vid, tune_idx));
}
}
}
Expand All @@ -1462,16 +1470,6 @@ void Executor::writeChecksumReport(ostream& file)
}
}

// get stats for checksums_abs_diff
std::vector<std::vector<Checksum_type>> checksums_abs_diff(variant_ids.size());
for (size_t iv = 0; iv < variant_ids.size(); ++iv) {
size_t num_tunings = kernels[ik]->getNumVariantTunings(variant_ids[iv]);
checksums_abs_diff[iv].resize(num_tunings, 0.0);
for (size_t tune_idx = 0; tune_idx < num_tunings; ++tune_idx) {
checksums_abs_diff[iv][tune_idx] = std::abs(checksums_diff[iv][tune_idx]);
}
}

std::vector<std::vector<Checksum_type>> checksums_abs_diff_min(variant_ids.size());
std::vector<std::vector<Checksum_type>> checksums_abs_diff_max(variant_ids.size());
std::vector<std::vector<Checksum_type>> checksums_abs_diff_sum(variant_ids.size());
Expand Down Expand Up @@ -1530,18 +1528,30 @@ void Executor::writeChecksumReport(ostream& file)
const string& tuning_name = kern->getVariantTuningName(vid, tune_idx);

if ( kern->wasVariantTuningRun(vid, tune_idx) ) {
const char* result = "FAILED";
if (
#if defined(RAJA_PERFSUITE_ENABLE_MPI)
checksums_abs_diff_max[iv][tune_idx]
#else
checksums_abs_diff[iv][tune_idx]
#endif
<= cksum_tol ) {
result = "PASSED";
}
file <<left<< setw(namecol_width) << (variant_name+"-"+tuning_name)
<<left<< setw(resultcol_width) << result
<< showpoint << setprecision(prec)
#if defined(RAJA_PERFSUITE_ENABLE_MPI)
<<left<< setw(checksum_width) << checksums_avg[iv][tune_idx]
<<left<< setw(checksum_width) << checksums_abs_diff_max[iv][tune_idx]
<<left<< setw(checksum_width) << checksums_abs_diff_stddev[iv][tune_idx] << endl;
#else
<<left<< setw(checksum_width) << checksums[iv][tune_idx]
<<left<< setw(checksum_width) << checksums_diff[iv][tune_idx] << endl;
<<left<< setw(checksum_width) << checksums_abs_diff[iv][tune_idx] << endl;
#endif
} else {
file <<left<< setw(namecol_width) << (variant_name+"-"+tuning_name)
<<left<< setw(resultcol_width) << "Not Run"
#if defined(RAJA_PERFSUITE_ENABLE_MPI)
<<left<< setw(checksum_width) << "Not Run"
<<left<< setw(checksum_width) << "Not Run"
Expand All @@ -1556,6 +1566,7 @@ void Executor::writeChecksumReport(ostream& file)
}

file << endl;

file << dash_line_short << endl;
}

Expand Down
11 changes: 11 additions & 0 deletions src/common/KernelBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,13 @@ KernelBase::KernelBase(KernelID kid, const RunParams& params)
running_variant = NumVariants;
running_tuning = getUnknownTuningIdx();

checksum_reference_variant = NumVariants;
checksum_reference_tuning = getUnknownTuningIdx();

checksum_scale_factor = 1.0;

checksum_tolerance = normal_checksum_tolerance;

#if defined(RAJA_PERFSUITE_USE_CALIPER)
// Init Caliper column metadata attributes
// Aggregatable attributes need to be initialized before manager.start()
Expand Down Expand Up @@ -318,6 +323,12 @@ void KernelBase::execute(VariantID vid, size_t tune_idx)

this->updateChecksum(vid, tune_idx);

if (checksum_reference_variant == NumVariants) {
// use first run variant tuning as checksum reference
checksum_reference_variant = vid;
checksum_reference_tuning = tune_idx;
}

this->tearDown(vid, tune_idx);

running_variant = NumVariants;
Expand Down
Loading