Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 45 additions & 19 deletions src/common/Executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include "CudaDataUtils.hpp"
#include "HipDataUtils.hpp"

// Warmup kernels to run first to help reduce startup overheads in timings
// Warmup kernels for default warmup mode
#include "basic/DAXPY.hpp"
#include "basic/REDUCE3_INT.hpp"
#include "basic/INDEXLIST_3LOOP.hpp"
Expand Down Expand Up @@ -754,7 +754,9 @@ void Executor::runKernel(KernelBase* kernel, bool print_kernel_name)

void Executor::runWarmupKernels()
{
if ( run_params.getDisableWarmup() ) {
RunParams::WarmupMode warmup_mode = run_params.getWarmupMode();

if ( warmup_mode == RunParams::WarmupMode::Disable ) {
return;
}

Expand All @@ -763,16 +765,28 @@ void Executor::runWarmupKernels()
//
// Get warmup kernels to run from input
//
std::set<KernelID> kernel_ids = run_params.getWarmupKernelIDsToRun();
std::set<KernelID> warmup_kernel_ids;

if ( warmup_mode == RunParams::WarmupMode::Explicit ) {

if ( kernel_ids.empty() ) {
warmup_kernel_ids = run_params.getSpecifiedWarmupKernelIDs();

} else if ( warmup_mode == RunParams::WarmupMode::PerfRunSame ) {

//
// If no warmup kernels were given, choose a warmup kernel for each feature
// Warmup kernels will be same as kernels specified to run in the suite
//
for (size_t ik = 0; ik < kernels.size(); ++ik) {
KernelBase* kernel = kernels[ik];
warmup_kernel_ids.insert( kernel->getKernelID() );
} // iterate over kernels to run

} else if ( warmup_mode == RunParams::WarmupMode::Default ) {

//
// For kernels to be run, assemble a set of feature IDs
// No warmup kernel input given, choose a warmup kernel for each feature
//
// First, assemble a set of feature IDs
//
std::set<FeatureID> feature_ids;
for (size_t ik = 0; ik < kernels.size(); ++ik) {
Expand All @@ -788,7 +802,7 @@ void Executor::runWarmupKernels()
} // iterate over kernels

//
// Map feature IDs to set of warmup kernel IDs
// Map feature IDs to rudimentary set of warmup kernel IDs
//
for ( auto fid = feature_ids.begin(); fid != feature_ids.end(); ++ fid ) {

Expand All @@ -797,29 +811,29 @@ void Executor::runWarmupKernels()
case Forall:
case Kernel:
case Launch:
kernel_ids.insert(Basic_DAXPY); break;
warmup_kernel_ids.insert(Basic_DAXPY); break;

case Sort:
kernel_ids.insert(Algorithm_SORT); break;
warmup_kernel_ids.insert(Algorithm_SORT); break;

case Scan:
kernel_ids.insert(Basic_INDEXLIST_3LOOP); break;
warmup_kernel_ids.insert(Basic_INDEXLIST_3LOOP); break;

case Workgroup:
kernel_ids.insert(Comm_HALO_PACKING_FUSED); break;
warmup_kernel_ids.insert(Comm_HALO_PACKING_FUSED); break;

case Reduction:
kernel_ids.insert(Basic_REDUCE3_INT); break;
warmup_kernel_ids.insert(Basic_REDUCE3_INT); break;

case Atomic:
kernel_ids.insert(Basic_PI_ATOMIC); break;
warmup_kernel_ids.insert(Basic_PI_ATOMIC); break;

case View:
break;

#ifdef RAJA_PERFSUITE_ENABLE_MPI
case MPI:
kernel_ids.insert(Comm_HALO_EXCHANGE_FUSED); break;
warmup_kernel_ids.insert(Comm_HALO_EXCHANGE_FUSED); break;
#endif

default:
Expand All @@ -835,7 +849,15 @@ void Executor::runWarmupKernels()
//
// Run warmup kernels
//
for ( auto kid = kernel_ids.begin(); kid != kernel_ids.end(); ++ kid ) {
bool prev_state = KernelBase::setWarmupRun(true);

for ( auto kid = warmup_kernel_ids.begin();
kid != warmup_kernel_ids.end(); ++ kid ) {
//
// Note that we create a new kernel object for each kernel to run
// in warmup so we don't pollute timing data, checksum data, etc.
// for kernels that will run for real later...
//
KernelBase* kernel = getKernelObject(*kid, run_params);
#if defined(RAJA_PERFSUITE_USE_CALIPER)
kernel->caliperOff();
Expand All @@ -847,6 +869,8 @@ void Executor::runWarmupKernels()
delete kernel;
}

KernelBase::setWarmupRun(prev_state);

}

void Executor::outputRunData()
Expand Down Expand Up @@ -933,10 +957,12 @@ void Executor::writeCSVReport(ostream& file, CSVRepMode mode,
//
// Set basic table formatting parameters.
//
const string kernel_col_name("Kernel ");
const string kernel_name_col_header_variant("Variant ");
const string kernel_name_col_header_tuning("Tuning ");
const string sepchr(" , ");

size_t kercol_width = kernel_col_name.size();
size_t kercol_width = max(kernel_name_col_header_variant.size(),
kernel_name_col_header_tuning.size());
for (size_t ik = 0; ik < kernels.size(); ++ik) {
kercol_width = max(kercol_width, kernels[ik]->getName().size());
}
Expand Down Expand Up @@ -969,7 +995,7 @@ void Executor::writeCSVReport(ostream& file, CSVRepMode mode,
//
// Print column variant name line.
//
file <<left<< setw(kercol_width) << kernel_col_name;
file <<left<< setw(kercol_width) << kernel_name_col_header_variant;
for (size_t iv = 0; iv < variant_ids.size(); ++iv) {
for (size_t it = 0; it < tuning_names[variant_ids[iv]].size(); ++it) {
file << sepchr <<left<< setw(vartuncol_width[iv][it])
Expand All @@ -981,7 +1007,7 @@ void Executor::writeCSVReport(ostream& file, CSVRepMode mode,
//
// Print column tuning name line.
//
file <<left<< setw(kercol_width) << kernel_col_name;
file <<left<< setw(kercol_width) << kernel_name_col_header_tuning;
for (size_t iv = 0; iv < variant_ids.size(); ++iv) {
for (size_t it = 0; it < tuning_names[variant_ids[iv]].size(); ++it) {
file << sepchr <<left<< setw(vartuncol_width[iv][it])
Expand Down
14 changes: 13 additions & 1 deletion src/common/KernelBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@

namespace rajaperf {

//
// Static method that records whether kernel runs are for warmup purposes.
//
// The shared s_warmup_run flag is overwritten with warmup_run, and the value
// the flag held before the call is returned so the caller can restore it.
//
bool KernelBase::setWarmupRun(bool warmup_run)
{
  const bool state_before_call = s_warmup_run;
  s_warmup_run = warmup_run;
  return state_before_call;
}

KernelBase::KernelBase(KernelID kid, const RunParams& params)
: run_params(params)
#if defined(RAJA_ENABLE_TARGET_OPENMP)
Expand Down Expand Up @@ -129,7 +139,9 @@ Index_type KernelBase::getTargetProblemSize() const
Index_type KernelBase::getRunReps() const
{
Index_type run_reps = static_cast<Index_type>(0);
if (run_params.getInputState() == RunParams::CheckRun) {
if (s_warmup_run) {
run_reps = static_cast<Index_type>(1);
} else if (run_params.getInputState() == RunParams::CheckRun) {
run_reps = static_cast<Index_type>(run_params.getCheckRunReps());
} else {
run_reps = static_cast<Index_type>(default_reps*run_params.getRepFactor());
Expand Down
17 changes: 16 additions & 1 deletion src/common/KernelBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,15 @@ class KernelBase
{ return std::numeric_limits<size_t>::max(); }
static std::string getDefaultTuningName() { return "default"; }

//
// Method to set state of all Kernel objects to indicate kernel runs
// are for warmup purposes if true is passed, else false.
//
// The warmup state before the method call is returned to facilitate
// reset mechanics.
//
static bool setWarmupRun(bool warmup_run);

KernelBase(KernelID kid, const RunParams& params);

virtual ~KernelBase();
Expand Down Expand Up @@ -629,7 +638,13 @@ class KernelBase
variant_tuning_method_pointer method);

//
// Static properties of kernel, independent of run
// Boolean member shared by all kernel objects indicating whether they
// will be run for warmup purposes (true) or not (false).
//
static inline bool s_warmup_run = false;

//
// Persistent properties of kernel, independent of run
//
KernelID kernel_id;
std::string name;
Expand Down
33 changes: 22 additions & 11 deletions src/common/RunParams.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ RunParams::RunParams(int argc, char** argv)
checkrun_reps(1),
reference_variant(),
reference_vid(NumVariants),
warmup_mode(WarmupMode::Default),
warmup_kernel_input(),
invalid_warmup_kernel_input(),
kernel_input(),
Expand All @@ -83,7 +84,6 @@ RunParams::RunParams(int argc, char** argv)
#if defined(RAJA_PERFSUITE_USE_CALIPER)
add_to_spot_config(),
#endif
disable_warmup(false),
run_kernels(),
run_variants()
{
Expand Down Expand Up @@ -176,8 +176,6 @@ void RunParams::print(std::ostream& str) const
}
#endif

str << "\n disable_warmup = " << disable_warmup;

str << "\n seq data space = " << getDataSpaceName(seqDataSpace);
str << "\n omp data space = " << getDataSpaceName(ompDataSpace);
str << "\n omp target data space = " << getDataSpaceName(ompTargetDataSpace);
Expand All @@ -200,6 +198,8 @@ void RunParams::print(std::ostream& str) const
str << "\n hip MPI data space = " << getDataSpaceName(hipMPIDataSpace);
str << "\n kokkos MPI data space = " << getDataSpaceName(kokkosMPIDataSpace);

str << "\n warmup_mode = " << WarmupModeToStr(warmup_mode);

str << "\n warmup_kernel_input = ";
for (size_t j = 0; j < warmup_kernel_input.size(); ++j) {
str << "\n\t" << warmup_kernel_input[j];
Expand Down Expand Up @@ -845,6 +845,8 @@ void RunParams::parseCommandLineOptions(int argc, char** argv)
}
}

warmup_mode = WarmupMode::Explicit;

} else if ( opt == std::string("--kernels") ||
opt == std::string("-k") ) {

Expand Down Expand Up @@ -1140,9 +1142,13 @@ void RunParams::parseCommandLineOptions(int argc, char** argv)
input_state = DryRun;
}

} else if ( std::string(argv[i]) == std::string("--disable-warmup") ) {
} else if ( std::string(argv[i]) == std::string("--warmup-disable") ) {

warmup_mode = WarmupMode::Disable;

disable_warmup = true;
} else if ( std::string(argv[i]) == std::string("--warmup-perfrun-same") ) {

warmup_mode = WarmupMode::PerfRunSame;

} else if ( std::string(argv[i]) == std::string("--checkrun") ) {

Expand Down Expand Up @@ -1348,11 +1354,16 @@ void RunParams::printHelpMessage(std::ostream& str) const
<< "\t\t -of dat (output data will be in files 'dat*')\n\n";

str << "\t Options for selecting kernels to run....\n"
<< "\t ========================================\n\n";;
<< "\t ========================================\n\n";

str << "\t For warmup kernels, the default (no option specified) will run a minimal set of warmup kernels based on\n"
<< "\t RAJA features exercised in kernels specified for perf run. Other options are:\n\n";

str << "\t --warmup-disable (do not run any warmup kernels)\n\n";

str << "\t --disable-warmup (disable warmup kernels) [Default is run warmup kernels that are relevant to kernels selected to run]\n\n";
str << "\t --warmup-perfrun-same (run same set of kernels for warmup as specified for perf run)\n\n";

str << "\t --warmup-kernels, -wk <space-separated strings> [Default is run warmup kernels that are relevant to kernels selected to run]\n"
str << "\t --warmup-kernels, -wk <space-separated strings> [if no kernel names specified, none will be run for warmup]\n"
<< "\t (names of individual kernels and/or groups of kernels to warmup)\n"
<< "\t See '--print-kernels'/'-pk' option for list of valid kernel and group names.\n"
<< "\t Kernel names are listed as <group name>_<kernel name>.\n";
Expand Down Expand Up @@ -2065,7 +2076,7 @@ void RunParams::processKernelInput()
//
// ================================================================

run_warmup_kernels.clear();
specified_warmup_kernel_ids.clear();

if ( !warmup_kernel_input.empty() ) {

Expand Down Expand Up @@ -2103,7 +2114,7 @@ void RunParams::processKernelInput()
KernelID tkid = static_cast<KernelID>(kid);
if ( getFullKernelName(tkid).find(gname) != std::string::npos &&
exclude_kernels.find(tkid) == exclude_kernels.end()) {
run_warmup_kernels.insert(tkid);
specified_warmup_kernel_ids.insert(tkid);
}
}

Expand All @@ -2121,7 +2132,7 @@ void RunParams::processKernelInput()
KernelID tkid = static_cast<KernelID>(kid);
if ( getKernelName(tkid) == *it || getFullKernelName(tkid) == *it ) {
if (exclude_kernels.find(tkid) == exclude_kernels.end()) {
run_warmup_kernels.insert(tkid);
specified_warmup_kernel_ids.insert(tkid);
}
found_it = true;
}
Expand Down
40 changes: 35 additions & 5 deletions src/common/RunParams.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,35 @@ class RunParams {
}
}

/*!
 * \brief Enumeration indicating how to run warmup kernels
 */
enum WarmupMode {
  Disable,     /*!< no warmup kernels will be run */
  Default,     /*!< run minimal set of warmup kernels based on kernels to run */
  PerfRunSame, /*!< run warmup pass of each kernel to run */
  Explicit,    /*!< run warmup pass of each kernel explicitly named for warmup in input */
};

/*!
 * \brief Translate WarmupMode enum value to string
 *
 * \param wm warmup mode value to translate
 * \return the enumerator's name ("Disable", "Default", "PerfRunSame" or
 *         "Explicit"), or "Unknown" when wm matches none of the enumerators
 */
static std::string WarmupModeToStr(WarmupMode wm)
{
  // Start from the fallback text; a recognized mode overwrites it below.
  const char* mode_name = "Unknown";

  switch (wm) {
    case WarmupMode::Disable:
      mode_name = "Disable";
      break;
    case WarmupMode::Default:
      mode_name = "Default";
      break;
    case WarmupMode::PerfRunSame:
      mode_name = "PerfRunSame";
      break;
    case WarmupMode::Explicit:
      mode_name = "Explicit";
      break;
    default:
      break;
  }

  return mode_name;
}

/*!
* \brief Return state of input parsed to this point.
*/
Expand Down Expand Up @@ -252,9 +281,10 @@ class RunParams {
const std::string& getAddToCaliperConfig() const { return add_to_cali_config; }
#endif

bool getDisableWarmup() const { return disable_warmup; }
WarmupMode getWarmupMode() const { return warmup_mode; }

const std::set<KernelID>& getWarmupKernelIDsToRun() const { return run_warmup_kernels; }
const std::set<KernelID>& getSpecifiedWarmupKernelIDs() const
{ return specified_warmup_kernel_ids; }
const std::set<KernelID>& getKernelIDsToRun() const { return run_kernels; }
const std::set<VariantID>& getVariantIDsToRun() const { return run_variants; }
VariantID getReferenceVariantID() const { return reference_vid; }
Expand Down Expand Up @@ -364,6 +394,8 @@ class RunParams {
DataSpace syclMPIDataSpace = DataSpace::SyclPinned;
DataSpace kokkosMPIDataSpace = DataSpace::Copy;

WarmupMode warmup_mode;

//
// Arrays to hold input strings for valid/invalid input. Helpful for
// debugging command line args.
Expand Down Expand Up @@ -398,9 +430,7 @@ class RunParams {
std::string add_to_cali_config;
#endif

bool disable_warmup;

std::set<KernelID> run_warmup_kernels;
std::set<KernelID> specified_warmup_kernel_ids;
std::set<KernelID> run_kernels;
std::set<VariantID> run_variants;

Expand Down