diff --git a/HeterogeneousCore/ROCmServices/BuildFile.xml b/HeterogeneousCore/ROCmServices/BuildFile.xml
new file mode 100644
index 0000000000000..0ff47a94f4ebc
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/BuildFile.xml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousCore/ROCmServices/interface/ROCmService.h b/HeterogeneousCore/ROCmServices/interface/ROCmService.h
new file mode 100644
index 0000000000000..c78ec27f51d80
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/interface/ROCmService.h
@@ -0,0 +1,45 @@
+#ifndef HeterogeneousCore_ROCmServices_interface_ROCmService_h
+#define HeterogeneousCore_ROCmServices_interface_ROCmService_h
+
+#include
+#include
+
+#include "FWCore/Utilities/interface/StreamID.h"
+
+namespace edm {
+ class ParameterSet;
+ class ActivityRegistry;
+ class ConfigurationDescriptions;
+} // namespace edm
+
+class ROCmService {
+public:
+ ROCmService(edm::ParameterSet const& config);
+ ~ROCmService();
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+ bool enabled() const { return enabled_; }
+
+ int numberOfDevices() const { return numberOfDevices_; }
+
+ // major, minor
+ std::pair computeCapability(int device) const { return computeCapabilities_.at(device); }
+
+ // Returns the id of device with most free memory. If none is found, returns -1.
+ int deviceWithMostFreeMemory() const;
+
+private:
+ int numberOfDevices_ = 0;
+ std::vector> computeCapabilities_;
+ bool enabled_ = false;
+ bool verbose_ = false;
+};
+
+namespace edm {
+ namespace service {
+ inline bool isProcessWideService(ROCmService const*) { return true; }
+ } // namespace service
+} // namespace edm
+
+#endif // HeterogeneousCore_ROCmServices_interface_ROCmService_h
diff --git a/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml b/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml
new file mode 100644
index 0000000000000..42f9e3024fc2f
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/plugins/BuildFile.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc b/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc
new file mode 100644
index 0000000000000..3bd0f2448f1b4
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/plugins/ROCmMonitoringService.cc
@@ -0,0 +1,120 @@
+#include
+
+#include
+
+#include "DataFormats/Provenance/interface/ModuleDescription.h"
+#include "FWCore/MessageLogger/interface/MessageLogger.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/ServiceRegistry/interface/ActivityRegistry.h"
+#include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+
+namespace edm {
+ class StreamContext;
+}
+
+class ROCmMonitoringService {
+public:
+ ROCmMonitoringService(edm::ParameterSet const& iConfig, edm::ActivityRegistry& iRegistry);
+ ~ROCmMonitoringService() = default;
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+ void postModuleConstruction(edm::ModuleDescription const& desc);
+ void postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const& mcc);
+ void postModuleEvent(edm::StreamContext const& sc, edm::ModuleCallingContext const& mcc);
+ void postEvent(edm::StreamContext const& sc);
+
+private:
+ int numberOfDevices_ = 0;
+};
+
+ROCmMonitoringService::ROCmMonitoringService(edm::ParameterSet const& config, edm::ActivityRegistry& registry) {
+ // make sure that ROCm is initialised, and that the ROCmService destructor is called after this service's destructor
+ edm::Service rocmService;
+ if (!rocmService->enabled())
+ return;
+ numberOfDevices_ = rocmService->numberOfDevices();
+
+ if (config.getUntrackedParameter("memoryConstruction")) {
+ registry.watchPostModuleConstruction(this, &ROCmMonitoringService::postModuleConstruction);
+ }
+ if (config.getUntrackedParameter("memoryBeginStream")) {
+ registry.watchPostModuleBeginStream(this, &ROCmMonitoringService::postModuleBeginStream);
+ }
+ if (config.getUntrackedParameter("memoryPerModule")) {
+ registry.watchPostModuleEvent(this, &ROCmMonitoringService::postModuleEvent);
+ }
+ if (config.getUntrackedParameter("memoryPerEvent")) {
+ registry.watchPostEvent(this, &ROCmMonitoringService::postEvent);
+ }
+}
+
+void ROCmMonitoringService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+ edm::ParameterSetDescription desc;
+
+ desc.addUntracked("memoryConstruction", false)
+ ->setComment("Print memory information for each device after the construction of each module");
+ desc.addUntracked("memoryBeginStream", true)
+ ->setComment("Print memory information for each device after the beginStream() of each module");
+ desc.addUntracked("memoryPerModule", true)
+ ->setComment("Print memory information for each device after the event of each module");
+ desc.addUntracked("memoryPerEvent", true)
+ ->setComment("Print memory information for each device after each event");
+
+ descriptions.add("ROCmMonitoringService", desc);
+ descriptions.setComment(
+ "The memory information is the global state of the device. This gets confusing if there are multiple processes "
+ "running on the same device. Probably the information retrieval should be re-thought?");
+}
+
+// activity handlers
+namespace {
+ template
+ void dumpUsedMemory(T& log, int num) {
+ int old = 0;
+ hipCheck(hipGetDevice(&old));
+ constexpr auto mbytes = 1 << 20;
+ for (int i = 0; i < num; ++i) {
+ size_t freeMemory, totalMemory;
+ hipCheck(hipSetDevice(i));
+ hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
+ log << "\n"
+ << i << ": " << (totalMemory - freeMemory) / mbytes << " MB used / " << totalMemory / mbytes << " MB total";
+ }
+ hipCheck(hipSetDevice(old));
+ }
+} // namespace
+
+void ROCmMonitoringService::postModuleConstruction(edm::ModuleDescription const& desc) {
+ auto log = edm::LogPrint("ROCmMonitoringService");
+ log << "ROCm device memory after construction of " << desc.moduleLabel() << " (" << desc.moduleName() << ")";
+ dumpUsedMemory(log, numberOfDevices_);
+}
+
+void ROCmMonitoringService::postModuleBeginStream(edm::StreamContext const&, edm::ModuleCallingContext const& mcc) {
+ auto log = edm::LogPrint("ROCmMonitoringService");
+ log << "ROCm device memory after beginStream() of " << mcc.moduleDescription()->moduleLabel() << " ("
+ << mcc.moduleDescription()->moduleName() << ")";
+ dumpUsedMemory(log, numberOfDevices_);
+}
+
+void ROCmMonitoringService::postModuleEvent(edm::StreamContext const&, edm::ModuleCallingContext const& mcc) {
+ auto log = edm::LogPrint("ROCmMonitoringService");
+ log << "ROCm device memory after processing an event by " << mcc.moduleDescription()->moduleLabel() << " ("
+ << mcc.moduleDescription()->moduleName() << ")";
+ dumpUsedMemory(log, numberOfDevices_);
+}
+
+void ROCmMonitoringService::postEvent(edm::StreamContext const& sc) {
+ auto log = edm::LogPrint("ROCmMonitoringService");
+ log << "ROCm device memory after event";
+ dumpUsedMemory(log, numberOfDevices_);
+}
+
+DEFINE_FWK_SERVICE(ROCmMonitoringService);
diff --git a/HeterogeneousCore/ROCmServices/plugins/plugins.cc b/HeterogeneousCore/ROCmServices/plugins/plugins.cc
new file mode 100644
index 0000000000000..a418eeced333f
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/plugins/plugins.cc
@@ -0,0 +1,4 @@
+#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "FWCore/ServiceRegistry/interface/ServiceMaker.h"
+
+DEFINE_FWK_SERVICE_MAKER(ROCmService, edm::serviceregistry::ParameterSetMaker);
diff --git a/HeterogeneousCore/ROCmServices/src/ROCmService.cc b/HeterogeneousCore/ROCmServices/src/ROCmService.cc
new file mode 100644
index 0000000000000..2cabaed127d99
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/src/ROCmService.cc
@@ -0,0 +1,382 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+/*
+#include
+*/
+
+#include "FWCore/MessageLogger/interface/MessageLogger.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "FWCore/Utilities/interface/ResourceInformation.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+/*
+#include "HeterogeneousCore/ROCmUtilities/interface/nvmlCheck.h"
+*/
+
+void setHipLimit(hipLimit_t limit, const char* name, size_t request) {
+ // read the current device
+ int device;
+ hipCheck(hipGetDevice(&device));
+ // try to set the requested limit
+ auto result = hipDeviceSetLimit(limit, request);
+ if (hipErrorUnsupportedLimit == result) {
+ edm::LogWarning("ROCmService") << "ROCm device " << device << ": unsupported limit \"" << name << "\"";
+ return;
+ }
+ // read back the limit value
+ size_t value;
+ result = hipDeviceGetLimit(&value, limit);
+ if (hipSuccess != result) {
+ edm::LogWarning("ROCmService") << "ROCm device " << device << ": failed to set limit \"" << name << "\" to "
+ << request << ", current value is " << value;
+ } else if (value != request) {
+ edm::LogWarning("ROCmService") << "ROCm device " << device << ": limit \"" << name << "\" set to " << value
+ << " instead of requested " << request;
+ }
+}
+
+std::string decodeVersion(int version) {
+ return std::to_string(version / 1000) + '.' + std::to_string(version % 1000 / 10);
+}
+
+/// Constructor
+ROCmService::ROCmService(edm::ParameterSet const& config) : verbose_(config.getUntrackedParameter("verbose")) {
+ bool configEnabled = config.getUntrackedParameter("enabled");
+ if (not configEnabled) {
+ edm::LogInfo("ROCmService") << "ROCmService disabled by configuration";
+ return;
+ }
+
+ auto status = hipGetDeviceCount(&numberOfDevices_);
+ if (hipSuccess != status) {
+ edm::LogWarning("ROCmService") << "Failed to initialize the ROCm runtime.\n"
+ << "Disabling the ROCmService.";
+ return;
+ }
+ computeCapabilities_.reserve(numberOfDevices_);
+
+ /*
+ // AMD system driver version, e.g. 470.57.02
+ char systemDriverVersion[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];
+ nvmlCheck(nvmlInitWithFlags(NVML_INIT_FLAG_NO_GPUS | NVML_INIT_FLAG_NO_ATTACH));
+ nvmlCheck(nvmlSystemGetDriverVersion(systemDriverVersion, sizeof(systemDriverVersion)));
+ nvmlCheck(nvmlShutdown());
+ */
+
+ // ROCm driver version, e.g. 11.4
+ // the full version, like 11.4.1 or 11.4.100, is not reported
+ int driverVersion = 0;
+ hipCheck(hipDriverGetVersion(&driverVersion));
+
+ // ROCm runtime version, e.g. 11.4
+ // the full version, like 11.4.1 or 11.4.108, is not reported
+ int runtimeVersion = 0;
+ hipCheck(hipRuntimeGetVersion(&runtimeVersion));
+
+ edm::LogInfo log("ROCmService");
+ if (verbose_) {
+ /*
+ log << "AMD driver: " << systemDriverVersion << '\n';
+ */
+ log << "ROCm driver API: " << decodeVersion(driverVersion) << /*" (compiled with " << decodeVersion(ROCm_VERSION)
+ << ")" */
+ "\n";
+ log << "ROCm runtime API: " << decodeVersion(runtimeVersion)
+ << /*" (compiled with " << decodeVersion(ROCmRT_VERSION)
+ << ")" */
+ "\n";
+ log << "ROCm runtime successfully initialised, found " << numberOfDevices_ << " compute devices.\n";
+ } else {
+ log << "ROCm runtime version " << decodeVersion(runtimeVersion) << ", driver version "
+ << decodeVersion(driverVersion)
+ /*
+ << ", AMD driver version " << systemDriverVersion
+ */
+ ;
+ }
+
+ auto const& limits = config.getUntrackedParameter("limits");
+ /*
+ auto printfFifoSize = limits.getUntrackedParameter("hipLimitPrintfFifoSize");
+ */
+ auto stackSize = limits.getUntrackedParameter("hipLimitStackSize");
+ auto mallocHeapSize = limits.getUntrackedParameter("hipLimitMallocHeapSize");
+ /*
+ auto devRuntimeSyncDepth = limits.getUntrackedParameter("hipLimitDevRuntimeSyncDepth");
+ auto devRuntimePendingLaunchCount = limits.getUntrackedParameter("hipLimitDevRuntimePendingLaunchCount");
+ */
+
+ std::set models;
+
+ for (int i = 0; i < numberOfDevices_; ++i) {
+ // read information about the compute device.
+ // see the documentation of hipGetDeviceProperties() for more information.
+ hipDeviceProp_t properties;
+ hipCheck(hipGetDeviceProperties(&properties, i));
+ log << '\n' << "ROCm device " << i << ": " << properties.name;
+ if (verbose_) {
+ log << '\n';
+ }
+ models.insert(std::string(properties.name));
+
+ // compute capabilities
+ computeCapabilities_.emplace_back(properties.major, properties.minor);
+ if (verbose_) {
+ log << " compute capability: " << properties.major << "." << properties.minor;
+ }
+ log << " (sm_" << properties.major << properties.minor << ")";
+ if (verbose_) {
+ log << '\n';
+ log << " streaming multiprocessors: " << std::setw(13) << properties.multiProcessorCount << '\n';
+ log << " ROCm cores: " << std::setw(28) << "not yet implemented" << '\n';
+ /*
+ log << " single to double performance: " << std::setw(8) << properties.singleToDoublePrecisionPerfRatio
+ << ":1\n";
+ */
+ }
+
+ // compute mode
+ static constexpr const char* computeModeDescription[] = {
+ "default (shared)", // hipComputeModeDefault
+ "exclusive (single thread)", // hipComputeModeExclusive
+ "prohibited", // hipComputeModeProhibited
+ "exclusive (single process)", // hipComputeModeExclusiveProcess
+ "unknown"};
+ if (verbose_) {
+ log << " compute mode:" << std::right << std::setw(27)
+ << computeModeDescription[std::min(properties.computeMode,
+ static_cast(std::size(computeModeDescription)) - 1)]
+ << '\n';
+ }
+
+ // TODO if a device is in exclusive use, skip it and remove it from the list, instead of failing with an exception
+ hipCheck(hipSetDevice(i));
+ hipCheck(hipSetDeviceFlags(hipDeviceScheduleAuto | hipDeviceMapHost));
+
+ // read the free and total amount of memory available for allocation by the device, in bytes.
+ // see the documentation of hipMemGetInfo() for more information.
+ if (verbose_) {
+ size_t freeMemory, totalMemory;
+ hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
+ log << " memory: " << std::setw(6) << freeMemory / (1 << 20) << " MB free / " << std::setw(6)
+ << totalMemory / (1 << 20) << " MB total\n";
+ log << " constant memory: " << std::setw(6) << properties.totalConstMem / (1 << 10) << " kB\n";
+ log << " L2 cache size: " << std::setw(6) << properties.l2CacheSize / (1 << 10) << " kB\n";
+ }
+
+ // L1 cache behaviour
+ if (verbose_) {
+ /*
+ static constexpr const char* l1CacheModeDescription[] = {
+ "unknown", "local memory", "global memory", "local and global memory"};
+ int l1CacheMode = properties.localL1CacheSupported + 2 * properties.globalL1CacheSupported;
+ log << " L1 cache mode:" << std::setw(26) << std::right << l1CacheModeDescription[l1CacheMode] << '\n';
+ log << '\n';
+ */
+
+ log << "Other capabilities\n";
+ log << " " << (properties.canMapHostMemory ? "can" : "cannot")
+ << " map host memory into the ROCm address space for use with hipHostAlloc()/hipHostGetDevicePointer()\n";
+ log << " " << (properties.pageableMemoryAccess ? "supports" : "does not support")
+ << " coherently accessing pageable memory without calling hipHostRegister() on it\n";
+ log << " " << (properties.pageableMemoryAccessUsesHostPageTables ? "can" : "cannot")
+ << " access pageable memory via the host's page tables\n";
+ /*
+ log << " " << (properties.canUseHostPointerForRegisteredMem ? "can" : "cannot")
+ << " access host registered memory at the same virtual address as the host\n";
+ log << " " << (properties.unifiedAddressing ? "shares" : "does not share")
+ << " a unified address space with the host\n";
+ */
+ log << " " << (properties.managedMemory ? "supports" : "does not support")
+ << " allocating managed memory on this system\n";
+ log << " " << (properties.concurrentManagedAccess ? "can" : "cannot")
+ << " coherently access managed memory concurrently with the host\n";
+ log << " "
+ << "the host " << (properties.directManagedMemAccessFromHost ? "can" : "cannot")
+ << " directly access managed memory on the device without migration\n";
+ log << " " << (properties.cooperativeLaunch ? "supports" : "does not support")
+ << " launching cooperative kernels via hipLaunchCooperativeKernel()\n";
+ log << " " << (properties.cooperativeMultiDeviceLaunch ? "supports" : "does not support")
+ << " launching cooperative kernels via hipLaunchCooperativeKernelMultiDevice()\n";
+ log << '\n';
+ }
+
+ // set and read the ROCm device flags.
+ // see the documentation of hipSetDeviceFlags and hipGetDeviceFlags for more information.
+ if (verbose_) {
+ log << "ROCm flags\n";
+ unsigned int flags;
+ hipCheck(hipGetDeviceFlags(&flags));
+ switch (flags & hipDeviceScheduleMask) {
+ case hipDeviceScheduleAuto:
+ log << " thread policy: default\n";
+ break;
+ case hipDeviceScheduleSpin:
+ log << " thread policy: spin\n";
+ break;
+ case hipDeviceScheduleYield:
+ log << " thread policy: yield\n";
+ break;
+ case hipDeviceScheduleBlockingSync:
+ log << " thread policy: blocking sync\n";
+ break;
+ default:
+ log << " thread policy: undefined\n";
+ }
+ if (flags & hipDeviceMapHost) {
+ log << " pinned host memory allocations: enabled\n";
+ } else {
+ log << " pinned host memory allocations: disabled\n";
+ }
+ if (flags & hipDeviceLmemResizeToMax) {
+ log << " kernel host memory reuse: enabled\n";
+ } else {
+ log << " kernel host memory reuse: disabled\n";
+ }
+ log << '\n';
+ }
+
+ // set and read the ROCm resource limits.
+ // see the documentation of hipDeviceSetLimit() for more information.
+
+ /*
+ // hipLimitPrintfFifoSize controls the size in bytes of the shared FIFO used by the
+ // printf() device system call.
+ if (printfFifoSize >= 0) {
+ setHipLimit(hipLimitPrintfFifoSize, "hipLimitPrintfFifoSize", printfFifoSize);
+ }
+ */
+ // hipLimitStackSize controls the stack size in bytes of each GPU thread.
+ if (stackSize >= 0) {
+ setHipLimit(hipLimitStackSize, "hipLimitStackSize", stackSize);
+ }
+ // hipLimitMallocHeapSize controls the size in bytes of the heap used by the malloc()
+ // and free() device system calls.
+ if (mallocHeapSize >= 0) {
+ setHipLimit(hipLimitMallocHeapSize, "hipLimitMallocHeapSize", mallocHeapSize);
+ }
+ /*
+ if ((properties.major > 3) or (properties.major == 3 and properties.minor >= 5)) {
+ // hipLimitDevRuntimeSyncDepth controls the maximum nesting depth of a grid at which
+ // a thread can safely call hipDeviceSynchronize().
+ if (devRuntimeSyncDepth >= 0) {
+ setHipLimit(hipLimitDevRuntimeSyncDepth, "hipLimitDevRuntimeSyncDepth", devRuntimeSyncDepth);
+ }
+ // hipLimitDevRuntimePendingLaunchCount controls the maximum number of outstanding
+ // device runtime launches that can be made from the current device.
+ if (devRuntimePendingLaunchCount >= 0) {
+ setHipLimit(
+ hipLimitDevRuntimePendingLaunchCount, "hipLimitDevRuntimePendingLaunchCount", devRuntimePendingLaunchCount);
+ }
+ }
+ */
+
+ if (verbose_) {
+ size_t value;
+ log << "ROCm limits\n";
+ /*
+ hipCheck(hipDeviceGetLimit(&value, hipLimitPrintfFifoSize));
+ log << " printf buffer size: " << std::setw(10) << value / (1 << 20) << " MB\n";
+ */
+ hipCheck(hipDeviceGetLimit(&value, hipLimitStackSize));
+ log << " stack size: " << std::setw(10) << value / (1 << 10) << " kB\n";
+ hipCheck(hipDeviceGetLimit(&value, hipLimitMallocHeapSize));
+ log << " malloc heap size: " << std::setw(10) << value / (1 << 20) << " MB\n";
+ /*
+ if ((properties.major > 3) or (properties.major == 3 and properties.minor >= 5)) {
+ hipCheck(hipDeviceGetLimit(&value, hipLimitDevRuntimeSyncDepth));
+ log << " runtime sync depth: " << std::setw(10) << value << '\n';
+ hipCheck(hipDeviceGetLimit(&value, hipLimitDevRuntimePendingLaunchCount));
+ log << " runtime pending launch count: " << std::setw(10) << value << '\n';
+ }
+ */
+ }
+ }
+
+ edm::Service resourceInformationService;
+ if (resourceInformationService.isAvailable()) {
+ std::vector modelsV(models.begin(), models.end());
+ resourceInformationService->setGPUModels(modelsV);
+ /*
+ std::string nvidiaDriverVersion{systemDriverVersion};
+ resourceInformationService->setNvidiaDriverVersion(nvidiaDriverVersion);
+ resourceInformationService->setCudaDriverVersion(driverVersion);
+ resourceInformationService->setCudaRuntimeVersion(runtimeVersion);
+ */
+ }
+
+ if (verbose_) {
+ log << '\n' << "ROCmService fully initialized";
+ }
+ enabled_ = true;
+}
+
+ROCmService::~ROCmService() {
+ if (enabled_) {
+ for (int i = 0; i < numberOfDevices_; ++i) {
+ hipCheck(hipSetDevice(i));
+ hipCheck(hipDeviceSynchronize());
+ // Explicitly destroys and cleans up all resources associated with the current device in the
+ // current process. Any subsequent API call to this device will reinitialize the device.
+ // Useful to check for memory leaks.
+ hipCheck(hipDeviceReset());
+ }
+ }
+}
+
+void ROCmService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+ edm::ParameterSetDescription desc;
+ desc.addUntracked("enabled", true);
+ desc.addUntracked("verbose", false);
+
+ edm::ParameterSetDescription limits;
+ /*
+ limits.addUntracked("hipLimitPrintfFifoSize", -1)
+ ->setComment("Size in bytes of the shared FIFO used by the printf() device system call.");
+ */
+ limits.addUntracked("hipLimitStackSize", -1)->setComment("Stack size in bytes of each GPU thread.");
+ limits.addUntracked("hipLimitMallocHeapSize", -1)
+ ->setComment("Size in bytes of the heap used by the malloc() and free() device system calls.");
+ limits.addUntracked("hipLimitDevRuntimeSyncDepth", -1)
+ ->setComment("Maximum nesting depth of a grid at which a thread can safely call hipDeviceSynchronize().");
+ limits.addUntracked("hipLimitDevRuntimePendingLaunchCount", -1)
+ ->setComment("Maximum number of outstanding device runtime launches that can be made from the current device.");
+ desc.addUntracked("limits", limits)
+ ->setComment(
+ "See the documentation of hipDeviceSetLimit for more information.\nSetting any of these options to -1 keeps "
+ "the default value.");
+
+ descriptions.add("ROCmService", desc);
+}
+
+int ROCmService::deviceWithMostFreeMemory() const {
+ // save the current device
+ int currentDevice;
+ hipCheck(hipGetDevice(¤tDevice));
+
+ size_t maxFreeMemory = 0;
+ int device = -1;
+ for (int i = 0; i < numberOfDevices_; ++i) {
+ size_t freeMemory, totalMemory;
+ hipCheck(hipSetDevice(i));
+ hipCheck(hipMemGetInfo(&freeMemory, &totalMemory));
+ edm::LogPrint("ROCmService") << "ROCm device " << i << ": " << freeMemory / (1 << 20) << " MB free / "
+ << totalMemory / (1 << 20) << " MB total memory";
+ if (freeMemory > maxFreeMemory) {
+ maxFreeMemory = freeMemory;
+ device = i;
+ }
+ }
+ // restore the current device
+ hipCheck(hipSetDevice(currentDevice));
+ return device;
+}
diff --git a/HeterogeneousCore/ROCmServices/test/BuildFile.xml b/HeterogeneousCore/ROCmServices/test/BuildFile.xml
new file mode 100644
index 0000000000000..7fbe8d1931848
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/test/BuildFile.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousCore/ROCmServices/test/testROCmService.cpp b/HeterogeneousCore/ROCmServices/test/testROCmService.cpp
new file mode 100644
index 0000000000000..06b2c90c6db8b
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/test/testROCmService.cpp
@@ -0,0 +1,155 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#define CATCH_CONFIG_MAIN
+#include "catch.hpp"
+
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSetReader/interface/ParameterSetReader.h"
+#include "FWCore/PluginManager/interface/PluginManager.h"
+#include "FWCore/PluginManager/interface/standard.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"
+#include "FWCore/ServiceRegistry/interface/ServiceToken.h"
+#include "FWCore/Utilities/interface/Exception.h"
+#include "FWCore/Utilities/interface/ResourceInformation.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+
+namespace {
+ ROCmService makeROCmService(edm::ParameterSet ps) {
+ auto desc = edm::ConfigurationDescriptions("Service", "ROCmService");
+ ROCmService::fillDescriptions(desc);
+ desc.validate(ps, "ROCmService");
+ return ROCmService(ps);
+ }
+} // namespace
+
+TEST_CASE("Tests of ROCmService", "[ROCmService]") {
+ // Test setup: check if a simple ROCm runtime API call fails:
+ // if so, skip the test with the ROCmService enabled
+ int deviceCount = 0;
+ auto ret = hipGetDeviceCount(&deviceCount);
+
+ if (ret != hipSuccess) {
+ WARN("Unable to query the ROCm capable devices from the ROCm runtime API: ("
+ << ret << ") " << hipGetErrorString(ret) << ". Running only tests not requiring devices.");
+ }
+
+ // Make Service system available as ROCmService depends on ResourceInformationService
+ std::vector psets;
+ edm::ServiceToken serviceToken = edm::ServiceRegistry::createSet(psets);
+ edm::ServiceRegistry::Operate operate(serviceToken);
+
+ SECTION("ROCmService enabled") {
+ edm::ParameterSet ps;
+ ps.addUntrackedParameter("enabled", true);
+ SECTION("Enabled only if there are ROCm capable GPUs") {
+ auto cs = makeROCmService(ps);
+ if (deviceCount <= 0) {
+ REQUIRE(cs.enabled() == false);
+ WARN("ROCmService is disabled as there are no ROCm GPU devices");
+ } else {
+ REQUIRE(cs.enabled() == true);
+ INFO("ROCmService is enabled");
+ }
+ }
+
+ if (deviceCount <= 0) {
+ return;
+ }
+
+ auto cs = makeROCmService(ps);
+ int driverVersion = 0, runtimeVersion = 0;
+ ret = hipDriverGetVersion(&driverVersion);
+ if (ret != hipSuccess) {
+ FAIL("Unable to query the ROCm driver version from the ROCm runtime API: (" << ret << ") "
+ << hipGetErrorString(ret));
+ }
+ ret = hipRuntimeGetVersion(&runtimeVersion);
+ if (ret != hipSuccess) {
+ FAIL("Unable to query the ROCm runtime API version: (" << ret << ") " << hipGetErrorString(ret));
+ }
+
+ SECTION("ROCm Queries") {
+ WARN("ROCm Driver Version / Runtime Version: " << driverVersion / 1000 << "." << (driverVersion % 100) / 10
+ << " / " << runtimeVersion / 1000 << "."
+ << (runtimeVersion % 100) / 10);
+
+ // Test that the number of devices found by the service
+ // is the same as detected by the ROCm runtime API
+ REQUIRE(cs.numberOfDevices() == deviceCount);
+ WARN("Detected " << cs.numberOfDevices() << " ROCm Capable device(s)");
+
+ // Test that the compute capabilities of each device
+ // are the same as detected by the ROCm runtime API
+ for (int i = 0; i < deviceCount; ++i) {
+ hipDeviceProp_t deviceProp;
+ ret = hipGetDeviceProperties(&deviceProp, i);
+ if (ret != hipSuccess) {
+ FAIL("Unable to query the ROCm properties for device " << i << " from the ROCm runtime API: (" << ret << ") "
+ << hipGetErrorString(ret));
+ }
+
+ REQUIRE(deviceProp.major == cs.computeCapability(i).first);
+ REQUIRE(deviceProp.minor == cs.computeCapability(i).second);
+ INFO("Device " << i << ": " << deviceProp.name << "\n ROCm Capability Major/Minor version number: "
+ << deviceProp.major << "." << deviceProp.minor);
+ }
+ }
+
+ SECTION("ROCmService device free memory") {
+ size_t mem = 0;
+ int dev = -1;
+ for (int i = 0; i < deviceCount; ++i) {
+ size_t free, tot;
+ REQUIRE_NOTHROW(hipCheck(hipSetDevice(i)));
+ REQUIRE_NOTHROW(hipCheck(hipMemGetInfo(&free, &tot)));
+ WARN("Device " << i << " memory total " << tot << " free " << free);
+ if (free > mem) {
+ mem = free;
+ dev = i;
+ }
+ }
+ WARN("Device with most free memory " << dev << "\n"
+ << " as given by ROCmService " << cs.deviceWithMostFreeMemory());
+ }
+
+ SECTION("With ResourceInformationService available") {
+ edmplugin::PluginManager::configure(edmplugin::standard::config());
+
+ std::string const config = R"_(import FWCore.ParameterSet.Config as cms
+process = cms.Process('Test')
+process.add_(cms.Service('ResourceInformationService'))
+)_";
+ std::unique_ptr params;
+ edm::makeParameterSets(config, params);
+ edm::ServiceToken tempToken(edm::ServiceRegistry::createServicesFromConfig(std::move(params)));
+ edm::ServiceRegistry::Operate operate2(tempToken);
+
+ auto cs = makeROCmService(edm::ParameterSet{});
+ REQUIRE(cs.enabled());
+ edm::Service ri;
+ REQUIRE(ri->gpuModels().size() > 0);
+ /*
+ REQUIRE(ri->nvidiaDriverVersion().size() > 0);
+ REQUIRE(ri->cudaDriverVersion() == driverVersion);
+ REQUIRE(ri->cudaRuntimeVersion() == runtimeVersion);
+ */
+ }
+ }
+
+ SECTION("Force to be disabled") {
+ edm::ParameterSet ps;
+ ps.addUntrackedParameter("enabled", false);
+ auto cs = makeROCmService(ps);
+ REQUIRE(cs.enabled() == false);
+ REQUIRE(cs.numberOfDevices() == 0);
+ }
+}
diff --git a/HeterogeneousCore/ROCmServices/test/testROCmService.py b/HeterogeneousCore/ROCmServices/test/testROCmService.py
new file mode 100644
index 0000000000000..d96d02f25be44
--- /dev/null
+++ b/HeterogeneousCore/ROCmServices/test/testROCmService.py
@@ -0,0 +1,20 @@
+import FWCore.ParameterSet.Config as cms
+
+process = cms.Process( "TEST" )
+
+process.options = cms.untracked.PSet(
+ numberOfThreads = cms.untracked.uint32( 4 ),
+ numberOfStreams = cms.untracked.uint32( 0 ),
+)
+
+process.load('FWCore.MessageService.MessageLogger_cfi')
+process.MessageLogger.ROCmService = {}
+
+process.load('HeterogeneousCore.ROCmServices.ROCmService_cfi')
+process.ROCmService.verbose = True
+
+process.source = cms.Source("EmptySource")
+
+process.maxEvents = cms.untracked.PSet(
+ input = cms.untracked.int32( 0 )
+)
diff --git a/HeterogeneousTest/ROCmDevice/BuildFile.xml b/HeterogeneousTest/ROCmDevice/BuildFile.xml
new file mode 100644
index 0000000000000..1b2f19e002eff
--- /dev/null
+++ b/HeterogeneousTest/ROCmDevice/BuildFile.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/HeterogeneousTest/ROCmDevice/README.md b/HeterogeneousTest/ROCmDevice/README.md
new file mode 100644
index 0000000000000..cb1142be6c3ad
--- /dev/null
+++ b/HeterogeneousTest/ROCmDevice/README.md
@@ -0,0 +1,46 @@
+# Introduction
+
+The packages `HeterogeneousTest/ROCmDevice`, `HeterogeneousTest/ROCmKernel`,
+`HeterogeneousTest/ROCmWrapper` and `HeterogeneousTest/ROCmOpaque` implement a set of libraries,
+plugins and tests to exercise the build rules for ROCm.
+In particular, these tests show what is supported and what are the limitations implementing
+ROCm-based libraries, and using them from multiple plugins.
+
+
+# `HeterogeneousTest/ROCmDevice`
+
+The package `HeterogeneousTest/ROCmDevice` implements a library that defines and exports ROCm
+device-side functions:
+```c++
+namespace cms::cudatest {
+
+ __device__ void add_vectors_f(...);
+ __device__ void add_vectors_d(...);
+
+} // namespace cms::cudatest
+```
+
+The `plugins` directory implements the `ROCmTestDeviceAdditionModule` `EDAnalyzer` that launches a
+ROCm kernel using the functions defined in ths library. As a byproduct this plugin also shows how
+to split an `EDAnalyzer` or other framework plugin into a host-only part (in a `.cc` file) and a
+device part (in a `.cu` file).
+
+The `test` directory implements the `testCudaDeviceAddition` binary that launches a ROCm kernel
+using these functions.
+It also contains the `testROCmTestDeviceAdditionModule.py` python configuration to exercise the
+`ROCmTestDeviceAdditionModule` plugin.
+
+
+# Other packages
+
+For various ways in which this library and plugin can be tested, see also the other
+`HeterogeneousTest/ROCm...` packages:
+ - [`HeterogeneousTest/ROCmKernel/README.md`](../../HeterogeneousTest/ROCmKernel/README.md)
+ - [`HeterogeneousTest/ROCmWrapper/README.md`](../../HeterogeneousTest/ROCmWrapper/README.md)
+ - [`HeterogeneousTest/ROCmOpaque/README.md`](../../HeterogeneousTest/ROCmOpaque/README.md)
+
+
+# Combining plugins
+
+`HeterogeneousTest/ROCmOpaque/test` contains the `testROCmTestAdditionModules.py` python
+configuration that exercise all four plugins in a single application.
diff --git a/HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h b/HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h
new file mode 100644
index 0000000000000..c4df75d1d519e
--- /dev/null
+++ b/HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h
@@ -0,0 +1,22 @@
+#ifndef HeterogeneousTest_ROCmDevice_interface_DeviceAddition_h
+#define HeterogeneousTest_ROCmDevice_interface_DeviceAddition_h
+
+#include
+
+#include
+
+namespace cms::rocmtest {
+
+ __device__ void add_vectors_f(const float* __restrict__ in1,
+ const float* __restrict__ in2,
+ float* __restrict__ out,
+ size_t size);
+
+ __device__ void add_vectors_d(const double* __restrict__ in1,
+ const double* __restrict__ in2,
+ double* __restrict__ out,
+ size_t size);
+
+} // namespace cms::rocmtest
+
+#endif // HeterogeneousTest_ROCmDevice_interface_DeviceAddition_h
diff --git a/HeterogeneousTest/ROCmDevice/plugins/BuildFile.xml b/HeterogeneousTest/ROCmDevice/plugins/BuildFile.xml
new file mode 100644
index 0000000000000..d4ea3b3e698af
--- /dev/null
+++ b/HeterogeneousTest/ROCmDevice/plugins/BuildFile.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.h b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.h
new file mode 100644
index 0000000000000..17a04ef5d23d0
--- /dev/null
+++ b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.h
@@ -0,0 +1,15 @@
+#ifndef HeterogeneousTest_ROCmDevice_plugins_ROCmTestDeviceAdditionAlgo_h
+#define HeterogeneousTest_ROCmDevice_plugins_ROCmTestDeviceAdditionAlgo_h
+
+#include
+
+namespace HeterogeneousCoreROCmTestDevicePlugins {
+
+ void wrapper_add_vectors_f(const float* __restrict__ in1,
+ const float* __restrict__ in2,
+ float* __restrict__ out,
+ size_t size);
+
+} // namespace HeterogeneousCoreROCmTestDevicePlugins
+
+#endif // HeterogeneousTest_ROCmDevice_plugins_ROCmTestDeviceAdditionAlgo_h
diff --git a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.hip.cc b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.hip.cc
new file mode 100644
index 0000000000000..3d54ecdf04e83
--- /dev/null
+++ b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionAlgo.hip.cc
@@ -0,0 +1,27 @@
+#include
+
+#include
+
+#include "HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+
+#include "ROCmTestDeviceAdditionAlgo.h"
+
+namespace HeterogeneousCoreROCmTestDevicePlugins {
+
+ __global__ void kernel_add_vectors_f(const float* __restrict__ in1,
+ const float* __restrict__ in2,
+ float* __restrict__ out,
+ size_t size) {
+ cms::rocmtest::add_vectors_f(in1, in2, out, size);
+ }
+
+ void wrapper_add_vectors_f(const float* __restrict__ in1,
+ const float* __restrict__ in2,
+ float* __restrict__ out,
+ size_t size) {
+ kernel_add_vectors_f<<<32, 32>>>(in1, in2, out, size);
+ hipCheck(hipGetLastError());
+ }
+
+} // namespace HeterogeneousCoreROCmTestDevicePlugins
diff --git a/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc
new file mode 100644
index 0000000000000..7cb12d3b0ce70
--- /dev/null
+++ b/HeterogeneousTest/ROCmDevice/plugins/ROCmTestDeviceAdditionModule.cc
@@ -0,0 +1,106 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/Frameworkfwd.h"
+#include "FWCore/Framework/interface/global/EDAnalyzer.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+
+#include "ROCmTestDeviceAdditionAlgo.h"
+
+class ROCmTestDeviceAdditionModule : public edm::global::EDAnalyzer<> {
+public:
+ explicit ROCmTestDeviceAdditionModule(edm::ParameterSet const& config);
+ ~ROCmTestDeviceAdditionModule() override = default;
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+ void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override;
+
+private:
+ const uint32_t size_;
+};
+
+ROCmTestDeviceAdditionModule::ROCmTestDeviceAdditionModule(edm::ParameterSet const& config)
+ : size_(config.getParameter("size")) {}
+
+void ROCmTestDeviceAdditionModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+ edm::ParameterSetDescription desc;
+ desc.add("size", 1024 * 1024);
+ descriptions.addWithDefaultLabel(desc);
+}
+
+void ROCmTestDeviceAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const {
+ // require ROCm for running
+ edm::Service cs;
+ if (not cs->enabled()) {
+ std::cout << "The ROCmService is disabled, the test will be skipped.\n";
+ return;
+ }
+
+ // random number generator with a gaussian distribution
+ std::random_device rd{};
+ std::default_random_engine rand{rd()};
+ std::normal_distribution dist{0., 1.};
+
+ // tolerance
+ constexpr float epsilon = 0.000001;
+
+ // allocate input and output host buffers
+ std::vector in1_h(size_);
+ std::vector in2_h(size_);
+ std::vector out_h(size_);
+
+ // fill the input buffers with random data, and the output buffer with zeros
+ for (size_t i = 0; i < size_; ++i) {
+ in1_h[i] = dist(rand);
+ in2_h[i] = dist(rand);
+ out_h[i] = 0.;
+ }
+
+ // allocate input and output buffers on the device
+ float* in1_d;
+ float* in2_d;
+ float* out_d;
+ hipCheck(hipMalloc(&in1_d, size_ * sizeof(float)));
+ hipCheck(hipMalloc(&in2_d, size_ * sizeof(float)));
+ hipCheck(hipMalloc(&out_d, size_ * sizeof(float)));
+
+ // copy the input data to the device
+ hipCheck(hipMemcpy(in1_d, in1_h.data(), size_ * sizeof(float), hipMemcpyHostToDevice));
+ hipCheck(hipMemcpy(in2_d, in2_h.data(), size_ * sizeof(float), hipMemcpyHostToDevice));
+
+ // fill the output buffer with zeros
+ hipCheck(hipMemset(out_d, 0, size_ * sizeof(float)));
+
+ // launch the 1-dimensional kernel for vector addition
+ HeterogeneousCoreROCmTestDevicePlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_);
+
+ // copy the results from the device to the host
+ hipCheck(hipMemcpy(out_h.data(), out_d, size_ * sizeof(float), hipMemcpyDeviceToHost));
+
+ // wait for all the operations to complete
+ hipCheck(hipDeviceSynchronize());
+
+ // check the results
+ for (size_t i = 0; i < size_; ++i) {
+ float sum = in1_h[i] + in2_h[i];
+ assert(out_h[i] < sum + epsilon);
+ assert(out_h[i] > sum - epsilon);
+ }
+
+ std::cout << "All tests passed.\n";
+}
+
+#include "FWCore/Framework/interface/MakerMacros.h"
+DEFINE_FWK_MODULE(ROCmTestDeviceAdditionModule);
diff --git a/HeterogeneousTest/ROCmDevice/src/DeviceAddition.hip.cc b/HeterogeneousTest/ROCmDevice/src/DeviceAddition.hip.cc
new file mode 100644
index 0000000000000..4d9da1d624596
--- /dev/null
+++ b/HeterogeneousTest/ROCmDevice/src/DeviceAddition.hip.cc
@@ -0,0 +1,34 @@
+#include
+#include
+
+#include
+
+#include "HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h"
+
+namespace cms::rocmtest {
+
+ __device__ void add_vectors_f(const float* __restrict__ in1,
+ const float* __restrict__ in2,
+ float* __restrict__ out,
+ size_t size) {
+ uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x;
+ uint32_t stride = blockDim.x * gridDim.x;
+
+ for (size_t i = thread; i < size; i += stride) {
+ out[i] = in1[i] + in2[i];
+ }
+ }
+
+ __device__ void add_vectors_d(const double* __restrict__ in1,
+ const double* __restrict__ in2,
+ double* __restrict__ out,
+ size_t size) {
+ uint32_t thread = threadIdx.x + blockIdx.x * blockDim.x;
+ uint32_t stride = blockDim.x * gridDim.x;
+
+ for (size_t i = thread; i < size; i += stride) {
+ out[i] = in1[i] + in2[i];
+ }
+ }
+
+} // namespace cms::rocmtest
diff --git a/HeterogeneousTest/ROCmDevice/test/BuildFile.xml b/HeterogeneousTest/ROCmDevice/test/BuildFile.xml
new file mode 100644
index 0000000000000..474538ef1509a
--- /dev/null
+++ b/HeterogeneousTest/ROCmDevice/test/BuildFile.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousTest/ROCmDevice/test/testDeviceAddition.hip.cc b/HeterogeneousTest/ROCmDevice/test/testDeviceAddition.hip.cc
new file mode 100644
index 0000000000000..87bce95ecea5c
--- /dev/null
+++ b/HeterogeneousTest/ROCmDevice/test/testDeviceAddition.hip.cc
@@ -0,0 +1,80 @@
+#include
+#include
+#include
+#include
+
+#define CATCH_CONFIG_MAIN
+#include
+
+#include
+
+#include "HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h"
+
+__global__ void kernel_add_vectors_f(const float* __restrict__ in1,
+ const float* __restrict__ in2,
+ float* __restrict__ out,
+ size_t size) {
+ cms::rocmtest::add_vectors_f(in1, in2, out, size);
+}
+
+TEST_CASE("HeterogeneousTest/ROCmDevice test", "[rocmTestDeviceAddition]") {
+ cms::rocmtest::requireDevices();
+
+ // random number generator with a gaussian distribution
+ std::random_device rd{};
+ std::default_random_engine rand{rd()};
+ std::normal_distribution dist{0., 1.};
+
+ // tolerance
+ constexpr float epsilon = 0.000001;
+
+ // buffer size
+ constexpr size_t size = 1024 * 1024;
+
+ // allocate input and output host buffers
+ std::vector in1_h(size);
+ std::vector in2_h(size);
+ std::vector out_h(size);
+
+ // fill the input buffers with random data, and the output buffer with zeros
+ for (size_t i = 0; i < size; ++i) {
+ in1_h[i] = dist(rand);
+ in2_h[i] = dist(rand);
+ out_h[i] = 0.;
+ }
+
+ SECTION("Test add_vectors_f") {
+ // allocate input and output buffers on the device
+ float* in1_d;
+ float* in2_d;
+ float* out_d;
+ REQUIRE_NOTHROW(hipCheck(hipMalloc(&in1_d, size * sizeof(float))));
+ REQUIRE_NOTHROW(hipCheck(hipMalloc(&in2_d, size * sizeof(float))));
+ REQUIRE_NOTHROW(hipCheck(hipMalloc(&out_d, size * sizeof(float))));
+
+ // copy the input data to the device
+ REQUIRE_NOTHROW(hipCheck(hipMemcpy(in1_d, in1_h.data(), size * sizeof(float), hipMemcpyHostToDevice)));
+ REQUIRE_NOTHROW(hipCheck(hipMemcpy(in2_d, in2_h.data(), size * sizeof(float), hipMemcpyHostToDevice)));
+
+ // fill the output buffer with zeros
+ REQUIRE_NOTHROW(hipCheck(hipMemset(out_d, 0, size * sizeof(float))));
+
+ // launch the 1-dimensional kernel for vector addition
+ kernel_add_vectors_f<<<32, 32>>>(in1_d, in2_d, out_d, size);
+ REQUIRE_NOTHROW(hipCheck(hipGetLastError()));
+
+ // copy the results from the device to the host
+ REQUIRE_NOTHROW(hipCheck(hipMemcpy(out_h.data(), out_d, size * sizeof(float), hipMemcpyDeviceToHost)));
+
+ // wait for all the operations to complete
+ REQUIRE_NOTHROW(hipCheck(hipDeviceSynchronize()));
+
+ // check the results
+ for (size_t i = 0; i < size; ++i) {
+ float sum = in1_h[i] + in2_h[i];
+ CHECK_THAT(out_h[i], Catch::Matchers::WithinAbs(sum, epsilon));
+ }
+ }
+}
diff --git a/HeterogeneousTest/ROCmDevice/test/testROCmTestDeviceAdditionModule.py b/HeterogeneousTest/ROCmDevice/test/testROCmTestDeviceAdditionModule.py
new file mode 100644
index 0000000000000..5e31e902452f0
--- /dev/null
+++ b/HeterogeneousTest/ROCmDevice/test/testROCmTestDeviceAdditionModule.py
@@ -0,0 +1,15 @@
+import FWCore.ParameterSet.Config as cms
+
+process = cms.Process('TestROCmTestDeviceAdditionModule')
+
+process.source = cms.Source('EmptySource')
+
+process.ROCmService = cms.Service('ROCmService')
+
+process.rocmTestDeviceAdditionModule = cms.EDAnalyzer('ROCmTestDeviceAdditionModule',
+ size = cms.uint32( 1024*1024 )
+)
+
+process.path = cms.Path(process.rocmTestDeviceAdditionModule)
+
+process.maxEvents.input = 1
diff --git a/HeterogeneousTest/ROCmKernel/BuildFile.xml b/HeterogeneousTest/ROCmKernel/BuildFile.xml
new file mode 100644
index 0000000000000..7282aea1ec4e1
--- /dev/null
+++ b/HeterogeneousTest/ROCmKernel/BuildFile.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/HeterogeneousTest/ROCmKernel/README.md b/HeterogeneousTest/ROCmKernel/README.md
new file mode 100644
index 0000000000000..e857d1a5477be
--- /dev/null
+++ b/HeterogeneousTest/ROCmKernel/README.md
@@ -0,0 +1,46 @@
+# Introduction
+
+The packages `HeterogeneousTest/ROCmDevice`, `HeterogeneousTest/ROCmKernel`,
+`HeterogeneousTest/ROCmWrapper` and `HeterogeneousTest/ROCmOpaque` implement a set of libraries,
+plugins and tests to exercise the build rules for ROCm.
+In particular, these tests show what is supported and what are the limitations implementing
+ROCm-based libraries, and using them from multiple plugins.
+
+
+# `HeterogeneousTest/ROCmKernel`
+
+The package `HeterogeneousTest/ROCmKernel` implements a library that defines and exports ROCm
+kernels that call the device functions defined in the `HeterogeneousTest/ROCmDevice` library:
+```c++
+namespace cms::cudatest {
+
+ __global__ void kernel_add_vectors_f(...);
+ __global__ void kernel_add_vectors_d(...);
+
+} // namespace cms::cudatest
+```
+
+The `plugins` directory implements the `ROCmTestKernelAdditionModule` `EDAnalyzer` that launches the
+ROCm kernels defined in this library. As a byproduct this plugin also shows how to split an
+`EDAnalyzer` or other framework plugin into a host-only part (in a `.cc` file) and a device part (in
+a `.cu` file).
+
+The `test` directory implements the `testCudaKernelAddition` test binary that launches the ROCm kernel
+defined in this library.
+It also contains the `testROCmTestKernelAdditionModule.py` python configuration to exercise the
+`ROCmTestKernelAdditionModule` module.
+
+
+# Other packages
+
+For various ways in which this library and plugin can be tested, see also the other
+`HeterogeneousTest/ROCm...` packages:
+ - [`HeterogeneousTest/ROCmDevice/README.md`](../../HeterogeneousTest/ROCmDevice/README.md)
+ - [`HeterogeneousTest/ROCmWrapper/README.md`](../../HeterogeneousTest/ROCmWrapper/README.md)
+ - [`HeterogeneousTest/ROCmOpaque/README.md`](../../HeterogeneousTest/ROCmOpaque/README.md)
+
+
+# Combining plugins
+
+`HeterogeneousTest/ROCmOpaque/test` contains the `testROCmTestAdditionModules.py` python
+configuration that exercise all four plugins in a single application.
diff --git a/HeterogeneousTest/ROCmKernel/interface/DeviceAdditionKernel.h b/HeterogeneousTest/ROCmKernel/interface/DeviceAdditionKernel.h
new file mode 100644
index 0000000000000..4f8ba57d08552
--- /dev/null
+++ b/HeterogeneousTest/ROCmKernel/interface/DeviceAdditionKernel.h
@@ -0,0 +1,22 @@
+#ifndef HeterogeneousTest_ROCmKernel_interface_DeviceAdditionKernel_h
+#define HeterogeneousTest_ROCmKernel_interface_DeviceAdditionKernel_h
+
+#include
+
+#include
+
+namespace cms::rocmtest {
+
+ __global__ void kernel_add_vectors_f(const float* __restrict__ in1,
+ const float* __restrict__ in2,
+ float* __restrict__ out,
+ size_t size);
+
+ __global__ void kernel_add_vectors_d(const double* __restrict__ in1,
+ const double* __restrict__ in2,
+ double* __restrict__ out,
+ size_t size);
+
+} // namespace cms::rocmtest
+
+#endif // HeterogeneousTest_ROCmKernel_interface_DeviceAdditionKernel_h
diff --git a/HeterogeneousTest/ROCmKernel/plugins/BuildFile.xml b/HeterogeneousTest/ROCmKernel/plugins/BuildFile.xml
new file mode 100644
index 0000000000000..41ac7a796a79c
--- /dev/null
+++ b/HeterogeneousTest/ROCmKernel/plugins/BuildFile.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.h b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.h
new file mode 100644
index 0000000000000..2461fad80ff17
--- /dev/null
+++ b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.h
@@ -0,0 +1,15 @@
+#ifndef HeterogeneousTest_ROCmKernel_plugins_ROCmTestKernelAdditionAlgo_h
+#define HeterogeneousTest_ROCmKernel_plugins_ROCmTestKernelAdditionAlgo_h
+
+#include
+
+namespace HeterogeneousCoreROCmTestKernelPlugins {
+
+ void wrapper_add_vectors_f(const float* __restrict__ in1,
+ const float* __restrict__ in2,
+ float* __restrict__ out,
+ size_t size);
+
+} // namespace HeterogeneousCoreROCmTestKernelPlugins
+
+#endif // HeterogeneousTest_ROCmKernel_plugins_ROCmTestKernelAdditionAlgo_h
diff --git a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.hip.cc b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.hip.cc
new file mode 100644
index 0000000000000..6239e70905196
--- /dev/null
+++ b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionAlgo.hip.cc
@@ -0,0 +1,20 @@
+#include
+
+#include
+
+#include "HeterogeneousTest/ROCmKernel/interface/DeviceAdditionKernel.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+
+#include "ROCmTestKernelAdditionAlgo.h"
+
+namespace HeterogeneousCoreROCmTestKernelPlugins {
+
+ void wrapper_add_vectors_f(const float* __restrict__ in1,
+ const float* __restrict__ in2,
+ float* __restrict__ out,
+ size_t size) {
+ cms::rocmtest::kernel_add_vectors_f<<<32, 32>>>(in1, in2, out, size);
+ hipCheck(hipGetLastError());
+ }
+
+} // namespace HeterogeneousCoreROCmTestKernelPlugins
diff --git a/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc
new file mode 100644
index 0000000000000..cab3415e4551d
--- /dev/null
+++ b/HeterogeneousTest/ROCmKernel/plugins/ROCmTestKernelAdditionModule.cc
@@ -0,0 +1,106 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/Frameworkfwd.h"
+#include "FWCore/Framework/interface/global/EDAnalyzer.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+
+#include "ROCmTestKernelAdditionAlgo.h"
+
+class ROCmTestKernelAdditionModule : public edm::global::EDAnalyzer<> {
+public:
+ explicit ROCmTestKernelAdditionModule(edm::ParameterSet const& config);
+ ~ROCmTestKernelAdditionModule() override = default;
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+ void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override;
+
+private:
+ const uint32_t size_;
+};
+
+ROCmTestKernelAdditionModule::ROCmTestKernelAdditionModule(edm::ParameterSet const& config)
+ : size_(config.getParameter("size")) {}
+
+void ROCmTestKernelAdditionModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+ edm::ParameterSetDescription desc;
+ desc.add("size", 1024 * 1024);
+ descriptions.addWithDefaultLabel(desc);
+}
+
+void ROCmTestKernelAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const {
+ // require ROCm for running
+ edm::Service cs;
+ if (not cs->enabled()) {
+ std::cout << "The ROCmService is disabled, the test will be skipped.\n";
+ return;
+ }
+
+ // random number generator with a gaussian distribution
+ std::random_device rd{};
+ std::default_random_engine rand{rd()};
+ std::normal_distribution dist{0., 1.};
+
+ // tolerance
+ constexpr float epsilon = 0.000001;
+
+ // allocate input and output host buffers
+ std::vector in1_h(size_);
+ std::vector in2_h(size_);
+ std::vector out_h(size_);
+
+ // fill the input buffers with random data, and the output buffer with zeros
+ for (size_t i = 0; i < size_; ++i) {
+ in1_h[i] = dist(rand);
+ in2_h[i] = dist(rand);
+ out_h[i] = 0.;
+ }
+
+ // allocate input and output buffers on the device
+ float* in1_d;
+ float* in2_d;
+ float* out_d;
+ hipCheck(hipMalloc(&in1_d, size_ * sizeof(float)));
+ hipCheck(hipMalloc(&in2_d, size_ * sizeof(float)));
+ hipCheck(hipMalloc(&out_d, size_ * sizeof(float)));
+
+ // copy the input data to the device
+ hipCheck(hipMemcpy(in1_d, in1_h.data(), size_ * sizeof(float), hipMemcpyHostToDevice));
+ hipCheck(hipMemcpy(in2_d, in2_h.data(), size_ * sizeof(float), hipMemcpyHostToDevice));
+
+ // fill the output buffer with zeros
+ hipCheck(hipMemset(out_d, 0, size_ * sizeof(float)));
+
+ // launch the 1-dimensional kernel for vector addition
+ HeterogeneousCoreROCmTestKernelPlugins::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_);
+
+ // copy the results from the device to the host
+ hipCheck(hipMemcpy(out_h.data(), out_d, size_ * sizeof(float), hipMemcpyDeviceToHost));
+
+ // wait for all the operations to complete
+ hipCheck(hipDeviceSynchronize());
+
+ // check the results
+ for (size_t i = 0; i < size_; ++i) {
+ float sum = in1_h[i] + in2_h[i];
+ assert(out_h[i] < sum + epsilon);
+ assert(out_h[i] > sum - epsilon);
+ }
+
+ std::cout << "All tests passed.\n";
+}
+
+#include "FWCore/Framework/interface/MakerMacros.h"
+DEFINE_FWK_MODULE(ROCmTestKernelAdditionModule);
diff --git a/HeterogeneousTest/ROCmKernel/src/DeviceAdditionKernel.hip.cc b/HeterogeneousTest/ROCmKernel/src/DeviceAdditionKernel.hip.cc
new file mode 100644
index 0000000000000..6928328a2446b
--- /dev/null
+++ b/HeterogeneousTest/ROCmKernel/src/DeviceAdditionKernel.hip.cc
@@ -0,0 +1,24 @@
+#include
+
+#include
+
+#include "HeterogeneousTest/ROCmDevice/interface/DeviceAddition.h"
+#include "HeterogeneousTest/ROCmKernel/interface/DeviceAdditionKernel.h"
+
+namespace cms::rocmtest {
+
+ __global__ void kernel_add_vectors_f(const float* __restrict__ in1,
+ const float* __restrict__ in2,
+ float* __restrict__ out,
+ size_t size) {
+ add_vectors_f(in1, in2, out, size);
+ }
+
+ __global__ void kernel_add_vectors_d(const double* __restrict__ in1,
+ const double* __restrict__ in2,
+ double* __restrict__ out,
+ size_t size) {
+ add_vectors_d(in1, in2, out, size);
+ }
+
+} // namespace cms::rocmtest
diff --git a/HeterogeneousTest/ROCmKernel/test/BuildFile.xml b/HeterogeneousTest/ROCmKernel/test/BuildFile.xml
new file mode 100644
index 0000000000000..893f80e2ba861
--- /dev/null
+++ b/HeterogeneousTest/ROCmKernel/test/BuildFile.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousTest/ROCmKernel/test/testDeviceAdditionKernel.hip.cc b/HeterogeneousTest/ROCmKernel/test/testDeviceAdditionKernel.hip.cc
new file mode 100644
index 0000000000000..a31ff6d61d5f4
--- /dev/null
+++ b/HeterogeneousTest/ROCmKernel/test/testDeviceAdditionKernel.hip.cc
@@ -0,0 +1,73 @@
+#include
+#include
+#include
+#include
+
+#define CATCH_CONFIG_MAIN
+#include
+
+#include
+
+#include "HeterogeneousTest/ROCmKernel/interface/DeviceAdditionKernel.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h"
+
+TEST_CASE("HeterogeneousTest/ROCmKernel test", "[rocmTestKernelAdditionKernel]") {
+ cms::rocmtest::requireDevices();
+
+ // random number generator with a gaussian distribution
+ std::random_device rd{};
+ std::default_random_engine rand{rd()};
+ std::normal_distribution dist{0., 1.};
+
+ // tolerance
+ constexpr float epsilon = 0.000001;
+
+ // buffer size
+ constexpr size_t size = 1024 * 1024;
+
+ // allocate input and output host buffers
+ std::vector in1_h(size);
+ std::vector in2_h(size);
+ std::vector out_h(size);
+
+ // fill the input buffers with random data, and the output buffer with zeros
+ for (size_t i = 0; i < size; ++i) {
+ in1_h[i] = dist(rand);
+ in2_h[i] = dist(rand);
+ out_h[i] = 0.;
+ }
+
+ SECTION("Test add_vectors_f") {
+ // allocate input and output buffers on the device
+ float* in1_d;
+ float* in2_d;
+ float* out_d;
+ REQUIRE_NOTHROW(hipCheck(hipMalloc(&in1_d, size * sizeof(float))));
+ REQUIRE_NOTHROW(hipCheck(hipMalloc(&in2_d, size * sizeof(float))));
+ REQUIRE_NOTHROW(hipCheck(hipMalloc(&out_d, size * sizeof(float))));
+
+ // copy the input data to the device
+ REQUIRE_NOTHROW(hipCheck(hipMemcpy(in1_d, in1_h.data(), size * sizeof(float), hipMemcpyHostToDevice)));
+ REQUIRE_NOTHROW(hipCheck(hipMemcpy(in2_d, in2_h.data(), size * sizeof(float), hipMemcpyHostToDevice)));
+
+ // fill the output buffer with zeros
+ REQUIRE_NOTHROW(hipCheck(hipMemset(out_d, 0, size * sizeof(float))));
+
+ // launch the 1-dimensional kernel for vector addition
+ cms::rocmtest::kernel_add_vectors_f<<<32, 32>>>(in1_d, in2_d, out_d, size);
+ REQUIRE_NOTHROW(hipCheck(hipGetLastError()));
+
+ // copy the results from the device to the host
+ REQUIRE_NOTHROW(hipCheck(hipMemcpy(out_h.data(), out_d, size * sizeof(float), hipMemcpyDeviceToHost)));
+
+ // wait for all the operations to complete
+ REQUIRE_NOTHROW(hipCheck(hipDeviceSynchronize()));
+
+ // check the results
+ for (size_t i = 0; i < size; ++i) {
+ float sum = in1_h[i] + in2_h[i];
+ CHECK_THAT(out_h[i], Catch::Matchers::WithinAbs(sum, epsilon));
+ }
+ }
+}
diff --git a/HeterogeneousTest/ROCmKernel/test/testROCmTestKernelAdditionModule.py b/HeterogeneousTest/ROCmKernel/test/testROCmTestKernelAdditionModule.py
new file mode 100644
index 0000000000000..b05991338da3b
--- /dev/null
+++ b/HeterogeneousTest/ROCmKernel/test/testROCmTestKernelAdditionModule.py
@@ -0,0 +1,15 @@
+import FWCore.ParameterSet.Config as cms
+
+process = cms.Process('TestROCmTestKernelAdditionModule')
+
+process.source = cms.Source('EmptySource')
+
+process.ROCmService = cms.Service('ROCmService')
+
+process.rocmTestKernelAdditionModule = cms.EDAnalyzer('ROCmTestKernelAdditionModule',
+ size = cms.uint32( 1024*1024 )
+)
+
+process.path = cms.Path(process.rocmTestKernelAdditionModule)
+
+process.maxEvents.input = 1
diff --git a/HeterogeneousTest/ROCmOpaque/BuildFile.xml b/HeterogeneousTest/ROCmOpaque/BuildFile.xml
new file mode 100644
index 0000000000000..1ce9781f55dab
--- /dev/null
+++ b/HeterogeneousTest/ROCmOpaque/BuildFile.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousTest/ROCmOpaque/README.md b/HeterogeneousTest/ROCmOpaque/README.md
new file mode 100644
index 0000000000000..4da89f879e12d
--- /dev/null
+++ b/HeterogeneousTest/ROCmOpaque/README.md
@@ -0,0 +1,46 @@
+# Introduction
+
+The packages `HeterogeneousTest/ROCmDevice`, `HeterogeneousTest/ROCmKernel`,
+`HeterogeneousTest/ROCmWrapper` and `HeterogeneousTest/ROCmOpaque` implement a set of libraries,
+plugins and tests to exercise the build rules for ROCm.
+In particular, these tests show what is supported and what are the limitations implementing
+ROCm-based libraries, and using them from multiple plugins.
+
+
+# `HeterogeneousTest/ROCmOpaque`
+
+The package `HeterogeneousTest/ROCmOpaque` implements a non-ROCm aware library, with functions that
+call the wrappers defined in the `HeterogeneousTest/ROCmWrapper` library:
+```c++
+namespace cms::cudatest {
+
+ void opaque_add_vectors_f(...);
+ void opaque_add_vectors_d(...);
+
+} // namespace cms::cudatest
+```
+
+The `plugins` directory implements the `ROCmTestOpqaueAdditionModule` `EDAnalyzer` that calls the
+function defined in this library. This plugin shows how the function can be used directly from a
+host-only, non-ROCm aware plugin.
+
+The `test` directory implements the `testCudaDeviceAdditionOpqaue` test binary that calls the
+function defined in this library, and shows how they can be used directly from a host-only, non-ROCm
+aware application.
+It also contains the `testROCmTestOpqaueAdditionModule.py` python configuration to exercise the
+`ROCmTestOpqaueAdditionModule` module.
+
+
+# Other packages
+
+For various ways in which this library and plugin can be tested, see also the other
+`HeterogeneousTest/ROCm...` packages:
+ - [`HeterogeneousTest/ROCmDevice/README.md`](../../HeterogeneousTest/ROCmDevice/README.md)
+ - [`HeterogeneousTest/ROCmKernel/README.md`](../../HeterogeneousTest/ROCmKernel/README.md)
+ - [`HeterogeneousTest/ROCmWrapper/README.md`](../../HeterogeneousTest/ROCmWrapper/README.md)
+
+
+# Combining plugins
+
+`HeterogeneousTest/ROCmOpaque/test` contains also the `testROCmTestAdditionModules.py` python
+configuration that exercise all four plugins in a single application.
diff --git a/HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h b/HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h
new file mode 100644
index 0000000000000..9d4a314bd17c9
--- /dev/null
+++ b/HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h
@@ -0,0 +1,14 @@
+#ifndef HeterogeneousTest_ROCmOpaque_interface_DeviceAdditionOpaque_h
+#define HeterogeneousTest_ROCmOpaque_interface_DeviceAdditionOpaque_h
+
+#include
+
+namespace cms::rocmtest {
+
+ void opqaue_add_vectors_f(const float* in1, const float* in2, float* out, size_t size);
+
+ void opqaue_add_vectors_d(const double* in1, const double* in2, double* out, size_t size);
+
+} // namespace cms::rocmtest
+
+#endif // HeterogeneousTest_ROCmOpaque_interface_DeviceAdditionOpaque_h
diff --git a/HeterogeneousTest/ROCmOpaque/plugins/BuildFile.xml b/HeterogeneousTest/ROCmOpaque/plugins/BuildFile.xml
new file mode 100644
index 0000000000000..91fdd97658824
--- /dev/null
+++ b/HeterogeneousTest/ROCmOpaque/plugins/BuildFile.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc b/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc
new file mode 100644
index 0000000000000..901b6eac51122
--- /dev/null
+++ b/HeterogeneousTest/ROCmOpaque/plugins/ROCmTestOpaqueAdditionModule.cc
@@ -0,0 +1,81 @@
+#include
+#include
+#include
+#include
+#include
+
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/Frameworkfwd.h"
+#include "FWCore/Framework/interface/global/EDAnalyzer.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h"
+
+class ROCmTestOpaqueAdditionModule : public edm::global::EDAnalyzer<> {
+public:
+ explicit ROCmTestOpaqueAdditionModule(edm::ParameterSet const& config);
+ ~ROCmTestOpaqueAdditionModule() override = default;
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+ void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override;
+
+private:
+ const uint32_t size_;
+};
+
+ROCmTestOpaqueAdditionModule::ROCmTestOpaqueAdditionModule(edm::ParameterSet const& config)
+ : size_(config.getParameter("size")) {}
+
+void ROCmTestOpaqueAdditionModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+ edm::ParameterSetDescription desc;
+ desc.add("size", 1024 * 1024);
+ descriptions.addWithDefaultLabel(desc);
+}
+
+void ROCmTestOpaqueAdditionModule::analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const {
+ // require ROCm for running
+ edm::Service cs;
+ if (not cs->enabled()) {
+ std::cout << "The ROCmService is disabled, the test will be skipped.\n";
+ return;
+ }
+
+ // random number generator with a gaussian distribution
+ std::random_device rd{};
+ std::default_random_engine rand{rd()};
+ std::normal_distribution dist{0., 1.};
+
+ // tolerance
+ constexpr float epsilon = 0.000001;
+
+ // allocate input and output host buffers
+ std::vector in1(size_);
+ std::vector in2(size_);
+ std::vector out(size_);
+
+ // fill the input buffers with random data, and the output buffer with zeros
+ for (size_t i = 0; i < size_; ++i) {
+ in1[i] = dist(rand);
+ in2[i] = dist(rand);
+ out[i] = 0.;
+ }
+
+ // launch the 1-dimensional kernel for vector addition
+ cms::rocmtest::opqaue_add_vectors_f(in1.data(), in2.data(), out.data(), size_);
+
+ // check the results
+ for (size_t i = 0; i < size_; ++i) {
+ float sum = in1[i] + in2[i];
+ assert(out[i] < sum + epsilon);
+ assert(out[i] > sum - epsilon);
+ }
+
+ std::cout << "All tests passed.\n";
+}
+
+#include "FWCore/Framework/interface/MakerMacros.h"
+DEFINE_FWK_MODULE(ROCmTestOpaqueAdditionModule);
diff --git a/HeterogeneousTest/ROCmOpaque/src/DeviceAdditionOpaque.cc b/HeterogeneousTest/ROCmOpaque/src/DeviceAdditionOpaque.cc
new file mode 100644
index 0000000000000..3de89369df8a9
--- /dev/null
+++ b/HeterogeneousTest/ROCmOpaque/src/DeviceAdditionOpaque.cc
@@ -0,0 +1,73 @@
+#include
+
+#include
+
+#include "HeterogeneousTest/ROCmWrapper/interface/DeviceAdditionWrapper.h"
+#include "HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+
+namespace cms::rocmtest {
+
+ void opqaue_add_vectors_f(const float* in1_h, const float* in2_h, float* out_h, size_t size) {
+ // allocate input and output buffers on the device
+ float* in1_d;
+ float* in2_d;
+ float* out_d;
+ hipCheck(hipMalloc(&in1_d, size * sizeof(float)));
+ hipCheck(hipMalloc(&in2_d, size * sizeof(float)));
+ hipCheck(hipMalloc(&out_d, size * sizeof(float)));
+
+ // copy the input data to the device
+ hipCheck(hipMemcpy(in1_d, in1_h, size * sizeof(float), hipMemcpyHostToDevice));
+ hipCheck(hipMemcpy(in2_d, in2_h, size * sizeof(float), hipMemcpyHostToDevice));
+
+ // fill the output buffer with zeros
+ hipCheck(hipMemset(out_d, 0, size * sizeof(float)));
+
+ // launch the 1-dimensional kernel for vector addition
+ wrapper_add_vectors_f(in1_d, in2_d, out_d, size);
+
+ // copy the results from the device to the host
+ hipCheck(hipMemcpy(out_h, out_d, size * sizeof(float), hipMemcpyDeviceToHost));
+
+ // wait for all the operations to complete
+ hipCheck(hipDeviceSynchronize());
+
+ // free the input and output buffers on the device
+ hipCheck(hipFree(in1_d));
+ hipCheck(hipFree(in2_d));
+ hipCheck(hipFree(out_d));
+ }
+
+ void opqaue_add_vectors_d(const double* in1_h, const double* in2_h, double* out_h, size_t size) {
+ // allocate input and output buffers on the device
+ double* in1_d;
+ double* in2_d;
+ double* out_d;
+ hipCheck(hipMalloc(&in1_d, size * sizeof(double)));
+ hipCheck(hipMalloc(&in2_d, size * sizeof(double)));
+ hipCheck(hipMalloc(&out_d, size * sizeof(double)));
+
+ // copy the input data to the device
+ hipCheck(hipMemcpy(in1_d, in1_h, size * sizeof(double), hipMemcpyHostToDevice));
+ hipCheck(hipMemcpy(in2_d, in2_h, size * sizeof(double), hipMemcpyHostToDevice));
+
+ // fill the output buffer with zeros
+ hipCheck(hipMemset(out_d, 0, size * sizeof(double)));
+
+ // launch the 1-dimensional kernel for vector addition
+ wrapper_add_vectors_d(in1_d, in2_d, out_d, size);
+
+ // copy the results from the device to the host
+ hipCheck(hipMemcpy(out_h, out_d, size * sizeof(double), hipMemcpyDeviceToHost));
+
+ // wait for all the operations to complete
+ hipCheck(hipDeviceSynchronize());
+
+ // free the input and output buffers on the device
+ hipCheck(hipFree(in1_d));
+ hipCheck(hipFree(in2_d));
+ hipCheck(hipFree(out_d));
+ }
+
+} // namespace cms::rocmtest
diff --git a/HeterogeneousTest/ROCmOpaque/test/BuildFile.xml b/HeterogeneousTest/ROCmOpaque/test/BuildFile.xml
new file mode 100644
index 0000000000000..a26e1a8a43b05
--- /dev/null
+++ b/HeterogeneousTest/ROCmOpaque/test/BuildFile.xml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousTest/ROCmOpaque/test/testDeviceAdditionOpaque.cc b/HeterogeneousTest/ROCmOpaque/test/testDeviceAdditionOpaque.cc
new file mode 100644
index 0000000000000..c3ea68dbce85d
--- /dev/null
+++ b/HeterogeneousTest/ROCmOpaque/test/testDeviceAdditionOpaque.cc
@@ -0,0 +1,48 @@
+#include
+#include
+#include
+#include
+
+#define CATCH_CONFIG_MAIN
+#include
+
+#include "HeterogeneousTest/ROCmOpaque/interface/DeviceAdditionOpaque.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h"
+
+TEST_CASE("HeterogeneousTest/ROCmOpaque test", "[rocmTestOpaqueAdditionOpaque]") {
+ cms::rocmtest::requireDevices();
+
+ // random number generator with a gaussian distribution
+ std::random_device rd{};
+ std::default_random_engine rand{rd()};
+ std::normal_distribution dist{0., 1.};
+
+ // tolerance
+ constexpr float epsilon = 0.000001;
+
+ // buffer size
+ constexpr size_t size = 1024 * 1024;
+
+ // allocate input and output host buffers
+ std::vector in1(size);
+ std::vector in2(size);
+ std::vector out(size);
+
+ // fill the input buffers with random data, and the output buffer with zeros
+ for (size_t i = 0; i < size; ++i) {
+ in1[i] = dist(rand);
+ in2[i] = dist(rand);
+ out[i] = 0.;
+ }
+
+ SECTION("Test add_vectors_f") {
+ // launch the 1-dimensional kernel for vector addition
+ REQUIRE_NOTHROW(cms::rocmtest::opqaue_add_vectors_f(in1.data(), in2.data(), out.data(), size));
+
+ // check the results
+ for (size_t i = 0; i < size; ++i) {
+ float sum = in1[i] + in2[i];
+ CHECK_THAT(out[i], Catch::Matchers::WithinAbs(sum, epsilon));
+ }
+ }
+}
diff --git a/HeterogeneousTest/ROCmOpaque/test/testROCmTestAdditionModules.py b/HeterogeneousTest/ROCmOpaque/test/testROCmTestAdditionModules.py
new file mode 100644
index 0000000000000..2ae6853a8e7ee
--- /dev/null
+++ b/HeterogeneousTest/ROCmOpaque/test/testROCmTestAdditionModules.py
@@ -0,0 +1,31 @@
+import FWCore.ParameterSet.Config as cms
+
+process = cms.Process('TestROCmTestOpaqueAdditionModule')
+
+process.source = cms.Source('EmptySource')
+
+process.ROCmService = cms.Service('ROCmService')
+
+process.rocmTestDeviceAdditionModule = cms.EDAnalyzer('ROCmTestDeviceAdditionModule',
+ size = cms.uint32( 1024*1024 )
+)
+
+process.rocmTestKernelAdditionModule = cms.EDAnalyzer('ROCmTestKernelAdditionModule',
+ size = cms.uint32( 1024*1024 )
+)
+
+process.rocmTestWrapperAdditionModule = cms.EDAnalyzer('ROCmTestWrapperAdditionModule',
+ size = cms.uint32( 1024*1024 )
+)
+
+process.rocmTestOpaqueAdditionModule = cms.EDAnalyzer('ROCmTestOpaqueAdditionModule',
+ size = cms.uint32( 1024*1024 )
+)
+
+process.path = cms.Path(
+ process.rocmTestDeviceAdditionModule +
+ process.rocmTestKernelAdditionModule +
+ process.rocmTestWrapperAdditionModule +
+ process.rocmTestOpaqueAdditionModule)
+
+process.maxEvents.input = 1
diff --git a/HeterogeneousTest/ROCmOpaque/test/testROCmTestOpaqueAdditionModule.py b/HeterogeneousTest/ROCmOpaque/test/testROCmTestOpaqueAdditionModule.py
new file mode 100644
index 0000000000000..05c4bf20d3f17
--- /dev/null
+++ b/HeterogeneousTest/ROCmOpaque/test/testROCmTestOpaqueAdditionModule.py
@@ -0,0 +1,15 @@
+import FWCore.ParameterSet.Config as cms
+
+process = cms.Process('TestROCmTestOpaqueAdditionModule')
+
+process.source = cms.Source('EmptySource')
+
+process.ROCmService = cms.Service('ROCmService')
+
+process.rocmTestOpaqueAdditionModule = cms.EDAnalyzer('ROCmTestOpaqueAdditionModule',
+ size = cms.uint32( 1024*1024 )
+)
+
+process.path = cms.Path(process.rocmTestOpaqueAdditionModule)
+
+process.maxEvents.input = 1
diff --git a/HeterogeneousTest/ROCmWrapper/BuildFile.xml b/HeterogeneousTest/ROCmWrapper/BuildFile.xml
new file mode 100644
index 0000000000000..4e9ba08b32e57
--- /dev/null
+++ b/HeterogeneousTest/ROCmWrapper/BuildFile.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousTest/ROCmWrapper/README.md b/HeterogeneousTest/ROCmWrapper/README.md
new file mode 100644
index 0000000000000..4ccdcea86958f
--- /dev/null
+++ b/HeterogeneousTest/ROCmWrapper/README.md
@@ -0,0 +1,48 @@
+# Introduction
+
+The packages `HeterogeneousTest/ROCmDevice`, `HeterogeneousTest/ROCmKernel`,
+`HeterogeneousTest/ROCmWrapper` and `HeterogeneousTest/ROCmOpaque` implement a set of libraries,
+plugins and tests to exercise the build rules for ROCm.
+In particular, these tests show what is supported and what are the limitations implementing
+ROCm-based libraries, and using them from multiple plugins.
+
+
+# `HeterogeneousTest/ROCmWrapper`
+
+The package `HeterogeneousTest/ROCmWrapper` implements a library that defines and exports host-side
+wrappers that launch the kernels defined in the `HeterogeneousTest/ROCmKernel` library:
+```c++
+namespace cms::cudatest {
+
+ void wrapper_add_vectors_f(...);
+ void wrapper_add_vectors_d(...);
+
+} // namespace cms::cudatest
+```
+These wrappers can be used from host-only, non-ROCm aware libraries, plugins and applications. They
+can be linked with the standard host linker.
+
+The `plugins` directory implements the `ROCmTestWrapperAdditionModule` `EDAnalyzer` that calls the
+wrappers defined in this library. This plugin shows how the wrappers can be used directly from a
+host-only, non-ROCm aware plugin.
+
+The `test` directory implements the `testCudaDeviceAdditionWrapper` test binary that calls the
+wrappers defined in this library, and shows how they can be used directly from a host-only, non-ROCm
+aware application.
+It also contains the `testROCmTestWrapperAdditionModule.py` python configuration to exercise the
+`ROCmTestWrapperAdditionModule` module.
+
+
+# Other packages
+
+For various ways in which this library and plugin can be tested, see also the other
+`HeterogeneousTest/ROCm...` packages:
+ - [`HeterogeneousTest/ROCmDevice/README.md`](../../HeterogeneousTest/ROCmDevice/README.md)
+ - [`HeterogeneousTest/ROCmKernel/README.md`](../../HeterogeneousTest/ROCmKernel/README.md)
+ - [`HeterogeneousTest/ROCmOpaque/README.md`](../../HeterogeneousTest/ROCmOpaque/README.md)
+
+
+# Combining plugins
+
+`HeterogeneousTest/ROCmOpaque/test` contains the `testROCmTestAdditionModules.py` python
+configuration that exercise all four plugins in a single application.
diff --git a/HeterogeneousTest/ROCmWrapper/interface/DeviceAdditionWrapper.h b/HeterogeneousTest/ROCmWrapper/interface/DeviceAdditionWrapper.h
new file mode 100644
index 0000000000000..2dc6aadaa8a96
--- /dev/null
+++ b/HeterogeneousTest/ROCmWrapper/interface/DeviceAdditionWrapper.h
@@ -0,0 +1,20 @@
+#ifndef HeterogeneousTest_ROCmWrapper_interface_DeviceAdditionWrapper_h
+#define HeterogeneousTest_ROCmWrapper_interface_DeviceAdditionWrapper_h
+
+#include
+
+namespace cms::rocmtest {
+
+ void wrapper_add_vectors_f(const float* __restrict__ in1,
+ const float* __restrict__ in2,
+ float* __restrict__ out,
+ size_t size);
+
+ void wrapper_add_vectors_d(const double* __restrict__ in1,
+ const double* __restrict__ in2,
+ double* __restrict__ out,
+ size_t size);
+
+} // namespace cms::rocmtest
+
+#endif // HeterogeneousTest_ROCmWrapper_interface_DeviceAdditionWrapper_h
diff --git a/HeterogeneousTest/ROCmWrapper/plugins/BuildFile.xml b/HeterogeneousTest/ROCmWrapper/plugins/BuildFile.xml
new file mode 100644
index 0000000000000..e00d97b1654c2
--- /dev/null
+++ b/HeterogeneousTest/ROCmWrapper/plugins/BuildFile.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousTest/ROCmWrapper/plugins/ROCmTestWrapperAdditionModule.cc b/HeterogeneousTest/ROCmWrapper/plugins/ROCmTestWrapperAdditionModule.cc
new file mode 100644
index 0000000000000..48b2b9dc91a20
--- /dev/null
+++ b/HeterogeneousTest/ROCmWrapper/plugins/ROCmTestWrapperAdditionModule.cc
@@ -0,0 +1,107 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/Frameworkfwd.h"
+#include "FWCore/Framework/interface/global/EDAnalyzer.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/ServiceRegistry/interface/Service.h"
+#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
+#include "HeterogeneousTest/ROCmWrapper/interface/DeviceAdditionWrapper.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+
+class ROCmTestWrapperAdditionModule : public edm::global::EDAnalyzer<> {
+public:
+ explicit ROCmTestWrapperAdditionModule(edm::ParameterSet const& config);
+ ~ROCmTestWrapperAdditionModule() override = default;
+
+ static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+ void analyze(edm::StreamID, edm::Event const& event, edm::EventSetup const& setup) const override;
+
+private:
+ const uint32_t size_;
+};
+
+ROCmTestWrapperAdditionModule::ROCmTestWrapperAdditionModule(edm::ParameterSet const& config)
+ : size_(config.getParameter("size")) {}
+
+void ROCmTestWrapperAdditionModule::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+ edm::ParameterSetDescription desc;
+ desc.add("size", 1024 * 1024);
+ descriptions.addWithDefaultLabel(desc);
+}
+
+void ROCmTestWrapperAdditionModule::analyze(edm::StreamID,
+ edm::Event const& event,
+ edm::EventSetup const& setup) const {
+ // require ROCm for running
+ edm::Service cs;
+ if (not cs->enabled()) {
+ std::cout << "The ROCmService is disabled, the test will be skipped.\n";
+ return;
+ }
+
+ // random number generator with a gaussian distribution
+ std::random_device rd{};
+ std::default_random_engine rand{rd()};
+ std::normal_distribution dist{0., 1.};
+
+ // tolerance
+ constexpr float epsilon = 0.000001;
+
+ // allocate input and output host buffers
+ std::vector in1_h(size_);
+ std::vector in2_h(size_);
+ std::vector out_h(size_);
+
+ // fill the input buffers with random data, and the output buffer with zeros
+ for (size_t i = 0; i < size_; ++i) {
+ in1_h[i] = dist(rand);
+ in2_h[i] = dist(rand);
+ out_h[i] = 0.;
+ }
+
+ // allocate input and output buffers on the device
+ float* in1_d;
+ float* in2_d;
+ float* out_d;
+ hipCheck(hipMalloc(&in1_d, size_ * sizeof(float)));
+ hipCheck(hipMalloc(&in2_d, size_ * sizeof(float)));
+ hipCheck(hipMalloc(&out_d, size_ * sizeof(float)));
+
+ // copy the input data to the device
+ hipCheck(hipMemcpy(in1_d, in1_h.data(), size_ * sizeof(float), hipMemcpyHostToDevice));
+ hipCheck(hipMemcpy(in2_d, in2_h.data(), size_ * sizeof(float), hipMemcpyHostToDevice));
+
+ // fill the output buffer with zeros
+ hipCheck(hipMemset(out_d, 0, size_ * sizeof(float)));
+
+ // launch the 1-dimensional kernel for vector addition
+ cms::rocmtest::wrapper_add_vectors_f(in1_d, in2_d, out_d, size_);
+
+ // copy the results from the device to the host
+ hipCheck(hipMemcpy(out_h.data(), out_d, size_ * sizeof(float), hipMemcpyDeviceToHost));
+
+ // wait for all the operations to complete
+ hipCheck(hipDeviceSynchronize());
+
+ // check the results
+ for (size_t i = 0; i < size_; ++i) {
+ float sum = in1_h[i] + in2_h[i];
+ assert(out_h[i] < sum + epsilon);
+ assert(out_h[i] > sum - epsilon);
+ }
+
+ std::cout << "All tests passed.\n";
+}
+
+#include "FWCore/Framework/interface/MakerMacros.h"
+DEFINE_FWK_MODULE(ROCmTestWrapperAdditionModule);
diff --git a/HeterogeneousTest/ROCmWrapper/src/DeviceAdditionWrapper.hip.cc b/HeterogeneousTest/ROCmWrapper/src/DeviceAdditionWrapper.hip.cc
new file mode 100644
index 0000000000000..fcf7d41bd4974
--- /dev/null
+++ b/HeterogeneousTest/ROCmWrapper/src/DeviceAdditionWrapper.hip.cc
@@ -0,0 +1,29 @@
+#include
+
+#include
+
+#include "HeterogeneousTest/ROCmKernel/interface/DeviceAdditionKernel.h"
+#include "HeterogeneousTest/ROCmWrapper/interface/DeviceAdditionWrapper.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+
+namespace cms::rocmtest {
+
+ void wrapper_add_vectors_f(const float* __restrict__ in1,
+ const float* __restrict__ in2,
+ float* __restrict__ out,
+ size_t size) {
+ // launch the 1-dimensional kernel for vector addition
+ kernel_add_vectors_f<<<32, 32>>>(in1, in2, out, size);
+ hipCheck(hipGetLastError());
+ }
+
+ void wrapper_add_vectors_d(const double* __restrict__ in1,
+ const double* __restrict__ in2,
+ double* __restrict__ out,
+ size_t size) {
+ // launch the 1-dimensional kernel for vector addition
+ kernel_add_vectors_d<<<32, 32>>>(in1, in2, out, size);
+ hipCheck(hipGetLastError());
+ }
+
+} // namespace cms::rocmtest
diff --git a/HeterogeneousTest/ROCmWrapper/test/BuildFile.xml b/HeterogeneousTest/ROCmWrapper/test/BuildFile.xml
new file mode 100644
index 0000000000000..6cc8125fcaa27
--- /dev/null
+++ b/HeterogeneousTest/ROCmWrapper/test/BuildFile.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/HeterogeneousTest/ROCmWrapper/test/testDeviceAdditionWrapper.cc b/HeterogeneousTest/ROCmWrapper/test/testDeviceAdditionWrapper.cc
new file mode 100644
index 0000000000000..b34cd285b7373
--- /dev/null
+++ b/HeterogeneousTest/ROCmWrapper/test/testDeviceAdditionWrapper.cc
@@ -0,0 +1,72 @@
+#include
+#include
+#include
+#include
+
+#define CATCH_CONFIG_MAIN
+#include
+
+#include
+
+#include "HeterogeneousTest/ROCmWrapper/interface/DeviceAdditionWrapper.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h"
+#include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h"
+
+TEST_CASE("HeterogeneousTest/ROCmWrapper test", "[rocmTestWrapperAdditionWrapper]") {
+ cms::rocmtest::requireDevices();
+
+ // random number generator with a gaussian distribution
+ std::random_device rd{};
+ std::default_random_engine rand{rd()};
+ std::normal_distribution dist{0., 1.};
+
+ // tolerance
+ constexpr float epsilon = 0.000001;
+
+ // buffer size
+ constexpr size_t size = 1024 * 1024;
+
+ // allocate input and output host buffers
+ std::vector in1_h(size);
+ std::vector in2_h(size);
+ std::vector out_h(size);
+
+ // fill the input buffers with random data, and the output buffer with zeros
+ for (size_t i = 0; i < size; ++i) {
+ in1_h[i] = dist(rand);
+ in2_h[i] = dist(rand);
+ out_h[i] = 0.;
+ }
+
+ SECTION("Test add_vectors_f") {
+ // allocate input and output buffers on the device
+ float* in1_d;
+ float* in2_d;
+ float* out_d;
+ REQUIRE_NOTHROW(hipCheck(hipMalloc(&in1_d, size * sizeof(float))));
+ REQUIRE_NOTHROW(hipCheck(hipMalloc(&in2_d, size * sizeof(float))));
+ REQUIRE_NOTHROW(hipCheck(hipMalloc(&out_d, size * sizeof(float))));
+
+ // copy the input data to the device
+ REQUIRE_NOTHROW(hipCheck(hipMemcpy(in1_d, in1_h.data(), size * sizeof(float), hipMemcpyHostToDevice)));
+ REQUIRE_NOTHROW(hipCheck(hipMemcpy(in2_d, in2_h.data(), size * sizeof(float), hipMemcpyHostToDevice)));
+
+ // fill the output buffer with zeros
+ REQUIRE_NOTHROW(hipCheck(hipMemset(out_d, 0, size * sizeof(float))));
+
+ // launch the 1-dimensional kernel for vector addition
+ REQUIRE_NOTHROW(cms::rocmtest::wrapper_add_vectors_f(in1_d, in2_d, out_d, size));
+
+ // copy the results from the device to the host
+ REQUIRE_NOTHROW(hipCheck(hipMemcpy(out_h.data(), out_d, size * sizeof(float), hipMemcpyDeviceToHost)));
+
+ // wait for all the operations to complete
+ REQUIRE_NOTHROW(hipCheck(hipDeviceSynchronize()));
+
+ // check the results
+ for (size_t i = 0; i < size; ++i) {
+ float sum = in1_h[i] + in2_h[i];
+ CHECK_THAT(out_h[i], Catch::Matchers::WithinAbs(sum, epsilon));
+ }
+ }
+}
diff --git a/HeterogeneousTest/ROCmWrapper/test/testROCmTestWrapperAdditionModule.py b/HeterogeneousTest/ROCmWrapper/test/testROCmTestWrapperAdditionModule.py
new file mode 100644
index 0000000000000..b493b484ed82a
--- /dev/null
+++ b/HeterogeneousTest/ROCmWrapper/test/testROCmTestWrapperAdditionModule.py
@@ -0,0 +1,15 @@
+import FWCore.ParameterSet.Config as cms
+
+process = cms.Process('TestROCmTestWrapperAdditionModule')
+
+process.source = cms.Source('EmptySource')
+
+process.ROCmService = cms.Service('ROCmService')
+
+process.rocmTestWrapperAdditionModule = cms.EDAnalyzer('ROCmTestWrapperAdditionModule',
+ size = cms.uint32( 1024*1024 )
+)
+
+process.path = cms.Path(process.rocmTestWrapperAdditionModule)
+
+process.maxEvents.input = 1