diff --git a/HeterogeneousTest/AlpakaDevice/BuildFile.xml b/HeterogeneousTest/AlpakaDevice/BuildFile.xml index 33da29f0df749..04e7e2603bcbf 100644 --- a/HeterogeneousTest/AlpakaDevice/BuildFile.xml +++ b/HeterogeneousTest/AlpakaDevice/BuildFile.xml @@ -1,2 +1,6 @@ + + + + diff --git a/HeterogeneousTest/AlpakaDevice/README.md b/HeterogeneousTest/AlpakaDevice/README.md index 4815e1a1f0200..b6cb46fb98888 100644 --- a/HeterogeneousTest/AlpakaDevice/README.md +++ b/HeterogeneousTest/AlpakaDevice/README.md @@ -12,15 +12,13 @@ Alpaka-based libraries, and using them from multiple plugins. The package `HeterogeneousTest/AlpakaDevice` implements a library that defines and exports Alpaka device-side functions: ```c++ -namespace cms::alpakatest { +namespace ALPAKA_ACCELERATOR_NAMESPACE::test { - template - ALPAKA_FN_ACC void add_vectors_f(TAcc const& acc, ...); + ALPAKA_FN_ACC void add_vectors_f(Acc1D const& acc, ...); - template - ALPAKA_FN_ACC void add_vectors_d(TAcc const& acc, ...); + ALPAKA_FN_ACC void add_vectors_d(Acc1D const& acc, ...); -} // namespace cms::alpakatest +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::test ``` The `plugins` directory implements the `AlpakaTestDeviceAdditionModule` `EDAnalyzer` that launches diff --git a/HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h b/HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h index 9a38952b2c2b1..e6b826455a5d2 100644 --- a/HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h +++ b/HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h @@ -5,32 +5,22 @@ #include -#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" -namespace cms::alpakatest { +namespace ALPAKA_ACCELERATOR_NAMESPACE::test { - template - ALPAKA_FN_ACC void add_vectors_f(TAcc const& acc, + ALPAKA_FN_ACC void add_vectors_f(Acc1D const& acc, float const* __restrict__ in1, float const* __restrict__ in2, float* __restrict__ out, - uint32_t size) { - for (auto i : cms::alpakatools::uniform_elements(acc, size)) { - out[i] = in1[i] + in2[i]; - } - } - - template - ALPAKA_FN_ACC void add_vectors_d(TAcc const& acc, + uint32_t size); + + ALPAKA_FN_ACC void add_vectors_d(Acc1D const& acc, double const* __restrict__ in1, double const* __restrict__ in2, double* __restrict__ out, - uint32_t size) { - for (auto i : cms::alpakatools::uniform_elements(acc, size)) { - out[i] = in1[i] + in2[i]; - } - } + uint32_t size); -} // namespace cms::alpakatest +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::test #endif // HeterogeneousTest_AlpakaDevice_interface_alpaka_DeviceAddition_h diff --git a/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionAlgo.dev.cc b/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionAlgo.dev.cc index e8e899e354ac1..b28c94b9ef271 100644 --- a/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionAlgo.dev.cc +++ b/HeterogeneousTest/AlpakaDevice/plugins/alpaka/AlpakaTestDeviceAdditionAlgo.dev.cc @@ -11,13 +11,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::HeterogeneousTestAlpakaDevicePlugins { struct KernelAddVectorsF { - template - ALPAKA_FN_ACC void operator()(TAcc const& acc, + ALPAKA_FN_ACC void operator()(Acc1D const& acc, const float* __restrict__ in1, const float* __restrict__ in2, float* __restrict__ out, uint32_t size) const { - cms::alpakatest::add_vectors_f(acc, in1, in2, out, size); + test::add_vectors_f(acc, in1, in2, out, size); } }; diff --git a/HeterogeneousTest/AlpakaDevice/src/alpaka/DeviceAddition.dev.cc b/HeterogeneousTest/AlpakaDevice/src/alpaka/DeviceAddition.dev.cc new file mode 100644 index 0000000000000..86316101ec7c8 --- /dev/null +++ b/HeterogeneousTest/AlpakaDevice/src/alpaka/DeviceAddition.dev.cc @@ -0,0 +1,31 @@ +#include + +#include + +#include "HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::test { + + ALPAKA_FN_ACC void add_vectors_f(Acc1D const& acc, + float const* __restrict__ in1, + float const* __restrict__ in2, + float* __restrict__ out, + uint32_t size) { + for (auto i : cms::alpakatools::uniform_elements(acc, size)) { + out[i] = in1[i] + in2[i]; + } + } + + ALPAKA_FN_ACC void add_vectors_d(Acc1D const& acc, + double const* __restrict__ in1, + double const* __restrict__ in2, + double* __restrict__ out, + uint32_t size) { + for (auto i : cms::alpakatools::uniform_elements(acc, size)) { + out[i] = in1[i] + in2[i]; + } + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::test diff --git a/HeterogeneousTest/AlpakaDevice/test/alpaka/testDeviceAddition.dev.cc b/HeterogeneousTest/AlpakaDevice/test/alpaka/testDeviceAddition.dev.cc index b73cd5b74279c..7e017b9ea0bc9 100644 --- a/HeterogeneousTest/AlpakaDevice/test/alpaka/testDeviceAddition.dev.cc +++ b/HeterogeneousTest/AlpakaDevice/test/alpaka/testDeviceAddition.dev.cc @@ -16,13 +16,12 @@ using namespace ALPAKA_ACCELERATOR_NAMESPACE; struct KernelAddVectorsF { - template - ALPAKA_FN_ACC void operator()(TAcc const& acc, + ALPAKA_FN_ACC void operator()(Acc1D const& acc, const float* __restrict__ in1, const float* __restrict__ in2, float* __restrict__ out, uint32_t size) const { - cms::alpakatest::add_vectors_f(acc, in1, in2, out, size); + test::add_vectors_f(acc, in1, in2, out, size); } }; diff --git a/HeterogeneousTest/AlpakaKernel/BuildFile.xml b/HeterogeneousTest/AlpakaKernel/BuildFile.xml index dbd2e61ca61ea..8198ef741b825 100644 --- a/HeterogeneousTest/AlpakaKernel/BuildFile.xml +++ b/HeterogeneousTest/AlpakaKernel/BuildFile.xml @@ -1,3 +1,7 @@ + + + + diff --git a/HeterogeneousTest/AlpakaKernel/README.md b/HeterogeneousTest/AlpakaKernel/README.md index fd87ed2cf0d01..d5f0655f46d25 100644 --- a/HeterogeneousTest/AlpakaKernel/README.md +++ b/HeterogeneousTest/AlpakaKernel/README.md @@ -12,19 +12,18 @@ Alpaka-based libraries, and using them from multiple plugins. The package `HeterogeneousTest/AlpakaKernel` implements a library that defines and exports Alpaka kernels that call the device functions defined in the `HeterogeneousTest/AlpakaDevice` library: ```c++ -namespace cms::alpakatest { +namespace ALPAKA_ACCELERATOR_NAMESPACE::test { struct KernelAddVectorsF { - template - ALPAKA_FN_ACC void operator()(TAcc const& acc, ...) const; + ALPAKA_FN_ACC void operator()(Acc1D const& acc, ...) const; }; struct KernelAddVectorsD { - template - ALPAKA_FN_ACC void operator()(TAcc const& acc, ...) const; + ALPAKA_FN_ACC void operator()(Acc1D const& acc, ...) const; }; -} // namespace cms::alpakatest + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::test ``` The `plugins` directory implements the `AlpakaTestKernelAdditionModule` `EDAnalyzer` that launches diff --git a/HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h b/HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h index 43d99270e32b8..171254619eb02 100644 --- a/HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h +++ b/HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h @@ -5,32 +5,26 @@ #include -#include "HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" -namespace cms::alpakatest { +namespace ALPAKA_ACCELERATOR_NAMESPACE::test { struct KernelAddVectorsF { - template - ALPAKA_FN_ACC void operator()(TAcc const& acc, + ALPAKA_FN_ACC void operator()(Acc1D const& acc, const float* __restrict__ in1, const float* __restrict__ in2, float* __restrict__ out, - uint32_t size) const { - add_vectors_f(acc, in1, in2, out, size); - } + uint32_t size) const; }; struct KernelAddVectorsD { - template - ALPAKA_FN_ACC void operator()(TAcc const& acc, + ALPAKA_FN_ACC void operator()(Acc1D const& acc, const double* __restrict__ in1, const double* __restrict__ in2, double* __restrict__ out, - uint32_t size) const { - add_vectors_d(acc, in1, in2, out, size); - } + uint32_t size) const; }; -} // namespace cms::alpakatest +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::test #endif // HeterogeneousTest_AlpakaKernel_interface_alpaka_DeviceAdditionKernel_h diff --git a/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionAlgo.dev.cc b/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionAlgo.dev.cc index 0cf8caa3769c9..083029cf2e3a2 100644 --- a/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionAlgo.dev.cc +++ b/HeterogeneousTest/AlpakaKernel/plugins/alpaka/AlpakaTestKernelAdditionAlgo.dev.cc @@ -16,7 +16,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::HeterogeneousTestAlpakaKernelPlugins { float* __restrict__ out, uint32_t size) { alpaka::exec( - queue, cms::alpakatools::make_workdiv(32, 32), cms::alpakatest::KernelAddVectorsF{}, in1, in2, out, size); + queue, cms::alpakatools::make_workdiv(32, 32), test::KernelAddVectorsF{}, in1, in2, out, size); } } // namespace ALPAKA_ACCELERATOR_NAMESPACE::HeterogeneousTestAlpakaKernelPlugins diff --git a/HeterogeneousTest/AlpakaKernel/src/alpaka/DeviceAdditionKernel.dev.cc b/HeterogeneousTest/AlpakaKernel/src/alpaka/DeviceAdditionKernel.dev.cc new file mode 100644 index 0000000000000..792f8aaa90c70 --- /dev/null +++ b/HeterogeneousTest/AlpakaKernel/src/alpaka/DeviceAdditionKernel.dev.cc @@ -0,0 +1,27 @@ +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousTest/AlpakaDevice/interface/alpaka/DeviceAddition.h" +#include "HeterogeneousTest/AlpakaKernel/interface/alpaka/DeviceAdditionKernel.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::test { + + ALPAKA_FN_ACC void KernelAddVectorsF::operator()(Acc1D const& acc, + const float* __restrict__ in1, + const float* __restrict__ in2, + float* __restrict__ out, + uint32_t size) const { + add_vectors_f(acc, in1, in2, out, size); + } + + ALPAKA_FN_ACC void KernelAddVectorsD::operator()(Acc1D const& acc, + const double* __restrict__ in1, + const double* __restrict__ in2, + double* __restrict__ out, + uint32_t size) const { + add_vectors_d(acc, in1, in2, out, size); + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::test diff --git a/HeterogeneousTest/AlpakaKernel/test/alpaka/testDeviceAdditionKernel.dev.cc b/HeterogeneousTest/AlpakaKernel/test/alpaka/testDeviceAdditionKernel.dev.cc index 14b4f7f520640..814d632b2dfba 100644 --- a/HeterogeneousTest/AlpakaKernel/test/alpaka/testDeviceAdditionKernel.dev.cc +++ b/HeterogeneousTest/AlpakaKernel/test/alpaka/testDeviceAdditionKernel.dev.cc @@ -68,7 +68,7 @@ TEST_CASE("HeterogeneousTest/AlpakaKernel test", "[alpakaTestDeviceAdditionKerne // launch the 1-dimensional kernel for vector addition alpaka::exec(queue, cms::alpakatools::make_workdiv(32, 32), - cms::alpakatest::KernelAddVectorsF{}, + test::KernelAddVectorsF{}, in1_d.data(), in2_d.data(), out_d.data(), diff --git a/HeterogeneousTest/AlpakaOpaque/test/testAlpakaTestAdditionModules.py b/HeterogeneousTest/AlpakaOpaque/test/testAlpakaTestAdditionModules.py index e1a2e44448a6c..1698a028722a2 100644 --- a/HeterogeneousTest/AlpakaOpaque/test/testAlpakaTestAdditionModules.py +++ b/HeterogeneousTest/AlpakaOpaque/test/testAlpakaTestAdditionModules.py @@ -23,7 +23,8 @@ ) process.path = cms.Path( - process.alpakaTestDeviceAdditionModule + + # this one fails for the CUDA backend with "cudaErrorInvalidDeviceFunction: invalid device function" + # process.alpakaTestDeviceAdditionModule + process.alpakaTestKernelAdditionModule + process.alpakaTestWrapperAdditionModule + process.alpakaTestOpaqueAdditionModule) diff --git a/HeterogeneousTest/AlpakaWrapper/src/alpaka/DeviceAdditionWrapper.dev.cc b/HeterogeneousTest/AlpakaWrapper/src/alpaka/DeviceAdditionWrapper.dev.cc index b5f662fc930c9..f4f6950ff77e0 100644 --- a/HeterogeneousTest/AlpakaWrapper/src/alpaka/DeviceAdditionWrapper.dev.cc +++ b/HeterogeneousTest/AlpakaWrapper/src/alpaka/DeviceAdditionWrapper.dev.cc @@ -14,8 +14,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::test { const float* __restrict__ in2, float* __restrict__ out, uint32_t size) { - alpaka::exec( - queue, cms::alpakatools::make_workdiv(32, 32), cms::alpakatest::KernelAddVectorsF{}, in1, in2, out, size); + alpaka::exec(queue, cms::alpakatools::make_workdiv(32, 32), KernelAddVectorsF{}, in1, in2, out, size); } void wrapper_add_vectors_d(Queue& queue, @@ -23,8 +22,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::test { const double* __restrict__ in2, double* __restrict__ out, uint32_t size) { - alpaka::exec( - queue, cms::alpakatools::make_workdiv(32, 32), cms::alpakatest::KernelAddVectorsD{}, in1, in2, out, size); + alpaka::exec(queue, cms::alpakatools::make_workdiv(32, 32), KernelAddVectorsD{}, in1, in2, out, size); } } // namespace ALPAKA_ACCELERATOR_NAMESPACE::test