diff --git a/DataFormats/Portable/test/BuildFile.xml b/DataFormats/Portable/test/BuildFile.xml index e2684fc029bf4..464cf210fe23d 100644 --- a/DataFormats/Portable/test/BuildFile.xml +++ b/DataFormats/Portable/test/BuildFile.xml @@ -22,3 +22,12 @@ + + + + + + + + + diff --git a/DataFormats/Portable/test/alpaka/test_catch2_SoAMethods.dev.cc b/DataFormats/Portable/test/alpaka/test_catch2_SoAMethods.dev.cc new file mode 100644 index 0000000000000..87ef0b727aaa4 --- /dev/null +++ b/DataFormats/Portable/test/alpaka/test_catch2_SoAMethods.dev.cc @@ -0,0 +1,179 @@ +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +GENERATE_SOA_LAYOUT(SoATemplate, + SOA_COLUMN(float, x), + SOA_COLUMN(float, y), + SOA_COLUMN(float, z), + SOA_COLUMN(double, v_x), + SOA_COLUMN(double, v_y), + SOA_COLUMN(double, v_z), + + SOA_ELEMENT_METHODS( + + SOA_HOST_DEVICE void normalise() { + float norm_position = square_norm_position(); + if (norm_position > 0.0f) { + x() /= norm_position; + y() /= norm_position; + z() /= norm_position; + }; + double norm_velocity = square_norm_velocity(); + if (norm_velocity > 0.0f) { + v_x() /= norm_velocity; + v_y() /= norm_velocity; + v_z() /= norm_velocity; + }; + }), + + SOA_CONST_ELEMENT_METHODS( + SOA_HOST_DEVICE float square_norm_position() + const { return sqrt(x() * x() + y() * y() + z() * z()); }; + + SOA_HOST_DEVICE double square_norm_velocity() + const { return sqrt(v_x() * v_x() + v_y() * v_y() + v_z() * v_z()); }; + + template + SOA_HOST_DEVICE static auto time(T1 pos, T2 vel) { + if (not(vel == 0)) + return pos / vel; + return 0.; + }), + + SOA_SCALAR(int, detectorType)) + +using SoA = SoATemplate<>; +using SoAView = SoA::View; +using SoAConstView = SoA::ConstView; + +GENERATE_SOA_LAYOUT(ResultTemplate, + SOA_COLUMN(float, positionNorm), + SOA_COLUMN(double, velocityNorm), + SOA_COLUMN(double, times)) + +using ResultSoA = ResultTemplate<>; +using ResultView = ResultSoA::View; + +struct calculateNorm { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, SoAConstView soaConstView, ResultView resultView) const { + for (auto i : cms::alpakatools::uniform_elements(acc, soaConstView.metadata().size())) { + resultView[i].positionNorm() = soaConstView[i].square_norm_position(); + resultView[i].velocityNorm() = soaConstView[i].square_norm_velocity(); + } + } +}; + +struct checkNormalise { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, SoAView soaView, ResultView resultView) const { + for (auto i : cms::alpakatools::uniform_elements(acc, soaView.metadata().size())) { + resultView[i].times() = SoAView::const_element::time(soaView[i].x(), soaView[i].v_x()); + soaView[i].normalise(); + } + } +}; + +TEST_CASE("SoACustomizedMethods Alpaka", "[SoACustomizedMethods][Alpaka]") { + auto const& devices = cms::alpakatools::devices(); + if (devices.empty()) { + std::cout << "No devices available for the " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) + << " backend, skipping.\n"; + return; + } + + for (auto const& device : devices) { + std::cout << "Running on " << alpaka::getName(device) << std::endl; + Queue queue(device); + + constexpr unsigned int elems = 10; + + PortableHostCollection hostCollection(elems, cms::alpakatools::host()); + auto h_view = hostCollection.view(); + const auto h_Constview = hostCollection.const_view(); + + PortableHostCollection hostResultCollection(elems, cms::alpakatools::host()); + auto h_result_view = hostResultCollection.view(); + + // fill up + for (size_t i = 0; i < elems; i++) { + h_view[i].x() = static_cast(i); + h_view[i].y() = static_cast(i) * 2.0f; + h_view[i].z() = static_cast(i) * 3.0f; + h_view[i].v_x() = static_cast(i); + h_view[i].v_y() = static_cast(i) * 20; + h_view[i].v_z() = static_cast(i) * 30; + } + h_view.detectorType() = 42; + + PortableCollection deviceCollection(elems, queue); + auto d_view = deviceCollection.view(); + auto d_Constview = deviceCollection.const_view(); + alpaka::memcpy(queue, deviceCollection.buffer(), hostCollection.buffer()); + + PortableCollection deviceResultCollection(elems, queue); + auto d_result_view = deviceResultCollection.view(); + alpaka::wait(queue); + + // Work division + const std::size_t blockSize = 256; + const std::size_t numberOfBlocks = cms::alpakatools::divide_up_by(elems, blockSize); + const auto workDiv = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + + SECTION("ConstView methods Alpaka") { + alpaka::exec(queue, workDiv, calculateNorm{}, d_Constview, d_result_view); + alpaka::wait(queue); + + alpaka::memcpy(queue, hostResultCollection.buffer(), deviceResultCollection.buffer()); + + // Check for the correctness of the square_norm() functions + for (size_t i = 0; i < elems; i++) { + const float position_norm = + sqrt(h_Constview[i].x() * h_Constview[i].x() + h_Constview[i].y() * h_Constview[i].y() + + h_Constview[i].z() * h_Constview[i].z()); + const double velocity_norm = + sqrt(h_Constview[i].v_x() * h_Constview[i].v_x() + h_Constview[i].v_y() * h_Constview[i].v_y() + + h_Constview[i].v_z() * h_Constview[i].v_z()); + REQUIRE(h_result_view[i].positionNorm() == position_norm); + REQUIRE(h_result_view[i].velocityNorm() == velocity_norm); + } + } + + SECTION("View methods Alpaka") { + std::array times; + + // Check for the correctness of the time() function + times[0] = 0.; + for (size_t i = 0; i < elems; i++) { + if (!(i == 0)) + times[i] = h_view[i].x() / h_view[i].v_x(); + } + + alpaka::exec(queue, workDiv, checkNormalise{}, d_view, d_result_view); + alpaka::wait(queue); + + alpaka::memcpy(queue, hostResultCollection.buffer(), deviceResultCollection.buffer()); + alpaka::memcpy(queue, hostCollection.buffer(), deviceCollection.buffer()); + + // Check for the correctness of the time() function + for (size_t i = 0; i < elems; i++) { + REQUIRE(h_result_view[i].times() == times[i]); + } + + REQUIRE(h_view[0].square_norm_position() == 0.f); + REQUIRE(h_view[0].square_norm_velocity() == 0.); + for (size_t i = 1; i < elems; i++) { + REQUIRE_THAT(h_view[i].square_norm_position(), Catch::Matchers::WithinAbs(1.f, 1.e-6)); + REQUIRE_THAT(h_view[i].square_norm_velocity(), Catch::Matchers::WithinAbs(1., 1.e-9)); + } + } + } +} diff --git a/DataFormats/SoATemplate/README.md b/DataFormats/SoATemplate/README.md index 9af697b0aa449..239c279b49043 100644 --- a/DataFormats/SoATemplate/README.md +++ b/DataFormats/SoATemplate/README.md @@ -73,7 +73,9 @@ and to build a generic `View` as described in [View](#view). ## Customized methods It is possible to generate methods inside the `element` and `const_element` nested structs using the `SOA_ELEMENT_METHODS` -and `SOA_CONST_ELEMENT_METHODS` macros. Each of these macros can be called only once, and can define multiple methods. +and `SOA_CONST_ELEMENT_METHODS` macros. Each of these macros can be called only once, and can define multiple methods. +Note that `SOA_ELEMENT_METHODS` and `SOA_CONST_ELEMENT_METHODS` should be prefixed with the macro SOA_HOST_DEVICE. +This ensures that the methods can also be executed in device kernels. [An example is showed below.](#examples) ## Blocks @@ -181,14 +183,14 @@ GENERATE_SOA_LAYOUT(SoATemplate, // methods operating on const_element SOA_CONST_ELEMENT_METHODS( - auto norm() const { + SOA_HOST_DEVICE auto norm() const { return sqrt(x()*x() + y()+y() + z()*z()); } ), // methods operating on element SOA_ELEMENT_METHODS( - void scale(float arg) { + SOA_HOST_DEVICE void scale(float arg) { x() *= arg; y() *= arg; z() *= arg; diff --git a/DataFormats/SoATemplate/test/BuildFile.xml b/DataFormats/SoATemplate/test/BuildFile.xml index e0d9143d4016c..08152a550149d 100644 --- a/DataFormats/SoATemplate/test/BuildFile.xml +++ b/DataFormats/SoATemplate/test/BuildFile.xml @@ -37,6 +37,24 @@ + + + + + + + + + + + + + + + + + + diff --git a/DataFormats/SoATemplate/test/SoACustomizedMethods_t.cc b/DataFormats/SoATemplate/test/SoACustomizedMethods_t.cc index 5fadefb67258c..c73cd059384ce 100644 --- a/DataFormats/SoATemplate/test/SoACustomizedMethods_t.cc +++ b/DataFormats/SoATemplate/test/SoACustomizedMethods_t.cc @@ -1,51 +1,7 @@ #define CATCH_CONFIG_MAIN #include -#include "DataFormats/SoATemplate/interface/SoALayout.h" - -GENERATE_SOA_LAYOUT(SoATemplate, - SOA_COLUMN(float, x), - SOA_COLUMN(float, y), - SOA_COLUMN(float, z), - SOA_COLUMN(double, v_x), - SOA_COLUMN(double, v_y), - SOA_COLUMN(double, v_z), - - SOA_ELEMENT_METHODS( - - void normalise() { - float norm_position = square_norm_position(); - if (norm_position > 0.0f) { - x() /= norm_position; - y() /= norm_position; - z() /= norm_position; - }; - double norm_velocity = square_norm_velocity(); - if (norm_velocity > 0.0f) { - v_x() /= norm_velocity; - v_y() /= norm_velocity; - v_z() /= norm_velocity; - }; - }), - - SOA_CONST_ELEMENT_METHODS( - float square_norm_position() const { return sqrt(x() * x() + y() * y() + z() * z()); }; - - double square_norm_velocity() - const { return sqrt(v_x() * v_x() + v_y() * v_y() + v_z() * v_z()); }; - - template - static auto time(T1 pos, T2 vel) { - if (not(vel == 0)) - return pos / vel; - return 0.; - }), - - SOA_SCALAR(int, detectorType)) - -using SoA = SoATemplate<>; -using SoAView = SoA::View; -using SoAConstView = SoA::ConstView; +#include "SoADefinition_CustomizedMethods.h" TEST_CASE("SoACustomizedMethods") { // common number of elements for the SoAs diff --git a/DataFormats/SoATemplate/test/SoACustomizedMethods_t.cu b/DataFormats/SoATemplate/test/SoACustomizedMethods_t.cu new file mode 100644 index 0000000000000..a76c087f1eeab --- /dev/null +++ b/DataFormats/SoATemplate/test/SoACustomizedMethods_t.cu @@ -0,0 +1,125 @@ +#define CATCH_CONFIG_MAIN +#include + +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" + +#include "SoADefinition_CustomizedMethods.h" + +__global__ void calculateNorm(SoAConstView soaConstView, float* resultNorm, double* resultVelNorm) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= soaConstView.metadata().size()) + return; + + resultNorm[i] = soaConstView[i].square_norm_position(); + resultVelNorm[i] = soaConstView[i].square_norm_velocity(); +} + +__global__ void checkNormalise(SoAView soaView, double* checkTimesFunction) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= soaView.metadata().size()) + return; + + checkTimesFunction[i] = SoAView::const_element::time(soaView[i].x(), soaView[i].v_x()); + soaView[i].normalise(); +} + +TEST_CASE("SoACustomizedMethods CUDA", "[SoACustomizedMethods][cuda]") { + // common number of elements for the SoAs + const std::size_t elems = 10; + + // buffer size + const std::size_t bufferSize = SoA::computeDataSize(elems); + + std::byte* h_buf = nullptr; + cudaCheck(cudaMallocHost(&h_buf, bufferSize)); + SoA h_soahdLayout(h_buf, elems); + SoAView h_view(h_soahdLayout); + SoAConstView h_Constview(h_soahdLayout); + + // fill up + for (size_t i = 0; i < elems; i++) { + h_view[i].x() = static_cast(i); + h_view[i].y() = static_cast(i) * 2.0f; + h_view[i].z() = static_cast(i) * 3.0f; + h_view[i].v_x() = static_cast(i); + h_view[i].v_y() = static_cast(i) * 20; + h_view[i].v_z() = static_cast(i) * 30; + } + h_view.detectorType() = 42; + + std::byte* d_buf = nullptr; + cudaCheck(cudaMalloc(&d_buf, bufferSize)); + SoA d_soahdLayout(d_buf, elems); + SoAView d_view(d_soahdLayout); + SoAConstView d_Constview(d_soahdLayout); + + std::vector h_position_norms(elems); + std::vector h_velocity_norms(elems); + std::vector h_times(elems); + + float* d_position_norms; + double* d_velocity_norms; + double* d_times; + + cudaCheck(cudaMalloc(&d_position_norms, elems * sizeof(float))); + cudaCheck(cudaMalloc(&d_velocity_norms, elems * sizeof(double))); + cudaCheck(cudaMalloc(&d_times, elems * sizeof(double))); + + // Host → Device copy + cudaCheck(cudaMemcpy(d_buf, h_buf, bufferSize, cudaMemcpyHostToDevice)); + + SECTION("ConstView methods CUDA") { + calculateNorm<<<(elems + 255) / 256, 256>>>(d_Constview, d_position_norms, d_velocity_norms); + + cudaCheck(cudaMemcpy(h_position_norms.data(), d_position_norms, elems * sizeof(float), cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpy(h_velocity_norms.data(), d_velocity_norms, elems * sizeof(double), cudaMemcpyDeviceToHost)); + + // Check for the correctness of the square_norm() functions + for (size_t i = 0; i < elems; i++) { + const float position_norm = + sqrt(h_Constview[i].x() * h_Constview[i].x() + h_Constview[i].y() * h_Constview[i].y() + + h_Constview[i].z() * h_Constview[i].z()); + const double velocity_norm = + sqrt(h_Constview[i].v_x() * h_Constview[i].v_x() + h_Constview[i].v_y() * h_Constview[i].v_y() + + h_Constview[i].v_z() * h_Constview[i].v_z()); + REQUIRE(h_position_norms[i] == position_norm); + REQUIRE(h_velocity_norms[i] == velocity_norm); + } + } + + SECTION("View methods CUDA") { + std::array times; + + // Check for the correctness of the time() function + times[0] = 0.; + for (size_t i = 0; i < elems; i++) { + if (!(i == 0)) + times[i] = h_view[i].x() / h_view[i].v_x(); + } + + checkNormalise<<<(elems + 255) / 256, 256>>>(d_view, d_times); + + cudaCheck(cudaMemcpy(h_times.data(), d_times, elems * sizeof(double), cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpy(h_buf, d_buf, bufferSize, cudaMemcpyDeviceToHost)); + + // Check for the correctness of the time() function + for (size_t i = 0; i < elems; i++) { + REQUIRE(h_times[i] == times[i]); + } + + REQUIRE(h_view[0].square_norm_position() == 0.f); + REQUIRE(h_view[0].square_norm_velocity() == 0.); + for (size_t i = 1; i < elems; i++) { + REQUIRE_THAT(h_view[i].square_norm_position(), Catch::Matchers::WithinAbs(1.f, 1.e-6)); + REQUIRE_THAT(h_view[i].square_norm_velocity(), Catch::Matchers::WithinAbs(1., 1.e-9)); + } + } + + // ===== cleanup ===== + cudaCheck(cudaFree(d_position_norms)); + cudaCheck(cudaFree(d_velocity_norms)); + cudaCheck(cudaFree(d_times)); + cudaCheck(cudaFree(d_buf)); + cudaCheck(cudaFreeHost(h_buf)); +} diff --git a/DataFormats/SoATemplate/test/SoACustomizedMethods_t.hip.cc b/DataFormats/SoATemplate/test/SoACustomizedMethods_t.hip.cc new file mode 100644 index 0000000000000..ced0600f07fa6 --- /dev/null +++ b/DataFormats/SoATemplate/test/SoACustomizedMethods_t.hip.cc @@ -0,0 +1,127 @@ +#define CATCH_CONFIG_MAIN +#include + +#include + +#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h" +#include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h" + +#include "SoADefinition_CustomizedMethods.h" + +__global__ void calculateNorm(SoAConstView soaConstView, float* resultNorm, double* resultVelNorm) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= soaConstView.metadata().size()) + return; + + resultNorm[i] = soaConstView[i].square_norm_position(); + resultVelNorm[i] = soaConstView[i].square_norm_velocity(); +} + +__global__ void checkNormalise(SoAView soaView, double* checkTimesFunction) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= soaView.metadata().size()) + return; + + checkTimesFunction[i] = SoAView::const_element::time(soaView[i].x(), soaView[i].v_x()); + soaView[i].normalise(); +} + +TEST_CASE("SoACustomizedMethods hip", "[SoACustomizedMethods][hip]") { + // common number of elements for the SoAs + const std::size_t elems = 10; + + // buffer size + const std::size_t bufferSize = SoA::computeDataSize(elems); + + std::byte* h_buf = nullptr; + hipCheck(hipHostMalloc(&h_buf, bufferSize)); + SoA h_soahdLayout(h_buf, elems); + SoAView h_view(h_soahdLayout); + SoAConstView h_Constview(h_soahdLayout); + + // fill up + for (size_t i = 0; i < elems; i++) { + h_view[i].x() = static_cast(i); + h_view[i].y() = static_cast(i) * 2.0f; + h_view[i].z() = static_cast(i) * 3.0f; + h_view[i].v_x() = static_cast(i); + h_view[i].v_y() = static_cast(i) * 20; + h_view[i].v_z() = static_cast(i) * 30; + } + h_view.detectorType() = 42; + + std::byte* d_buf = nullptr; + hipCheck(hipMalloc(&d_buf, bufferSize)); + SoA d_soahdLayout(d_buf, elems); + SoAView d_view(d_soahdLayout); + SoAConstView d_Constview(d_soahdLayout); + + std::vector h_position_norms(elems); + std::vector h_velocity_norms(elems); + std::vector h_times(elems); + + float* d_position_norms; + double* d_velocity_norms; + double* d_times; + + hipCheck(hipMalloc(&d_position_norms, elems * sizeof(float))); + hipCheck(hipMalloc(&d_velocity_norms, elems * sizeof(double))); + hipCheck(hipMalloc(&d_times, elems * sizeof(double))); + + // Host → Device copy + hipCheck(hipMemcpy(d_buf, h_buf, bufferSize, hipMemcpyHostToDevice)); + + SECTION("ConstView methods HIP") { + calculateNorm<<<(elems + 255) / 256, 256>>>(d_Constview, d_position_norms, d_velocity_norms); + + hipCheck(hipMemcpy(h_position_norms.data(), d_position_norms, elems * sizeof(float), hipMemcpyDeviceToHost)); + hipCheck(hipMemcpy(h_velocity_norms.data(), d_velocity_norms, elems * sizeof(double), hipMemcpyDeviceToHost)); + + // Check for the correctness of the square_norm() functions + for (size_t i = 0; i < elems; i++) { + const float position_norm = + sqrt(h_Constview[i].x() * h_Constview[i].x() + h_Constview[i].y() * h_Constview[i].y() + + h_Constview[i].z() * h_Constview[i].z()); + const double velocity_norm = + sqrt(h_Constview[i].v_x() * h_Constview[i].v_x() + h_Constview[i].v_y() * h_Constview[i].v_y() + + h_Constview[i].v_z() * h_Constview[i].v_z()); + REQUIRE(h_position_norms[i] == position_norm); + REQUIRE(h_velocity_norms[i] == velocity_norm); + } + } + + SECTION("View methods HIP") { + std::array times; + + // Check for the correctness of the time() function + times[0] = 0.; + for (size_t i = 0; i < elems; i++) { + if (!(i == 0)) + times[i] = h_view[i].x() / h_view[i].v_x(); + } + + checkNormalise<<<(elems + 255) / 256, 256>>>(d_view, d_times); + + hipCheck(hipMemcpy(h_times.data(), d_times, elems * sizeof(double), hipMemcpyDeviceToHost)); + hipCheck(hipMemcpy(h_buf, d_buf, bufferSize, hipMemcpyDeviceToHost)); + + // Check for the correctness of the time() function + for (size_t i = 0; i < elems; i++) { + REQUIRE(h_times[i] == times[i]); + } + + REQUIRE(h_view[0].square_norm_position() == 0.f); + REQUIRE(h_view[0].square_norm_velocity() == 0.); + for (size_t i = 1; i < elems; i++) { + REQUIRE_THAT(h_view[i].square_norm_position(), Catch::Matchers::WithinAbs(1.f, 1.e-6)); + REQUIRE_THAT(h_view[i].square_norm_velocity(), Catch::Matchers::WithinAbs(1., 1.e-9)); + } + } + + // ===== cleanup ===== + hipCheck(hipFree(d_position_norms)); + hipCheck(hipFree(d_velocity_norms)); + hipCheck(hipFree(d_times)); + hipCheck(hipFree(d_buf)); + hipCheck(hipFreeHost(h_buf)); +} diff --git a/DataFormats/SoATemplate/test/SoADefinition_CustomizedMethods.h b/DataFormats/SoATemplate/test/SoADefinition_CustomizedMethods.h new file mode 100644 index 0000000000000..128c26e04c73c --- /dev/null +++ b/DataFormats/SoATemplate/test/SoADefinition_CustomizedMethods.h @@ -0,0 +1,48 @@ +#include + +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +GENERATE_SOA_LAYOUT(SoATemplate, + SOA_COLUMN(float, x), + SOA_COLUMN(float, y), + SOA_COLUMN(float, z), + SOA_COLUMN(double, v_x), + SOA_COLUMN(double, v_y), + SOA_COLUMN(double, v_z), + + SOA_ELEMENT_METHODS( + + SOA_HOST_DEVICE void normalise() { + float norm_position = square_norm_position(); + if (norm_position > 0.0f) { + x() /= norm_position; + y() /= norm_position; + z() /= norm_position; + }; + double norm_velocity = square_norm_velocity(); + if (norm_velocity > 0.0f) { + v_x() /= norm_velocity; + v_y() /= norm_velocity; + v_z() /= norm_velocity; + }; + }), + + SOA_CONST_ELEMENT_METHODS( + SOA_HOST_DEVICE float square_norm_position() + const { return sqrt(x() * x() + y() * y() + z() * z()); }; + + SOA_HOST_DEVICE double square_norm_velocity() + const { return sqrt(v_x() * v_x() + v_y() * v_y() + v_z() * v_z()); }; + + template + SOA_HOST_DEVICE static auto time(T1 pos, T2 vel) { + if (vel != 0) + return pos / vel; + return 0.; + }), + + SOA_SCALAR(int, detectorType)) + +using SoA = SoATemplate<>; +using SoAView = SoA::View; +using SoAConstView = SoA::ConstView;