From 7f9fdd4bcbbd220f78991b6401fbf16e02cc3ba7 Mon Sep 17 00:00:00 2001 From: Simone Balducci Date: Fri, 2 May 2025 14:57:02 +0200 Subject: [PATCH 01/11] Move test standard to C++20 --- test/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index 404e240..4ce1fd2 100644 --- a/test/Makefile +++ b/test/Makefile @@ -12,7 +12,7 @@ HOST_CXXFLAGS := -O2 -fPIC -pthread -march=native -Wall -Wextra -Werror -Wfatal- # Compiler flags supported by GCC but not by the LLVM-based compilers (clang, hipcc, icpx, etc.) LLVM_UNSUPPORTED_CXXFLAGS := --param vect-max-version-for-alias-checks=50 -Werror=format-contains-nul -Wno-non-template-friend -Werror=return-local-addr -Werror=unused-but-set-variable -CXXFLAGS := -std=c++17 $(HOST_CXXFLAGS) -g +CXXFLAGS := -std=c++20 $(HOST_CXXFLAGS) -g LDFLAGS := -O2 -fPIC -pthread -Wl,-E -lstdc++fs -ldl # CUDA From 1d964bd8b22a516b9778523aa378d3146d7476e1 Mon Sep 17 00:00:00 2001 From: Simone Balducci Date: Fri, 2 May 2025 14:57:39 +0200 Subject: [PATCH 02/11] Add sorting algorithm header Add test for C++ sort --- include/algorithm.h | 4 ++++ include/algorithm/sort.h | 30 ++++++++++++++++++++++++++++++ test/sort/sort_t.cc | 21 +++++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 include/algorithm.h create mode 100644 include/algorithm/sort.h create mode 100644 test/sort/sort_t.cc diff --git a/include/algorithm.h b/include/algorithm.h new file mode 100644 index 0000000..52e7191 --- /dev/null +++ b/include/algorithm.h @@ -0,0 +1,4 @@ + +#pragma once + +#include "algorithm/sort.h" diff --git a/include/algorithm/sort.h b/include/algorithm/sort.h new file mode 100644 index 0000000..5940adc --- /dev/null +++ b/include/algorithm/sort.h @@ -0,0 +1,30 @@ + +#pragma once + +#if defined(XTD_TARGET_CUDA) +#include +#elif defined(XTD_TARGET_HIP) +#include +#elif defined(XTD_TARGET_SYCL) +#include +#else +#include +#endif + +namespace xtd { + + template + XTD_DEVICE_FUNCTION inline constexpr void sort(RandomAccessIterator first, + RandomAccessIterator last) { +#if defined(XTD_TARGET_CUDA) + thrust::sort(first, last); +#elif defined(XTD_TARGET_HIP) + rocthrust::sort(first, last); +#elif defined(XTD_TARGET_SYCL) + oneapi::dpl::sort(sycl::execution::dpcpp_default, first, last); +#else + std::sort(first, last); +#endif + } + +} // namespace xtd diff --git a/test/sort/sort_t.cc b/test/sort/sort_t.cc new file mode 100644 index 0000000..e0df072 --- /dev/null +++ b/test/sort/sort_t.cc @@ -0,0 +1,21 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +TEST_CASE("sortCPU", "[sort]") { + const int N = 100; + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::random_shuffle(values.begin(), values.end()); + + xtd::sort(values.begin(), values.end()); + + REQUIRE_THAT(std::equal(values, std::views::iota(N, 0))); +} From c4c1ede174c192464ab1598bf20d785f8cb3b845 Mon Sep 17 00:00:00 2001 From: Simone Balducci Date: Wed, 7 May 2025 15:47:14 +0200 Subject: [PATCH 03/11] Add `min/max/minmax_element` algorithms Add tests for sort, max_element and min_element --- include/algorithm.h | 1 + include/algorithm/extrema.h | 195 +++++++++++++++++++++++++ include/algorithm/sort.h | 53 ++++++- test/max_element/max_element_t.cc | 42 ++++++ test/max_element/max_element_t.cu | 46 ++++++ test/max_element/max_element_t.hip.cc | 46 ++++++ test/max_element/max_element_t.sycl.cc | 42 ++++++ test/min_element/min_element_t.cc | 42 ++++++ test/min_element/min_element_t.cu | 46 ++++++ test/min_element/min_element_t.hip.cc | 46 ++++++ test/min_element/min_element_t.sycl.cc | 42 ++++++ test/sort/sort_t.cc | 25 +++- test/sort/sort_t.cu | 49 +++++++ test/sort/sort_t.hip.cc | 49 +++++++ test/sort/sort_t.sycl.cc | 41 ++++++ 15 files changed, 760 insertions(+), 5 deletions(-) create mode 100644 include/algorithm/extrema.h create mode 100644 test/max_element/max_element_t.cc create mode 100644 test/max_element/max_element_t.cu create mode 100644 test/max_element/max_element_t.hip.cc create mode 100644 test/max_element/max_element_t.sycl.cc create mode 100644 test/min_element/min_element_t.cc create mode 100644 test/min_element/min_element_t.cu create mode 100644 test/min_element/min_element_t.hip.cc create mode 100644 test/min_element/min_element_t.sycl.cc create mode 100644 test/sort/sort_t.cu create mode 100644 test/sort/sort_t.hip.cc create mode 100644 test/sort/sort_t.sycl.cc diff --git a/include/algorithm.h b/include/algorithm.h index 52e7191..595ff5c 100644 --- a/include/algorithm.h +++ b/include/algorithm.h @@ -1,4 +1,5 @@ #pragma once +#include "algorithm/extrema.h" #include "algorithm/sort.h" diff --git a/include/algorithm/extrema.h b/include/algorithm/extrema.h new file mode 100644 index 0000000..4fdaa45 --- /dev/null +++ b/include/algorithm/extrema.h @@ -0,0 +1,195 @@ + +#pragma once + +#include "internal/defines.h" + +#if defined(XTD_TARGET_CUDA) +#include +#elif defined(XTD_TARGET_HIP) +#include +#elif defined(XTD_TARGET_SYCL) +#include +#include +#else +#include +#endif + +namespace xtd { + + template + XTD_DEVICE_FUNCTION inline constexpr ForwardIterator min_element(ForwardIterator first, + ForwardIterator last) { +#if defined(XTD_TARGET_CUDA) + return thrust::min_element(first, last); +#elif defined(XTD_TARGET_HIP) + return rocthrust::min_element(first, last); +#elif defined(XTD_TARGET_SYCL) + return oneapi::dpl::min_element(oneapi::dpl::execution::dpcpp_default, first, last); +#else + return std::min_element(first, last); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr ForwardIterator min_element(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last) { +#if defined(XTD_TARGET_CUDA) + return thrust::min_element(std::forward(policy), first, last); +#elif defined(XTD_TARGET_HIP) + return rocthrust::min_element(std::forward(policy), first, last); +#elif defined(XTD_TARGET_SYCL) + return oneapi::dpl::min_element(std::forward(policy), first, last); +#else + return std::min_element(std::forward(policy), first, last); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr ForwardIterator min_element(ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) { +#if defined(XTD_TARGET_CUDA) + return thrust::min_element(first, last, comp); +#elif defined(XTD_TARGET_HIP) + return rocthrust::min_element(first, last, comp); +#elif defined(XTD_TARGET_SYCL) + return oneapi::dpl::min_element(oneapi::dpl::execution::dpcpp_default, first, last, comp); +#else + return std::min_element(first, last, comp); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr ForwardIterator min_element(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) { +#if defined(XTD_TARGET_CUDA) + return thrust::min_element(std::forward(policy), first, last, comp); +#elif defined(XTD_TARGET_HIP) + return rocthrust::min_element(std::forward(policy), first, last, comp); +#elif defined(XTD_TARGET_SYCL) + return oneapi::dpl::min_element(std::forward(policy), first, last, comp); +#else + return std::min_element(std::forward(policy), first, last, comp); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr ForwardIterator max_element(ForwardIterator first, + ForwardIterator last) { +#if defined(XTD_TARGET_CUDA) + return thrust::max_element(first, last); +#elif defined(XTD_TARGET_HIP) + return rocthrust::max_element(first, last); +#elif defined(XTD_TARGET_SYCL) + return oneapi::dpl::max_element(oneapi::dpl::execution::dpcpp_default, first, last); +#else + return std::max_element(first, last); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr ForwardIterator max_element(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last) { +#if defined(XTD_TARGET_CUDA) + return thrust::max_element(std::forward(policy), first, last); +#elif defined(XTD_TARGET_HIP) + return rocthrust::max_element(std::forward(policy), first, last); +#elif defined(XTD_TARGET_SYCL) + return oneapi::dpl::max_element(std::forward(policy), first, last); +#else + return std::max_element(std::forward(policy), first, last); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr ForwardIterator max_element(ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) { +#if defined(XTD_TARGET_CUDA) + return thrust::max_element(first, last, comp); +#elif defined(XTD_TARGET_HIP) + return rocthrust::max_element(first, last, comp); +#elif defined(XTD_TARGET_SYCL) + return oneapi::dpl::max_element(oneapi::dpl::execution::dpcpp_default, first, last, comp); +#else + return std::max_element(first, last, comp); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr ForwardIterator max_element(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) { +#if defined(XTD_TARGET_CUDA) + return thrust::max_element(std::forward(policy), first, last, comp); +#elif defined(XTD_TARGET_HIP) + return rocthrust::max_element(std::forward(policy), first, last, comp); +#elif defined(XTD_TARGET_SYCL) + return oneapi::dpl::max_element(std::forward(policy), first, last, comp); +#else + return std::max_element(std::forward(policy), first, last, comp); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr std::pair minmax_element( + ForwardIterator first, ForwardIterator last) { +#if defined(XTD_TARGET_CUDA) + return thrust::minmax_element(first, last); +#elif defined(XTD_TARGET_HIP) + return rocthrust::minmax_element(first, last); +#elif defined(XTD_TARGET_SYCL) + return oneapi::dpl::minmax_element(oneapi::dpl::execution::dpcpp_default, first, last); +#else + return std::minmax_element(first, last); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr std::pair minmax_element( + ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last) { +#if defined(XTD_TARGET_CUDA) + return thrust::minmax_element(std::forward(policy), first, last); +#elif defined(XTD_TARGET_HIP) + return rocthrust::minmax_element(std::forward(policy), first, last); +#elif defined(XTD_TARGET_SYCL) + return oneapi::dpl::minmax_element(std::forward(policy), first, last); +#else + return std::minmax_element(std::forward(policy), first, last); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr std::pair minmax_element( + ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { +#if defined(XTD_TARGET_CUDA) + return thrust::minmax_element(first, last, comp); +#elif defined(XTD_TARGET_HIP) + return rocthrust::minmax_element(first, last, comp); +#elif defined(XTD_TARGET_SYCL) + return oneapi::dpl::minmax_element(oneapi::dpl::execution::dpcpp_default, first, last, comp); +#else + return std::minmax_element(first, last, comp); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr std::pair minmax_element( + ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { +#if defined(XTD_TARGET_CUDA) + return thrust::minmax_element(std::forward(policy), first, last, comp); +#elif defined(XTD_TARGET_HIP) + return rocthrust::minmax_element(std::forward(policy), first, last, comp); +#elif defined(XTD_TARGET_SYCL) + return oneapi::dpl::minmax_element(std::forward(policy), first, last, comp); +#else + return std::minmax_element(std::forward(policy), first, last, comp); +#endif + } + +} // namespace xtd diff --git a/include/algorithm/sort.h b/include/algorithm/sort.h index 5940adc..5404e23 100644 --- a/include/algorithm/sort.h +++ b/include/algorithm/sort.h @@ -1,13 +1,16 @@ #pragma once +#include "internal/defines.h" + #if defined(XTD_TARGET_CUDA) #include #elif defined(XTD_TARGET_HIP) #include #elif defined(XTD_TARGET_SYCL) #include -#else +#include +#else #include #endif @@ -21,10 +24,56 @@ namespace xtd { #elif defined(XTD_TARGET_HIP) rocthrust::sort(first, last); #elif defined(XTD_TARGET_SYCL) - oneapi::dpl::sort(sycl::execution::dpcpp_default, first, last); + oneapi::dpl::sort(oneapi::dpl::execution::dpcpp_default, first, last); #else std::sort(first, last); #endif } + template + XTD_DEVICE_FUNCTION inline constexpr void sort(ExecutionPolicy&& policy, + RandomAccessIterator first, + RandomAccessIterator last) { +#if defined(XTD_TARGET_CUDA) + thrust::sort(std::forward(policy), first, last); +#elif defined(XTD_TARGET_HIP) + rocthrust::sort(std::forward(policy), first, last); +#elif defined(XTD_TARGET_SYCL) + oneapi::dpl::sort(std::forward(policy), first, last); +#else + std::sort(std::forward(policy), first, last); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr void sort(RandomAccessIterator first, + RandomAccessIterator last, + Compare comp) { +#if defined(XTD_TARGET_CUDA) + thrust::sort(first, last, comp); +#elif defined(XTD_TARGET_HIP) + rocthrust::sort(first, last, comp); +#elif defined(XTD_TARGET_SYCL) + oneapi::dpl::sort(oneapi::dpl::execution::dpcpp_default, first, last, comp); +#else + std::sort(first, last, comp); +#endif + } + + template + XTD_DEVICE_FUNCTION inline constexpr void sort(ExecutionPolicy&& policy, + RandomAccessIterator first, + RandomAccessIterator last, + Compare comp) { +#if defined(XTD_TARGET_CUDA) + thrust::sort(std::forward(policy), first, last, comp); +#elif defined(XTD_TARGET_HIP) + rocthrust::sort(std::forward(policy), first, last, comp); +#elif defined(XTD_TARGET_SYCL) + oneapi::dpl::sort(std::forward(policy), first, last, comp); +#else + std::sort(std::forward(policy), first, last, comp); +#endif + } + } // namespace xtd diff --git a/test/max_element/max_element_t.cc b/test/max_element/max_element_t.cc new file mode 100644 index 0000000..42a70ba --- /dev/null +++ b/test/max_element/max_element_t.cc @@ -0,0 +1,42 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +TEST_CASE("max_elementCPU", "[max_element]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + SECTION("Default comparison") { + auto max = xtd::max_element(values.begin(), values.end()); + REQUIRE(*max == N - 1); + } + + SECTION("Greater comparison") { + auto max = xtd::max_element(values.begin(), values.end(), std::greater()); + REQUIRE(*max == 0); + } + + SECTION("Unseq execution policy") { + auto max = xtd::max_element(std::execution::unseq, values.begin(), values.end()); + REQUIRE(*max == N - 1); + } + + SECTION("Unseq execution policy with greater comparison") { + auto max = + xtd::max_element(std::execution::unseq, values.begin(), values.end(), std::greater()); + REQUIRE(*max == 0); + } +} diff --git a/test/max_element/max_element_t.cu b/test/max_element/max_element_t.cu new file mode 100644 index 0000000..a0ea1fa --- /dev/null +++ b/test/max_element/max_element_t.cu @@ -0,0 +1,46 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/cuda_check.h" +#include +#include + +TEST_CASE("max_elementCUDA", "[max_element]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + + int* d_values; + CUDA_CHECK(cudaMallocAsync(&d_values, N * sizeof(int), stream)); + CUDA_CHECK(cudaMemcpyAsync(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice, stream)); + + SECTION("Default comparison") { + auto max_iter = xtd::max_element(d_values, d_values + N); + int max; + thrust::copy(d_values, d_values + 1, &max); + REQUIRE(max == N - 1); + } + + SECTION("Greater comparison") { + auto max_iter = xtd::max_element(d_values, d_values + N, std::greater()); + int max; + thrust::copy(d_values, d_values + 1, &max); + REQUIRE(max == 0); + } +} diff --git a/test/max_element/max_element_t.hip.cc b/test/max_element/max_element_t.hip.cc new file mode 100644 index 0000000..872cc7f --- /dev/null +++ b/test/max_element/max_element_t.hip.cc @@ -0,0 +1,46 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/hip_check.h" +#include +#include + +TEST_CASE("max_elementHIP", "[max_element]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + int* d_values; + HIP_CHECK(hipMallocAsync(&d_values, N * sizeof(int), stream)); + HIP_CHECK(hipMemcpyAsync(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice, stream)); + + SECTION("Default comparison") { + auto max_iter = xtd::max_element(d_values, d_values + N); + int max; + thrust::copy(d_values, d_values + 1, &max); + REQUIRE(max == N - 1); + } + + SECTION("Greater comparison") { + auto max_iter = xtd::max_element(d_values, d_values + N, std::greater()); + int max; + thrust::copy(d_values, d_values + 1, &max); + REQUIRE(max == 0); + } +} diff --git a/test/max_element/max_element_t.sycl.cc b/test/max_element/max_element_t.sycl.cc new file mode 100644 index 0000000..b0ddde1 --- /dev/null +++ b/test/max_element/max_element_t.sycl.cc @@ -0,0 +1,42 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include + +TEST_CASE("sortSYCL", "[sort]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + +#ifdef ONEAPI_CPU + auto queue = sycl::queue{sycl::cpu_selector_v, sycl::property::queue::in_order()}; +#else + if (sycl::device::get_devices(sycl::info::device_type::gpu).size() == 0) { + std::cout << "No SYCL GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + auto queue = sycl::queue{sycl::gpu_selector_v, sycl::property::queue::in_order()}; +#endif + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + auto *d_values = sycl::malloc_device(N, queue); + queue.memcpy(d_values, values.data(), N * sizeof(int)).wait(); + + SECTION("Default comparison") { + auto max = xtd::max_element(d_values, d_values + N); + REQUIRE(*max == N - 1); + } + + sycl::free(d_values, queue); +} diff --git a/test/min_element/min_element_t.cc b/test/min_element/min_element_t.cc new file mode 100644 index 0000000..36edadc --- /dev/null +++ b/test/min_element/min_element_t.cc @@ -0,0 +1,42 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +TEST_CASE("min_elementCPU", "[min_element]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + SECTION("Default comparison") { + auto min = xtd::min_element(values.begin(), values.end()); + REQUIRE(*min == 0); + } + + SECTION("Greater comparison") { + auto min = xtd::min_element(values.begin(), values.end(), std::greater()); + REQUIRE(*min == N - 1); + } + + SECTION("Unseq execution policy") { + auto min = xtd::min_element(std::execution::unseq, values.begin(), values.end()); + REQUIRE(*min == 0); + } + + SECTION("Unseq execution policy with greater comparison") { + auto min = + xtd::min_element(std::execution::unseq, values.begin(), values.end(), std::greater()); + REQUIRE(*min == N - 1); + } +} diff --git a/test/min_element/min_element_t.cu b/test/min_element/min_element_t.cu new file mode 100644 index 0000000..ba08da0 --- /dev/null +++ b/test/min_element/min_element_t.cu @@ -0,0 +1,46 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/cuda_check.h" +#include +#include + +TEST_CASE("min_elementCUDA", "[min_element]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + + int* d_values; + CUDA_CHECK(cudaMallocAsync(&d_values, N * sizeof(int), stream)); + CUDA_CHECK(cudaMemcpyAsync(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice, stream)); + + SECTION("Default comparison") { + auto min_iter = xtd::min_element(d_values, d_values + N); + int min; + thrust::copy(d_values, d_values + 1, &min); + REQUIRE(min == 0); + } + + SECTION("Greater comparison") { + auto min_iter = xtd::min_element(d_values, d_values + N, std::greater()); + int min; + thrust::copy(d_values, d_values + 1, &min); + REQUIRE(min == N - 1); + } +} diff --git a/test/min_element/min_element_t.hip.cc b/test/min_element/min_element_t.hip.cc new file mode 100644 index 0000000..431ba4c --- /dev/null +++ b/test/min_element/min_element_t.hip.cc @@ -0,0 +1,46 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/hip_check.h" +#include +#include + +TEST_CASE("min_elementHIP", "[min_element]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + int* d_values; + HIP_CHECK(hipMallocAsync(&d_values, N * sizeof(int), stream)); + HIP_CHECK(hipMemcpyAsync(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice, stream)); + + SECTION("Default comparison") { + auto min_iter = xtd::min_element(d_values, d_values + N); + int min; + thrust::copy(d_values, d_values + 1, &min); + REQUIRE(min == 0); + } + + SECTION("Greater comparison") { + auto min_iter = xtd::min_element(d_values, d_values + N, std::greater()); + int min; + thrust::copy(d_values, d_values + 1, &min); + REQUIRE(min == N - 1); + } +} diff --git a/test/min_element/min_element_t.sycl.cc b/test/min_element/min_element_t.sycl.cc new file mode 100644 index 0000000..f5cc787 --- /dev/null +++ b/test/min_element/min_element_t.sycl.cc @@ -0,0 +1,42 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include + +TEST_CASE("sortSYCL", "[sort]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + +#ifdef ONEAPI_CPU + auto queue = sycl::queue{sycl::cpu_selector_v, sycl::property::queue::in_order()}; +#else + if (sycl::device::get_devices(sycl::info::device_type::gpu).size() == 0) { + std::cout << "No SYCL GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + auto queue = sycl::queue{sycl::gpu_selector_v, sycl::property::queue::in_order()}; +#endif + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + auto *d_values = sycl::malloc_device(N, queue); + queue.memcpy(d_values, values.data(), N * sizeof(int)).wait(); + + SECTION("Default comparison") { + auto min = xtd::min_element(d_values, d_values + N); + REQUIRE(*min == 0); + } + + sycl::free(d_values, queue); +} diff --git a/test/sort/sort_t.cc b/test/sort/sort_t.cc index e0df072..0cefb11 100644 --- a/test/sort/sort_t.cc +++ b/test/sort/sort_t.cc @@ -11,11 +11,30 @@ TEST_CASE("sortCPU", "[sort]") { const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + std::vector values(N); std::iota(values.begin(), values.end(), 0); - std::random_shuffle(values.begin(), values.end()); + std::shuffle(values.begin(), values.end(), rng); + + SECTION("Default comparison") { + xtd::sort(values.begin(), values.end()); + REQUIRE(std::ranges::equal(values, std::views::iota(N, 0))); + } + + SECTION("Greater comparison") { + xtd::sort(values.begin(), values.end(), std::greater()); + REQUIRE(std::ranges::equal(values, std::views::iota(N - 1, -1))); + } - xtd::sort(values.begin(), values.end()); + SECTION("Unseq execution policy") { + xtd::sort(std::execution::unseq, values.begin(), values.end()); + REQUIRE(std::ranges::equal(values, std::views::iota(N, 0))); + } - REQUIRE_THAT(std::equal(values, std::views::iota(N, 0))); + SECTION("Unseq execution policy with greater comparison") { + xtd::sort(std::execution::unseq, values.begin(), values.end(), std::greater()); + REQUIRE(std::ranges::equal(values, std::views::iota(N - 1, -1))); + } } diff --git a/test/sort/sort_t.cu b/test/sort/sort_t.cu new file mode 100644 index 0000000..414fd8d --- /dev/null +++ b/test/sort/sort_t.cu @@ -0,0 +1,49 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/cuda_check.h" +#include + +TEST_CASE("sortCUDA", "[sort]") { + int deviceCount; + cudaError_t cudaStatus = cudaGetDeviceCount(&deviceCount); + + if (cudaStatus != cudaSuccess || deviceCount == 0) { + std::cout << "No NVIDIA GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + CUDA_CHECK(cudaSetDevice(0)); + + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + xtd::sort(values.begin(), values.end()); + + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + + int* d_values; + CUDA_CHECK(cudaMallocAsync(&d_values, N * sizeof(int), stream)); + CUDA_CHECK(cudaMemcpyAsync(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice, stream)); + xtd::sort(d_values, d_values + N); + CUDA_CHECK(cudaGetLastError()); + CUDA_CHECK(cudaMemcpyAsync(values.data(), d_values, N * sizeof(int), cudaMemcpyDeviceToHost, stream)); + CUDA_CHECK(cudaStreamSynchronize(stream)); + + REQUIRE(std::ranges::equal(values, std::views::iota(N, 0))); + + CUDA_CHECK(cudaFreeAsync(d_values, stream)); + CUDA_CHECK(cudaStreamDestroy(stream)); +} diff --git a/test/sort/sort_t.hip.cc b/test/sort/sort_t.hip.cc new file mode 100644 index 0000000..0588936 --- /dev/null +++ b/test/sort/sort_t.hip.cc @@ -0,0 +1,49 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/hip_check.h" +#include + +TEST_CASE("sortHIP", "[sort]") { + int deviceCount; + hipError_t hipStatus = hipGetDeviceCount(&deviceCount); + + if (hipStatus != hipSuccess || deviceCount == 0) { + std::cout << "No AMD GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + HIP_CHECK(hipSetDevice(0)); + + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + xtd::sort(values.begin(), values.end()); + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + int* d_values; + HIP_CHECK(hipMallocAsync(&d_values, N * sizeof(int), stream)); + HIP_CHECK(hipMemcpyAsync(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice, stream)); + xtd::sort(d_values, d_values + N); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpyAsync(values.data(), d_values, N * sizeof(int), hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + REQUIRE(std::ranges::equal(values, std::views::iota(N, 0))); + + HIP_CHECK(hipFreeAsync(d_values, stream)); + HIP_CHECK(hipStreamDestroy(stream)); +} diff --git a/test/sort/sort_t.sycl.cc b/test/sort/sort_t.sycl.cc new file mode 100644 index 0000000..567cc09 --- /dev/null +++ b/test/sort/sort_t.sycl.cc @@ -0,0 +1,41 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include + +TEST_CASE("sortSYCL", "[sort]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + +#ifdef ONEAPI_CPU + auto queue = sycl::queue{sycl::cpu_selector_v, sycl::property::queue::in_order()}; +#else + if (sycl::device::get_devices(sycl::info::device_type::gpu).size() == 0) { + std::cout << "No SYCL GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + auto queue = sycl::queue{sycl::gpu_selector_v, sycl::property::queue::in_order()}; +#endif + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + auto *d_values = sycl::malloc_device(N, queue); + queue.memcpy(d_result, values.data(), N * sizeof(int)).wait(); + xtd::sort(d_values, d_values + N); + queue.memcpy(values.data(), d_values, N * sizeof(int)).wait(); + + REQUIRE(std::ranges::equal(values, std::views::iota(N, 0))); + + sycl::free(d_values, queue); +} From 5987db62030281d5326818273f9c0dad30e8b025 Mon Sep 17 00:00:00 2001 From: Simone Balducci Date: Fri, 9 May 2025 00:14:30 +0200 Subject: [PATCH 04/11] Define `XTD_HOST_FUNCTION` macro --- include/algorithm/extrema.h | 56 +++++++++++++++---------------- include/internal/defines.h | 2 ++ test/max_element/max_element_t.cu | 5 +-- test/min_element/min_element_t.cu | 5 +-- test/sort/sort_t.cc | 1 + 5 files changed, 37 insertions(+), 32 deletions(-) diff --git a/include/algorithm/extrema.h b/include/algorithm/extrema.h index 4fdaa45..6118e12 100644 --- a/include/algorithm/extrema.h +++ b/include/algorithm/extrema.h @@ -17,8 +17,8 @@ namespace xtd { template - XTD_DEVICE_FUNCTION inline constexpr ForwardIterator min_element(ForwardIterator first, - ForwardIterator last) { + XTD_HOST_FUNCTION inline constexpr ForwardIterator min_element(ForwardIterator first, + ForwardIterator last) { #if defined(XTD_TARGET_CUDA) return thrust::min_element(first, last); #elif defined(XTD_TARGET_HIP) @@ -31,9 +31,9 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr ForwardIterator min_element(ExecutionPolicy&& policy, - ForwardIterator first, - ForwardIterator last) { + XTD_HOST_FUNCTION inline constexpr ForwardIterator min_element(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last) { #if defined(XTD_TARGET_CUDA) return thrust::min_element(std::forward(policy), first, last); #elif defined(XTD_TARGET_HIP) @@ -46,9 +46,9 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr ForwardIterator min_element(ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) { + XTD_HOST_FUNCTION inline constexpr ForwardIterator min_element(ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) { #if defined(XTD_TARGET_CUDA) return thrust::min_element(first, last, comp); #elif defined(XTD_TARGET_HIP) @@ -61,10 +61,10 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr ForwardIterator min_element(ExecutionPolicy&& policy, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) { + XTD_HOST_FUNCTION inline constexpr ForwardIterator min_element(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) { #if defined(XTD_TARGET_CUDA) return thrust::min_element(std::forward(policy), first, last, comp); #elif defined(XTD_TARGET_HIP) @@ -77,8 +77,8 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr ForwardIterator max_element(ForwardIterator first, - ForwardIterator last) { + XTD_HOST_FUNCTION inline constexpr ForwardIterator max_element(ForwardIterator first, + ForwardIterator last) { #if defined(XTD_TARGET_CUDA) return thrust::max_element(first, last); #elif defined(XTD_TARGET_HIP) @@ -91,9 +91,9 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr ForwardIterator max_element(ExecutionPolicy&& policy, - ForwardIterator first, - ForwardIterator last) { + XTD_HOST_FUNCTION inline constexpr ForwardIterator max_element(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last) { #if defined(XTD_TARGET_CUDA) return thrust::max_element(std::forward(policy), first, last); #elif defined(XTD_TARGET_HIP) @@ -106,9 +106,9 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr ForwardIterator max_element(ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) { + XTD_HOST_FUNCTION inline constexpr ForwardIterator max_element(ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) { #if defined(XTD_TARGET_CUDA) return thrust::max_element(first, last, comp); #elif defined(XTD_TARGET_HIP) @@ -121,10 +121,10 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr ForwardIterator max_element(ExecutionPolicy&& policy, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) { + XTD_HOST_FUNCTION inline constexpr ForwardIterator max_element(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) { #if defined(XTD_TARGET_CUDA) return thrust::max_element(std::forward(policy), first, last, comp); #elif defined(XTD_TARGET_HIP) @@ -137,7 +137,7 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr std::pair minmax_element( + XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( ForwardIterator first, ForwardIterator last) { #if defined(XTD_TARGET_CUDA) return thrust::minmax_element(first, last); @@ -151,7 +151,7 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr std::pair minmax_element( + XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last) { #if defined(XTD_TARGET_CUDA) return thrust::minmax_element(std::forward(policy), first, last); @@ -165,7 +165,7 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr std::pair minmax_element( + XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { #if defined(XTD_TARGET_CUDA) return thrust::minmax_element(first, last, comp); @@ -179,7 +179,7 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr std::pair minmax_element( + XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { #if defined(XTD_TARGET_CUDA) return thrust::minmax_element(std::forward(policy), first, last, comp); diff --git a/include/internal/defines.h b/include/internal/defines.h index 8be3730..6955e2e 100644 --- a/include/internal/defines.h +++ b/include/internal/defines.h @@ -10,9 +10,11 @@ #if defined(__CUDACC__) || defined(__HIPCC__) // CUDA or HIP/ROCm compiler #define XTD_DEVICE_FUNCTION __host__ __device__ +#define XTD_HOST_FUNCTION __host__ #else // SYCL or standard C++ code #define XTD_DEVICE_FUNCTION +#define XTD_HOST_FUNCTION #endif // XTD_TARGET_... diff --git a/test/max_element/max_element_t.cu b/test/max_element/max_element_t.cu index a0ea1fa..a643787 100644 --- a/test/max_element/max_element_t.cu +++ b/test/max_element/max_element_t.cu @@ -13,6 +13,7 @@ #include "common/cuda_check.h" #include #include +#include TEST_CASE("max_elementCUDA", "[max_element]") { const int N = 100; @@ -33,14 +34,14 @@ TEST_CASE("max_elementCUDA", "[max_element]") { SECTION("Default comparison") { auto max_iter = xtd::max_element(d_values, d_values + N); int max; - thrust::copy(d_values, d_values + 1, &max); + thrust::copy(thrust::device, d_values, d_values + 1, &max); REQUIRE(max == N - 1); } SECTION("Greater comparison") { auto max_iter = xtd::max_element(d_values, d_values + N, std::greater()); int max; - thrust::copy(d_values, d_values + 1, &max); + thrust::copy(thrust::device, d_values, d_values + 1, &max); REQUIRE(max == 0); } } diff --git a/test/min_element/min_element_t.cu b/test/min_element/min_element_t.cu index ba08da0..eab866e 100644 --- a/test/min_element/min_element_t.cu +++ b/test/min_element/min_element_t.cu @@ -13,6 +13,7 @@ #include "common/cuda_check.h" #include #include +#include TEST_CASE("min_elementCUDA", "[min_element]") { const int N = 100; @@ -33,14 +34,14 @@ TEST_CASE("min_elementCUDA", "[min_element]") { SECTION("Default comparison") { auto min_iter = xtd::min_element(d_values, d_values + N); int min; - thrust::copy(d_values, d_values + 1, &min); + thrust::copy(thrust::device, d_values, d_values + 1, &min); REQUIRE(min == 0); } SECTION("Greater comparison") { auto min_iter = xtd::min_element(d_values, d_values + N, std::greater()); int min; - thrust::copy(d_values, d_values + 1, &min); + thrust::copy(thrust::device, d_values, d_values + 1, &min); REQUIRE(min == N - 1); } } diff --git a/test/sort/sort_t.cc b/test/sort/sort_t.cc index 0cefb11..b283e7a 100644 --- a/test/sort/sort_t.cc +++ b/test/sort/sort_t.cc @@ -1,5 +1,6 @@ #include +#include #include #include #include From df2a96627e8ec1d63fbbc63f5734a69abedbc6db Mon Sep 17 00:00:00 2001 From: Simone Balducci Date: Fri, 9 May 2025 00:27:17 +0200 Subject: [PATCH 05/11] Update makefile --- test/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/Makefile b/test/Makefile index 4ce1fd2..6b67cd5 100644 --- a/test/Makefile +++ b/test/Makefile @@ -7,7 +7,7 @@ CXX := g++ GCC_TOOLCHAIN := $(abspath $(dir $(shell which $(CXX)))/..) GCC_TARGET := $(shell $(CXX) -dumpmachine) # Catch2 needs -Wno-unused-variable -HOST_CXXFLAGS := -O2 -fPIC -pthread -march=native -Wall -Wextra -Werror -Wfatal-errors -Wno-unused-variable +HOST_CXXFLAGS := -O2 -fPIC -pthread -march=native -Wall -Wextra -Wfatal-errors -Wno-unused-variable # Compiler flags supported by GCC but not by the LLVM-based compilers (clang, hipcc, icpx, etc.) LLVM_UNSUPPORTED_CXXFLAGS := --param vect-max-version-for-alias-checks=50 -Werror=format-contains-nul -Wno-non-template-friend -Werror=return-local-addr -Werror=unused-but-set-variable @@ -31,11 +31,11 @@ else CUDA_NVCC := $(CUDA_BASE)/bin/nvcc define CUFLAGS_template $(2)NVCC_FLAGS := $$(foreach ARCH,$(1),-gencode arch=compute_$$(ARCH),code=[sm_$$(ARCH),compute_$$(ARCH)]) -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored --expt-relaxed-constexpr --expt-extended-lambda --generate-line-info --source-in-ptx --display-error-number --threads $$(words $(1)) --cudart=shared - $(2)NVCC_COMMON := -std=c++17 -O3 -g $$($(2)NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS)' + $(2)NVCC_COMMON := -std=c++20 -O3 -g $$($(2)NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS)' $(2)CUDA_CUFLAGS := $$($(2)NVCC_COMMON) endef $(eval $(call CUFLAGS_template,$(CUDA_ARCH),)) - NVCC_COMMON := -std=c++17 -O3 -g $(NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS)' + NVCC_COMMON := -std=c++20 -O3 -g $(NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS)' CUDA_CUFLAGS := $(NVCC_COMMON) endif @@ -51,7 +51,7 @@ else ROCM_DEPS := $(ROCM_LIBDIR)/libamdhip64.so ROCM_ARCH := gfx900 gfx90a gfx1030 ROCM_HIPCC := $(ROCM_BASE)/bin/hipcc - HIPCC_CXXFLAGS := -fno-gpu-rdc $(foreach ARCH,$(ROCM_ARCH),--offload-arch=$(ARCH)) $(filter-out $(LLVM_UNSUPPORTED_CXXFLAGS),$(CXXFLAGS)) --target=$(GCC_TARGET) --gcc-toolchain=$(GCC_TOOLCHAIN) -I$(ROCM_BASE)/include/hip -Wno-unused-result + HIPCC_CXXFLAGS := -fno-gpu-rdc $(foreach ARCH,$(ROCM_ARCH),--offload-arch=$(ARCH)) $(filter-out $(LLVM_UNSUPPORTED_CXXFLAGS),$(CXXFLAGS)) --target=$(GCC_TARGET) --gcc-toolchain=$(GCC_TOOLCHAIN) -I$(ROCM_BASE)/include/hip -I$(ROCM_BASE)/include/thrust -Wno-unused-result HIPCC_LDFLAGS := $(LDFLAGS) --target=$(GCC_TARGET) --gcc-toolchain=$(GCC_TOOLCHAIN) endif @@ -67,7 +67,7 @@ else SYCL_CXX := $(SYCL_BASE)/bin/icpx SYCL_CPU_TARGET := -fsycl-targets=spir64_x86_64 SYCL_FLAGS := -fsycl -fp-model=precise - SYCL_CXXFLAGS := $(filter-out $(LLVM_UNSUPPORTED_CXXFLAGS),$(CXXFLAGS)) $(SYCL_FLAGS) -Wno-unused-variable + SYCL_CXXFLAGS := $(filter-out $(LLVM_UNSUPPORTED_CXXFLAGS),$(CXXFLAGS)) $(SYCL_FLAGS) -Wno-unused-variable -Wunused-parameter SYCL_LDFLAGS := # Check for Intel GPU existence From 15d3cd961bf1f7111aa61e4a187eb3be975e5acf Mon Sep 17 00:00:00 2001 From: Simone Balducci Date: Fri, 9 May 2025 00:28:19 +0200 Subject: [PATCH 06/11] Specify sort as host functions --- include/algorithm/sort.h | 8 ++++---- test/max_element/max_element_t.cu | 4 ++-- test/max_element/max_element_t.hip.cc | 4 ++-- test/min_element/min_element_t.cu | 4 ++-- test/min_element/min_element_t.hip.cc | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/algorithm/sort.h b/include/algorithm/sort.h index 5404e23..e6539c6 100644 --- a/include/algorithm/sort.h +++ b/include/algorithm/sort.h @@ -17,7 +17,7 @@ namespace xtd { template - XTD_DEVICE_FUNCTION inline constexpr void sort(RandomAccessIterator first, + XTD_HOST_FUNCTION inline constexpr void sort(RandomAccessIterator first, RandomAccessIterator last) { #if defined(XTD_TARGET_CUDA) thrust::sort(first, last); @@ -31,7 +31,7 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr void sort(ExecutionPolicy&& policy, + XTD_HOST_FUNCTION inline constexpr void sort(ExecutionPolicy&& policy, RandomAccessIterator first, RandomAccessIterator last) { #if defined(XTD_TARGET_CUDA) @@ -46,7 +46,7 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr void sort(RandomAccessIterator first, + XTD_HOST_FUNCTION inline constexpr void sort(RandomAccessIterator first, RandomAccessIterator last, Compare comp) { #if defined(XTD_TARGET_CUDA) @@ -61,7 +61,7 @@ namespace xtd { } template - XTD_DEVICE_FUNCTION inline constexpr void sort(ExecutionPolicy&& policy, + XTD_HOST_FUNCTION inline constexpr void sort(ExecutionPolicy&& policy, RandomAccessIterator first, RandomAccessIterator last, Compare comp) { diff --git a/test/max_element/max_element_t.cu b/test/max_element/max_element_t.cu index a643787..b519348 100644 --- a/test/max_element/max_element_t.cu +++ b/test/max_element/max_element_t.cu @@ -34,14 +34,14 @@ TEST_CASE("max_elementCUDA", "[max_element]") { SECTION("Default comparison") { auto max_iter = xtd::max_element(d_values, d_values + N); int max; - thrust::copy(thrust::device, d_values, d_values + 1, &max); + thrust::copy(thrust::device, max_iter, max_iter + 1, &max); REQUIRE(max == N - 1); } SECTION("Greater comparison") { auto max_iter = xtd::max_element(d_values, d_values + N, std::greater()); int max; - thrust::copy(thrust::device, d_values, d_values + 1, &max); + thrust::copy(thrust::device, max_iter , max_iter + 1, &max); REQUIRE(max == 0); } } diff --git a/test/max_element/max_element_t.hip.cc b/test/max_element/max_element_t.hip.cc index 872cc7f..987bade 100644 --- a/test/max_element/max_element_t.hip.cc +++ b/test/max_element/max_element_t.hip.cc @@ -33,14 +33,14 @@ TEST_CASE("max_elementHIP", "[max_element]") { SECTION("Default comparison") { auto max_iter = xtd::max_element(d_values, d_values + N); int max; - thrust::copy(d_values, d_values + 1, &max); + thrust::copy(max_iter, max_iter + 1, &max); REQUIRE(max == N - 1); } SECTION("Greater comparison") { auto max_iter = xtd::max_element(d_values, d_values + N, std::greater()); int max; - thrust::copy(d_values, d_values + 1, &max); + thrust::copy(max_iter, max_iter + 1, &max); REQUIRE(max == 0); } } diff --git a/test/min_element/min_element_t.cu b/test/min_element/min_element_t.cu index eab866e..f132828 100644 --- a/test/min_element/min_element_t.cu +++ b/test/min_element/min_element_t.cu @@ -34,14 +34,14 @@ TEST_CASE("min_elementCUDA", "[min_element]") { SECTION("Default comparison") { auto min_iter = xtd::min_element(d_values, d_values + N); int min; - thrust::copy(thrust::device, d_values, d_values + 1, &min); + thrust::copy(thrust::device, min_iter , min_iter + 1, &min); REQUIRE(min == 0); } SECTION("Greater comparison") { auto min_iter = xtd::min_element(d_values, d_values + N, std::greater()); int min; - thrust::copy(thrust::device, d_values, d_values + 1, &min); + thrust::copy(thrust::device, min_iter, min_iter + 1, &min); REQUIRE(min == N - 1); } } diff --git a/test/min_element/min_element_t.hip.cc b/test/min_element/min_element_t.hip.cc index 431ba4c..1b646cf 100644 --- a/test/min_element/min_element_t.hip.cc +++ b/test/min_element/min_element_t.hip.cc @@ -33,14 +33,14 @@ TEST_CASE("min_elementHIP", "[min_element]") { SECTION("Default comparison") { auto min_iter = xtd::min_element(d_values, d_values + N); int min; - thrust::copy(d_values, d_values + 1, &min); + thrust::copy(min_iter, min_iter + 1, &min); REQUIRE(min == 0); } SECTION("Greater comparison") { auto min_iter = xtd::min_element(d_values, d_values + N, std::greater()); int min; - thrust::copy(d_values, d_values + 1, &min); + thrust::copy(min_iter, min_iter + 1, &min); REQUIRE(min == N - 1); } } From b21616abe1fc21460ffeb55dc8b85d1426f733c0 Mon Sep 17 00:00:00 2001 From: Simone Balducci Date: Thu, 15 May 2025 10:43:44 +0200 Subject: [PATCH 07/11] Define flags for different backends --- include/algorithm/extrema.h | 88 ++++++++++++++++++------------------- include/algorithm/sort.h | 46 +++++++++---------- include/internal/defines.h | 10 +++++ 3 files changed, 77 insertions(+), 67 deletions(-) diff --git a/include/algorithm/extrema.h b/include/algorithm/extrema.h index 6118e12..950485b 100644 --- a/include/algorithm/extrema.h +++ b/include/algorithm/extrema.h @@ -3,11 +3,11 @@ #include "internal/defines.h" -#if defined(XTD_TARGET_CUDA) +#if defined(XTD_CUDA_BACKEND) #include -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) #include -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) #include #include #else @@ -19,11 +19,11 @@ namespace xtd { template XTD_HOST_FUNCTION inline constexpr ForwardIterator min_element(ForwardIterator first, ForwardIterator last) { -#if defined(XTD_TARGET_CUDA) - return thrust::min_element(first, last); -#elif defined(XTD_TARGET_HIP) - return rocthrust::min_element(first, last); -#elif defined(XTD_TARGET_SYCL) +#if defined(XTD_CUDA_BACKEND) + return thrust::min_element(thrust::device, first, last); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::min_element(thrust::hip::par, first, last); +#elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::min_element(oneapi::dpl::execution::dpcpp_default, first, last); #else return std::min_element(first, last); @@ -34,11 +34,11 @@ namespace xtd { XTD_HOST_FUNCTION inline constexpr ForwardIterator min_element(ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last) { -#if defined(XTD_TARGET_CUDA) +#if defined(XTD_CUDA_BACKEND) return thrust::min_element(std::forward(policy), first, last); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) return rocthrust::min_element(std::forward(policy), first, last); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::min_element(std::forward(policy), first, last); #else return std::min_element(std::forward(policy), first, last); @@ -49,11 +49,11 @@ namespace xtd { XTD_HOST_FUNCTION inline constexpr ForwardIterator min_element(ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { -#if defined(XTD_TARGET_CUDA) - return thrust::min_element(first, last, comp); -#elif defined(XTD_TARGET_HIP) - return rocthrust::min_element(first, last, comp); -#elif defined(XTD_TARGET_SYCL) +#if defined(XTD_CUDA_BACKEND) + return thrust::min_element(thrust::device, first, last, comp); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::min_element(thrust::hip::par, first, last, comp); +#elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::min_element(oneapi::dpl::execution::dpcpp_default, first, last, comp); #else return std::min_element(first, last, comp); @@ -65,11 +65,11 @@ namespace xtd { ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { -#if defined(XTD_TARGET_CUDA) +#if defined(XTD_CUDA_BACKEND) return thrust::min_element(std::forward(policy), first, last, comp); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) return rocthrust::min_element(std::forward(policy), first, last, comp); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::min_element(std::forward(policy), first, last, comp); #else return std::min_element(std::forward(policy), first, last, comp); @@ -79,11 +79,11 @@ namespace xtd { template XTD_HOST_FUNCTION inline constexpr ForwardIterator max_element(ForwardIterator first, ForwardIterator last) { -#if defined(XTD_TARGET_CUDA) - return thrust::max_element(first, last); -#elif defined(XTD_TARGET_HIP) +#if defined(XTD_CUDA_BACKEND) + return thrust::max_element(thrust::device, first, last); +#elif defined(XTD_HIP_BACKEND) return rocthrust::max_element(first, last); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::max_element(oneapi::dpl::execution::dpcpp_default, first, last); #else return std::max_element(first, last); @@ -94,11 +94,11 @@ namespace xtd { XTD_HOST_FUNCTION inline constexpr ForwardIterator max_element(ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last) { -#if defined(XTD_TARGET_CUDA) +#if defined(XTD_CUDA_BACKEND) return thrust::max_element(std::forward(policy), first, last); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) return rocthrust::max_element(std::forward(policy), first, last); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::max_element(std::forward(policy), first, last); #else return std::max_element(std::forward(policy), first, last); @@ -109,11 +109,11 @@ namespace xtd { XTD_HOST_FUNCTION inline constexpr ForwardIterator max_element(ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { -#if defined(XTD_TARGET_CUDA) +#if defined(XTD_CUDA_BACKEND) return thrust::max_element(first, last, comp); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) return rocthrust::max_element(first, last, comp); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::max_element(oneapi::dpl::execution::dpcpp_default, first, last, comp); #else return std::max_element(first, last, comp); @@ -125,11 +125,11 @@ namespace xtd { ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { -#if defined(XTD_TARGET_CUDA) +#if defined(XTD_CUDA_BACKEND) return thrust::max_element(std::forward(policy), first, last, comp); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) return rocthrust::max_element(std::forward(policy), first, last, comp); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::max_element(std::forward(policy), first, last, comp); #else return std::max_element(std::forward(policy), first, last, comp); @@ -139,11 +139,11 @@ namespace xtd { template XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( ForwardIterator first, ForwardIterator last) { -#if defined(XTD_TARGET_CUDA) +#if defined(XTD_CUDA_BACKEND) return thrust::minmax_element(first, last); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) return rocthrust::minmax_element(first, last); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::minmax_element(oneapi::dpl::execution::dpcpp_default, first, last); #else return std::minmax_element(first, last); @@ -153,11 +153,11 @@ namespace xtd { template XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last) { -#if defined(XTD_TARGET_CUDA) +#if defined(XTD_CUDA_BACKEND) return thrust::minmax_element(std::forward(policy), first, last); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) return rocthrust::minmax_element(std::forward(policy), first, last); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::minmax_element(std::forward(policy), first, last); #else return std::minmax_element(std::forward(policy), first, last); @@ -167,11 +167,11 @@ namespace xtd { template XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { -#if defined(XTD_TARGET_CUDA) +#if defined(XTD_CUDA_BACKEND) return thrust::minmax_element(first, last, comp); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) return rocthrust::minmax_element(first, last, comp); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::minmax_element(oneapi::dpl::execution::dpcpp_default, first, last, comp); #else return std::minmax_element(first, last, comp); @@ -181,11 +181,11 @@ namespace xtd { template XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { -#if defined(XTD_TARGET_CUDA) +#if defined(XTD_CUDA_BACKEND) return thrust::minmax_element(std::forward(policy), first, last, comp); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) return rocthrust::minmax_element(std::forward(policy), first, last, comp); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::minmax_element(std::forward(policy), first, last, comp); #else return std::minmax_element(std::forward(policy), first, last, comp); diff --git a/include/algorithm/sort.h b/include/algorithm/sort.h index e6539c6..36f4447 100644 --- a/include/algorithm/sort.h +++ b/include/algorithm/sort.h @@ -3,11 +3,11 @@ #include "internal/defines.h" -#if defined(XTD_TARGET_CUDA) +#if defined(XTD_CUDA_BACKEND) #include -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) #include -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) #include #include #else @@ -18,12 +18,12 @@ namespace xtd { template XTD_HOST_FUNCTION inline constexpr void sort(RandomAccessIterator first, - RandomAccessIterator last) { -#if defined(XTD_TARGET_CUDA) + RandomAccessIterator last) { +#if defined(XTD_CUDA_BACKEND) thrust::sort(first, last); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) rocthrust::sort(first, last); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) oneapi::dpl::sort(oneapi::dpl::execution::dpcpp_default, first, last); #else std::sort(first, last); @@ -32,13 +32,13 @@ namespace xtd { template XTD_HOST_FUNCTION inline constexpr void sort(ExecutionPolicy&& policy, - RandomAccessIterator first, - RandomAccessIterator last) { -#if defined(XTD_TARGET_CUDA) + RandomAccessIterator first, + RandomAccessIterator last) { +#if defined(XTD_CUDA_BACKEND) thrust::sort(std::forward(policy), first, last); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) rocthrust::sort(std::forward(policy), first, last); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) oneapi::dpl::sort(std::forward(policy), first, last); #else std::sort(std::forward(policy), first, last); @@ -47,13 +47,13 @@ namespace xtd { template XTD_HOST_FUNCTION inline constexpr void sort(RandomAccessIterator first, - RandomAccessIterator last, - Compare comp) { -#if defined(XTD_TARGET_CUDA) + RandomAccessIterator last, + Compare comp) { +#if defined(XTD_CUDA_BACKEND) thrust::sort(first, last, comp); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) rocthrust::sort(first, last, comp); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) oneapi::dpl::sort(oneapi::dpl::execution::dpcpp_default, first, last, comp); #else std::sort(first, last, comp); @@ -62,14 +62,14 @@ namespace xtd { template XTD_HOST_FUNCTION inline constexpr void sort(ExecutionPolicy&& policy, - RandomAccessIterator first, - RandomAccessIterator last, - Compare comp) { -#if defined(XTD_TARGET_CUDA) + RandomAccessIterator first, + RandomAccessIterator last, + Compare comp) { +#if defined(XTD_CUDA_BACKEND) thrust::sort(std::forward(policy), first, last, comp); -#elif defined(XTD_TARGET_HIP) +#elif defined(XTD_HIP_BACKEND) rocthrust::sort(std::forward(policy), first, last, comp); -#elif defined(XTD_TARGET_SYCL) +#elif defined(XTD_SYCL_BACKEND) oneapi::dpl::sort(std::forward(policy), first, last, comp); #else std::sort(std::forward(policy), first, last, comp); diff --git a/include/internal/defines.h b/include/internal/defines.h index 6955e2e..9b229a8 100644 --- a/include/internal/defines.h +++ b/include/internal/defines.h @@ -17,6 +17,16 @@ #define XTD_HOST_FUNCTION #endif +#if defined(__CUDACC__) +#define XTD_CUDA_BACKEND +#elif defined(__HIPCC__) +#define XTD_HIP_BACKEND +#elif defined(__SYCL_COMPILER_VERSION) +#define XTD_SYCL_BACKEND +#else +#define XTD_SERIAL_BACKEND +#endif + // XTD_TARGET_... #if defined(__CUDA_ARCH__) // CUDA device code From 78a470ec364a6ce36048e47426149f9943b02e0f Mon Sep 17 00:00:00 2001 From: Simone Balducci Date: Thu, 15 May 2025 11:46:48 +0200 Subject: [PATCH 08/11] Specify execution policies in algorithms Update tests --- include/algorithm/extrema.h | 14 ++++++------- include/algorithm/sort.h | 8 +++---- test/max_element/max_element_t.cu | 19 +++++++++-------- test/max_element/max_element_t.hip.cc | 13 ++++++------ test/max_element/max_element_t.sycl.cc | 9 ++++++-- test/min_element/min_element_t.cu | 9 ++++---- test/min_element/min_element_t.hip.cc | 13 ++++++------ test/min_element/min_element_t.sycl.cc | 9 ++++++-- test/sort/sort_t.cc | 8 +++---- test/sort/sort_t.cu | 29 ++++++++++++++------------ test/sort/sort_t.hip.cc | 29 ++++++++++++++------------ test/sort/sort_t.sycl.cc | 16 +++++++++++--- 12 files changed, 100 insertions(+), 76 deletions(-) diff --git a/include/algorithm/extrema.h b/include/algorithm/extrema.h index 950485b..c27c74f 100644 --- a/include/algorithm/extrema.h +++ b/include/algorithm/extrema.h @@ -82,7 +82,7 @@ namespace xtd { #if defined(XTD_CUDA_BACKEND) return thrust::max_element(thrust::device, first, last); #elif defined(XTD_HIP_BACKEND) - return rocthrust::max_element(first, last); + return rocthrust::max_element(thrust::hip::par, first, last); #elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::max_element(oneapi::dpl::execution::dpcpp_default, first, last); #else @@ -110,9 +110,9 @@ namespace xtd { ForwardIterator last, BinaryPredicate comp) { #if defined(XTD_CUDA_BACKEND) - return thrust::max_element(first, last, comp); + return thrust::max_element(thrust::device, first, last, comp); #elif defined(XTD_HIP_BACKEND) - return rocthrust::max_element(first, last, comp); + return rocthrust::max_element(thrust::hip::par, first, last, comp); #elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::max_element(oneapi::dpl::execution::dpcpp_default, first, last, comp); #else @@ -140,9 +140,9 @@ namespace xtd { XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( ForwardIterator first, ForwardIterator last) { #if defined(XTD_CUDA_BACKEND) - return thrust::minmax_element(first, last); + return thrust::minmax_element(thrust::device, first, last); #elif defined(XTD_HIP_BACKEND) - return rocthrust::minmax_element(first, last); + return rocthrust::minmax_element(thrust::hip::par, first, last); #elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::minmax_element(oneapi::dpl::execution::dpcpp_default, first, last); #else @@ -168,9 +168,9 @@ namespace xtd { XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { #if defined(XTD_CUDA_BACKEND) - return thrust::minmax_element(first, last, comp); + return thrust::minmax_element(thrust::device, first, last, comp); #elif defined(XTD_HIP_BACKEND) - return rocthrust::minmax_element(first, last, comp); + return rocthrust::minmax_element(thrust::hip::par, first, last, comp); #elif defined(XTD_SYCL_BACKEND) return oneapi::dpl::minmax_element(oneapi::dpl::execution::dpcpp_default, first, last, comp); #else diff --git a/include/algorithm/sort.h b/include/algorithm/sort.h index 36f4447..63d8333 100644 --- a/include/algorithm/sort.h +++ b/include/algorithm/sort.h @@ -20,9 +20,9 @@ namespace xtd { XTD_HOST_FUNCTION inline constexpr void sort(RandomAccessIterator first, RandomAccessIterator last) { #if defined(XTD_CUDA_BACKEND) - thrust::sort(first, last); + thrust::sort(thrust::device, first, last); #elif defined(XTD_HIP_BACKEND) - rocthrust::sort(first, last); + rocthrust::sort(thrustd::hip::par, first, last); #elif defined(XTD_SYCL_BACKEND) oneapi::dpl::sort(oneapi::dpl::execution::dpcpp_default, first, last); #else @@ -50,9 +50,9 @@ namespace xtd { RandomAccessIterator last, Compare comp) { #if defined(XTD_CUDA_BACKEND) - thrust::sort(first, last, comp); + thrust::sort(thrust::device, first, last, comp); #elif defined(XTD_HIP_BACKEND) - rocthrust::sort(first, last, comp); + rocthrust::sort(thrust::hip::par, first, last, comp); #elif defined(XTD_SYCL_BACKEND) oneapi::dpl::sort(oneapi::dpl::execution::dpcpp_default, first, last, comp); #else diff --git a/test/max_element/max_element_t.cu b/test/max_element/max_element_t.cu index b519348..61115e0 100644 --- a/test/max_element/max_element_t.cu +++ b/test/max_element/max_element_t.cu @@ -13,7 +13,9 @@ #include "common/cuda_check.h" #include #include +#include #include +#include TEST_CASE("max_elementCUDA", "[max_element]") { const int N = 100; @@ -24,24 +26,23 @@ TEST_CASE("max_elementCUDA", "[max_element]") { std::iota(values.begin(), values.end(), 0); std::shuffle(values.begin(), values.end(), rng); - cudaStream_t stream; - CUDA_CHECK(cudaStreamCreate(&stream)); - int* d_values; - CUDA_CHECK(cudaMallocAsync(&d_values, N * sizeof(int), stream)); - CUDA_CHECK(cudaMemcpyAsync(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice, stream)); + CUDA_CHECK(cudaMalloc(&d_values, N * sizeof(int))); + CUDA_CHECK(cudaMemcpy(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice)); SECTION("Default comparison") { auto max_iter = xtd::max_element(d_values, d_values + N); - int max; - thrust::copy(thrust::device, max_iter, max_iter + 1, &max); + int max; + thrust::copy(thrust::device, max_iter, max_iter+1, &max); REQUIRE(max == N - 1); } SECTION("Greater comparison") { auto max_iter = xtd::max_element(d_values, d_values + N, std::greater()); - int max; - thrust::copy(thrust::device, max_iter , max_iter + 1, &max); + int max; + thrust::copy(thrust::device, max_iter, max_iter+1, &max); REQUIRE(max == 0); } + + CUDA_CHECK(cudaFree(d_values)); } diff --git a/test/max_element/max_element_t.hip.cc b/test/max_element/max_element_t.hip.cc index 987bade..82b2c7e 100644 --- a/test/max_element/max_element_t.hip.cc +++ b/test/max_element/max_element_t.hip.cc @@ -23,24 +23,23 @@ TEST_CASE("max_elementHIP", "[max_element]") { std::iota(values.begin(), values.end(), 0); std::shuffle(values.begin(), values.end(), rng); - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - int* d_values; - HIP_CHECK(hipMallocAsync(&d_values, N * sizeof(int), stream)); - HIP_CHECK(hipMemcpyAsync(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMalloc(&d_values, N * sizeof(int))); + HIP_CHECK(hipMemcpy(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice)); SECTION("Default comparison") { auto max_iter = xtd::max_element(d_values, d_values + N); int max; - thrust::copy(max_iter, max_iter + 1, &max); + thrust::copy(thrust::hip::par, max_iter, max_iter + 1, &max); REQUIRE(max == N - 1); } SECTION("Greater comparison") { auto max_iter = xtd::max_element(d_values, d_values + N, std::greater()); int max; - thrust::copy(max_iter, max_iter + 1, &max); + thrust::copy(thrust::hip::par, max_iter, max_iter + 1, &max); REQUIRE(max == 0); } + + HIP_CHECK(hipFree(d_values)); } diff --git a/test/max_element/max_element_t.sycl.cc b/test/max_element/max_element_t.sycl.cc index b0ddde1..b28c4fd 100644 --- a/test/max_element/max_element_t.sycl.cc +++ b/test/max_element/max_element_t.sycl.cc @@ -34,8 +34,13 @@ TEST_CASE("sortSYCL", "[sort]") { queue.memcpy(d_values, values.data(), N * sizeof(int)).wait(); SECTION("Default comparison") { - auto max = xtd::max_element(d_values, d_values + N); - REQUIRE(*max == N - 1); + auto max = xtd::max_element(d_values, d_values + N); + REQUIRE(*max == N - 1); + } + + SECTION("Greater comparison") { + auto max = xtd::max_element(d_values, d_values + N, std::greater{}); + REQUIRE(*max == 0); } sycl::free(d_values, queue); diff --git a/test/min_element/min_element_t.cu b/test/min_element/min_element_t.cu index f132828..6fde3d4 100644 --- a/test/min_element/min_element_t.cu +++ b/test/min_element/min_element_t.cu @@ -24,12 +24,9 @@ TEST_CASE("min_elementCUDA", "[min_element]") { std::iota(values.begin(), values.end(), 0); std::shuffle(values.begin(), values.end(), rng); - cudaStream_t stream; - CUDA_CHECK(cudaStreamCreate(&stream)); - int* d_values; - CUDA_CHECK(cudaMallocAsync(&d_values, N * sizeof(int), stream)); - CUDA_CHECK(cudaMemcpyAsync(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice, stream)); + CUDA_CHECK(cudaMalloc(&d_values, N * sizeof(int))); + CUDA_CHECK(cudaMemcpy(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice)); SECTION("Default comparison") { auto min_iter = xtd::min_element(d_values, d_values + N); @@ -44,4 +41,6 @@ TEST_CASE("min_elementCUDA", "[min_element]") { thrust::copy(thrust::device, min_iter, min_iter + 1, &min); REQUIRE(min == N - 1); } + + CUDA_CHECK(cudaFree(d_values)); } diff --git a/test/min_element/min_element_t.hip.cc b/test/min_element/min_element_t.hip.cc index 1b646cf..7292494 100644 --- a/test/min_element/min_element_t.hip.cc +++ b/test/min_element/min_element_t.hip.cc @@ -23,24 +23,23 @@ TEST_CASE("min_elementHIP", "[min_element]") { std::iota(values.begin(), values.end(), 0); std::shuffle(values.begin(), values.end(), rng); - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - int* d_values; - HIP_CHECK(hipMallocAsync(&d_values, N * sizeof(int), stream)); - HIP_CHECK(hipMemcpyAsync(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMalloc(&d_values, N * sizeof(int))); + HIP_CHECK(hipMemcpy(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice)); SECTION("Default comparison") { auto min_iter = xtd::min_element(d_values, d_values + N); int min; - thrust::copy(min_iter, min_iter + 1, &min); + thrust::copy(thrust::hip::par, min_iter, min_iter + 1, &min); REQUIRE(min == 0); } SECTION("Greater comparison") { auto min_iter = xtd::min_element(d_values, d_values + N, std::greater()); int min; - thrust::copy(min_iter, min_iter + 1, &min); + thrust::copy(thrust::hip::par, min_iter, min_iter + 1, &min); REQUIRE(min == N - 1); } + + HIP_CHECK(hipFree(d_values)); } diff --git a/test/min_element/min_element_t.sycl.cc b/test/min_element/min_element_t.sycl.cc index f5cc787..bfca1d8 100644 --- a/test/min_element/min_element_t.sycl.cc +++ b/test/min_element/min_element_t.sycl.cc @@ -34,8 +34,13 @@ TEST_CASE("sortSYCL", "[sort]") { queue.memcpy(d_values, values.data(), N * sizeof(int)).wait(); SECTION("Default comparison") { - auto min = xtd::min_element(d_values, d_values + N); - REQUIRE(*min == 0); + auto min = xtd::min_element(d_values, d_values + N); + REQUIRE(*min == 0); + } + + SECTION("Greater comparison") { + auto min = xtd::min_element(d_values, d_values + N, std::greater{}); + REQUIRE(*min == N - 1); } sycl::free(d_values, queue); diff --git a/test/sort/sort_t.cc b/test/sort/sort_t.cc index b283e7a..d0a95b1 100644 --- a/test/sort/sort_t.cc +++ b/test/sort/sort_t.cc @@ -21,21 +21,21 @@ TEST_CASE("sortCPU", "[sort]") { SECTION("Default comparison") { xtd::sort(values.begin(), values.end()); - REQUIRE(std::ranges::equal(values, std::views::iota(N, 0))); + REQUIRE(std::ranges::equal(values, std::views::iota(0, N))); } SECTION("Greater comparison") { xtd::sort(values.begin(), values.end(), std::greater()); - REQUIRE(std::ranges::equal(values, std::views::iota(N - 1, -1))); + REQUIRE(std::ranges::equal(values, std::views::iota(0, N) | std::views::reverse)); } SECTION("Unseq execution policy") { xtd::sort(std::execution::unseq, values.begin(), values.end()); - REQUIRE(std::ranges::equal(values, std::views::iota(N, 0))); + REQUIRE(std::ranges::equal(values, std::views::iota(0, N))); } SECTION("Unseq execution policy with greater comparison") { xtd::sort(std::execution::unseq, values.begin(), values.end(), std::greater()); - REQUIRE(std::ranges::equal(values, std::views::iota(N - 1, -1))); + REQUIRE(std::ranges::equal(values, std::views::iota(0, N) | std::views::reverse)); } } diff --git a/test/sort/sort_t.cu b/test/sort/sort_t.cu index 414fd8d..ec34bf8 100644 --- a/test/sort/sort_t.cu +++ b/test/sort/sort_t.cu @@ -29,21 +29,24 @@ TEST_CASE("sortCUDA", "[sort]") { std::vector values(N); std::iota(values.begin(), values.end(), 0); std::shuffle(values.begin(), values.end(), rng); - xtd::sort(values.begin(), values.end()); - - cudaStream_t stream; - CUDA_CHECK(cudaStreamCreate(&stream)); int* d_values; - CUDA_CHECK(cudaMallocAsync(&d_values, N * sizeof(int), stream)); - CUDA_CHECK(cudaMemcpyAsync(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice, stream)); - xtd::sort(d_values, d_values + N); - CUDA_CHECK(cudaGetLastError()); - CUDA_CHECK(cudaMemcpyAsync(values.data(), d_values, N * sizeof(int), cudaMemcpyDeviceToHost, stream)); - CUDA_CHECK(cudaStreamSynchronize(stream)); + CUDA_CHECK(cudaMalloc(&d_values, N * sizeof(int))); + CUDA_CHECK(cudaMemcpy(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice)); + + SECTION("Default comparison") { + xtd::sort(d_values, d_values + N); + CUDA_CHECK(cudaMemcpy(values.data(), d_values, N * sizeof(int), cudaMemcpyDeviceToHost)); + + REQUIRE(std::ranges::equal(values, std::views::iota(0, N))); + } + + SECTION("Greater comparison") { + xtd::sort(d_values, d_values + N, std::greater{}); + CUDA_CHECK(cudaMemcpy(values.data(), d_values, N * sizeof(int), cudaMemcpyDeviceToHost)); - REQUIRE(std::ranges::equal(values, std::views::iota(N, 0))); + REQUIRE(std::ranges::equal(values, std::views::iota(0, N) | std::views::reverse)); + } - CUDA_CHECK(cudaFreeAsync(d_values, stream)); - CUDA_CHECK(cudaStreamDestroy(stream)); + CUDA_CHECK(cudaFree(d_values)); } diff --git a/test/sort/sort_t.hip.cc b/test/sort/sort_t.hip.cc index 0588936..90c25c2 100644 --- a/test/sort/sort_t.hip.cc +++ b/test/sort/sort_t.hip.cc @@ -29,21 +29,24 @@ TEST_CASE("sortHIP", "[sort]") { std::vector values(N); std::iota(values.begin(), values.end(), 0); std::shuffle(values.begin(), values.end(), rng); - xtd::sort(values.begin(), values.end()); - - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); int* d_values; - HIP_CHECK(hipMallocAsync(&d_values, N * sizeof(int), stream)); - HIP_CHECK(hipMemcpyAsync(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice, stream)); - xtd::sort(d_values, d_values + N); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipMemcpyAsync(values.data(), d_values, N * sizeof(int), hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipMalloc(&d_values, N * sizeof(int))); + HIP_CHECK(hipMemcpy(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice)); + + SECTION("Default comparison") { + xtd::sort(d_values, d_values + N); + HIP_CHECK(hipMemcpy(values.data(), d_values, N * sizeof(int), hipMemcpyDeviceToHost)); + + REQUIRE(std::ranges::equal(values, std::views::iota(0, N))); + } + + SECTION("Greater comparison") { + xtd::sort(d_values, d_values + N, std::greater{}); + HIP_CHECK(hipMemcpy(values.data(), d_values, N * sizeof(int), hipMemcpyDeviceToHost)); - REQUIRE(std::ranges::equal(values, std::views::iota(N, 0))); + REQUIRE(std::ranges::equal(values, std::views::iota(0, N) | std::views::reverse)); + } - HIP_CHECK(hipFreeAsync(d_values, stream)); - HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipFree(d_values)); } diff --git a/test/sort/sort_t.sycl.cc b/test/sort/sort_t.sycl.cc index 567cc09..878dcfe 100644 --- a/test/sort/sort_t.sycl.cc +++ b/test/sort/sort_t.sycl.cc @@ -32,10 +32,20 @@ TEST_CASE("sortSYCL", "[sort]") { auto *d_values = sycl::malloc_device(N, queue); queue.memcpy(d_result, values.data(), N * sizeof(int)).wait(); - xtd::sort(d_values, d_values + N); - queue.memcpy(values.data(), d_values, N * sizeof(int)).wait(); - REQUIRE(std::ranges::equal(values, std::views::iota(N, 0))); + SECTION("Default comparison") { + xtd::sort(d_values, d_values + N); + queue.memcpy(values.data(), d_values, N * sizeof(int)).wait(); + + REQUIRE(std::ranges::equal(values, std::views::iota(0, N))); + } + + SECTION("Greater comparison") { + xtd::sort(d_values, d_values + N, std::greater{}); + queue.memcpy(values.data(), d_values, N * sizeof(int)).wait(); + + REQUIRE(std::ranges::equal(values, std::views::iota(0, N) | std::views::reverse)); + } sycl::free(d_values, queue); } From bced3e99a652e261cd2449422b7fd8de30c403ad Mon Sep 17 00:00:00 2001 From: Simone Balducci Date: Thu, 15 May 2025 11:47:11 +0200 Subject: [PATCH 09/11] Compile cuda also for sm90 --- test/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Makefile b/test/Makefile index 6b67cd5..bdd7598 100644 --- a/test/Makefile +++ b/test/Makefile @@ -25,7 +25,7 @@ else # CUDA platform at $(CUDA_BASE) CUDA_LIBDIR := $(CUDA_BASE)/lib64 CUDA_DEPS := $(CUDA_LIBDIR)/libcudart.so - CUDA_ARCH := 60 70 80 + CUDA_ARCH := 60 70 80 90 CUDA_CXXFLAGS := -I$(CUDA_BASE)/include CUDA_LDFLAGS := -L$(CUDA_LIBDIR) -lcudart -lcudadevrt CUDA_NVCC := $(CUDA_BASE)/bin/nvcc From 6060827cdc31f4ac7727bd1e6939b3a71a3a4ba7 Mon Sep 17 00:00:00 2001 From: Simone Balducci Date: Thu, 12 Jun 2025 14:23:58 +0200 Subject: [PATCH 10/11] Add interface for `reduce` --- include/algorithm.h | 1 + include/algorithm/reduce.h | 109 +++++++++++++++++++++++++++++++++++ test/reduce/reduce_t.cc | 52 +++++++++++++++++ test/reduce/reduce_t.cu | 49 ++++++++++++++++ test/reduce/reduce_t.hip.cc | 48 +++++++++++++++ test/reduce/reduce_t.sycl.cc | 52 +++++++++++++++++ 6 files changed, 311 insertions(+) create mode 100644 include/algorithm/reduce.h create mode 100644 test/reduce/reduce_t.cc create mode 100644 test/reduce/reduce_t.cu create mode 100644 test/reduce/reduce_t.hip.cc create mode 100644 test/reduce/reduce_t.sycl.cc diff --git a/include/algorithm.h b/include/algorithm.h index 595ff5c..d7870d7 100644 --- a/include/algorithm.h +++ b/include/algorithm.h @@ -2,4 +2,5 @@ #pragma once #include "algorithm/extrema.h" +#include "algorithm/reduce.h" #include "algorithm/sort.h" diff --git a/include/algorithm/reduce.h b/include/algorithm/reduce.h new file mode 100644 index 0000000..fe57d11 --- /dev/null +++ b/include/algorithm/reduce.h @@ -0,0 +1,109 @@ + +#pragma once + +#include "internal/defines.h" + +#if defined(XTD_CUDA_BACKEND) +#include +#elif defined(XTD_HIP_BACKEND) +#include +#elif defined(XTD_SYCL_BACKEND) +#include +#include +#else +#include +#endif + +namespace xtd { + + template + XTD_HOST_FUNCTION inline constexpr typename std::iterator_traits::value_type + reduce(InputIterator first, InputIterator last) { +#if defined(XTD_CUDA_BACKEND) + return thrust::reduce(thrust::device, first, last); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::reduce(thrust::hip::par, first, last); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::reduce(oneapi::dpl::execution::dpcpp_default, first, last); +#else + return std::reduce(first, last); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr typename std::iterator_traits::value_type + reduce(ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last) { +#if defined(XTD_CUDA_BACKEND) + return thrust::reduce(std::forward(policy), first, last); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::reduce(std::forward(policy), first, last); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::reduce(std::forward(policy), first, last); +#else + return std::reduce(std::forward(policy), first, last); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr T reduce(InputIterator first, InputIterator last, T init) { +#if defined(XTD_CUDA_BACKEND) + return thrust::reduce(thrust::device, first, last, init); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::reduce(thrust::hip::par, first, last, init); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::reduce(oneapi::dpl::execution::dpcpp_default, first, last, init); +#else + return std::reduce(first, last, init); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr T reduce(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last, + T init) { +#if defined(XTD_CUDA_BACKEND) + return thrust::reduce(std::forward(policy), first, last, init); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::reduce(std::forward(policy), first, last, init); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::reduce(std::forward(policy), first, last, init); +#else + return std::reduce(std::forward(policy), first, last, init); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr T reduce(InputIterator first, + InputIterator last, + T init, + BinaryOperation op) { +#if defined(XTD_CUDA_BACKEND) + return thrust::reduce(thrust::device, first, last, init, op); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::reduce(thrust::hip::par, first, last, init, op); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::reduce(oneapi::dpl::execution::dpcpp_default, first, last, init, op); +#else + return std::reduce(first, last, init, op); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr T reduce(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last, + T init, + BinaryOperation op) { +#if defined(XTD_CUDA_BACKEND) + return thrust::reduce(std::forward(policy), first, last, init, op); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::reduce(std::forward(policy), first, last, init, op); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::reduce(std::forward(policy), first, last, init, op); +#else + return std::reduce(std::forward(policy), first, last, init, op); +#endif + } + +} // namespace xtd diff --git a/test/reduce/reduce_t.cc b/test/reduce/reduce_t.cc new file mode 100644 index 0000000..5b7f89c --- /dev/null +++ b/test/reduce/reduce_t.cc @@ -0,0 +1,52 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +TEST_CASE("reduceCPU", "[reduce]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + + SECTION("Default reduction") { + auto red = xtd::reduce(values.begin(), values.end()); + REQUIRE(red == std::reduce(values.begin(), values.end())); + } + + SECTION("Less comparison") { + auto red = xtd::reduce(values.begin(), values.end(), -1, std::less()); + REQUIRE(red == std::reduce(values.begin(), values.end(), -1, std::less())); + } + + SECTION("Unseq execution policy") { + int red = xtd::reduce(std::execution::unseq, values.begin(), values.end()); + REQUIRE(red == std::reduce(values.begin(), values.end())); + } + + SECTION("Unseq execution policy with less comparison") { + auto red = + xtd::reduce(std::execution::unseq, values.begin(), values.end(), -1, std::less()); + REQUIRE(red == + std::reduce(std::execution::unseq, values.begin(), values.end(), -1, std::less())); + } + + SECTION("Reduction with initial value") { + auto red = xtd::reduce(values.begin(), values.end(), 1); + REQUIRE(red == std::reduce(values.begin(), values.end(), 1)); + } + + SECTION("Reduction with initial value and unseq policy") { + auto red = xtd::reduce(std::execution::unseq, values.begin(), values.end(), 1); + REQUIRE(red == std::reduce(std::execution::unseq, values.begin(), values.end(), 1)); + } +} diff --git a/test/reduce/reduce_t.cu b/test/reduce/reduce_t.cu new file mode 100644 index 0000000..7810df6 --- /dev/null +++ b/test/reduce/reduce_t.cu @@ -0,0 +1,49 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/cuda_check.h" +#include +#include +#include + +TEST_CASE("reduceCUDA", "[reduce]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + + int* d_values; + CUDA_CHECK(cudaMallocAsync(&d_values, N * sizeof(int), stream)); + CUDA_CHECK( + cudaMemcpyAsync(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice, stream)); + + SECTION("Default reduction") { + auto red = xtd::reduce(d_values, d_values + N); + REQUIRE(red == std::reduce(values.begin(), values.end())); + } + + SECTION("Less comparison") { + auto red = xtd::reduce(d_values, d_values + N, -1, std::less()); + REQUIRE(red == std::reduce(values.begin(), values.end(), -1, std::less())); + } + + SECTION("Reduction with initial value") { + auto red = xtd::reduce(d_values, d_values + N, 1); + REQUIRE(red == std::reduce(values.begin(), values.end(), 1)); + } +} diff --git a/test/reduce/reduce_t.hip.cc b/test/reduce/reduce_t.hip.cc new file mode 100644 index 0000000..575a603 --- /dev/null +++ b/test/reduce/reduce_t.hip.cc @@ -0,0 +1,48 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/hip_check.h" +#include +#include + +TEST_CASE("reduceHIP", "[reduce]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + int* d_values; + HIP_CHECK(hipMallocAsync(&d_values, N * sizeof(int), stream)); + HIP_CHECK( + hipMemcpyAsync(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice, stream)); + + SECTION("Default reduction") { + auto red = xtd::reduce(d_values, d_values + N); + REQUIRE(red == std::reduce(values.begin(), values.end())); + } + + SECTION("Less comparison") { + auto red = xtd::reduce(d_values, d_values + N, -1, std::less()); + REQUIRE(red == std::reduce(values.begin(), values.end(), -1, std::less())); + } + + SECTION("Reduction with initial value") { + auto red = xtd::reduce(d_values, d_values + N, 1); + REQUIRE(red == std::reduce(values.begin(), values.end(), 1)); + } +} diff --git a/test/reduce/reduce_t.sycl.cc b/test/reduce/reduce_t.sycl.cc new file mode 100644 index 0000000..ada03d8 --- /dev/null +++ b/test/reduce/reduce_t.sycl.cc @@ -0,0 +1,52 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include + +TEST_CASE("reduceSYCL", "[reduce]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + +#ifdef ONEAPI_CPU + auto queue = sycl::queue{sycl::cpu_selector_v, sycl::property::queue::in_order()}; +#else + if (sycl::device::get_devices(sycl::info::device_type::gpu).size() == 0) { + std::cout << "No SYCL GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + auto queue = sycl::queue{sycl::gpu_selector_v, sycl::property::queue::in_order()}; +#endif + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + auto *d_values = sycl::malloc_device(N, queue); + queue.memcpy(d_values, values.data(), N * sizeof(int)).wait(); + + SECTION("Default reduction") { + auto red = xtd::reduce(d_values, d_values + N); + REQUIRE(red == std::reduce(values.begin(), values.end())); + } + + SECTION("Less comparison") { + auto red = xtd::reduce(d_values, d_values + N, -1, std::less()); + REQUIRE(red == std::reduce(values.begin(), values.end(), -1, std::less())); + } + + SECTION("Reduction with initial value") { + auto red = xtd::reduce(d_values, d_values + N, 1); + REQUIRE(red == std::reduce(values.begin(), values.end(), 1)); + } + + sycl::free(d_values, queue); +} From 961440767dec1203a90f06685ccb2a80f62ea4d8 Mon Sep 17 00:00:00 2001 From: Simone Balducci Date: Wed, 23 Jul 2025 17:10:47 +0200 Subject: [PATCH 11/11] Fix path for rocthrust --- include/algorithm/extrema.h | 4 +--- include/algorithm/reduce.h | 4 +--- include/algorithm/sort.h | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/include/algorithm/extrema.h b/include/algorithm/extrema.h index c27c74f..45a7aba 100644 --- a/include/algorithm/extrema.h +++ b/include/algorithm/extrema.h @@ -3,10 +3,8 @@ #include "internal/defines.h" -#if defined(XTD_CUDA_BACKEND) +#if defined(XTD_CUDA_BACKEND) || defined(XTD_HIP_BACKEND) #include -#elif defined(XTD_HIP_BACKEND) -#include #elif defined(XTD_SYCL_BACKEND) #include #include diff --git a/include/algorithm/reduce.h b/include/algorithm/reduce.h index fe57d11..e5def9e 100644 --- a/include/algorithm/reduce.h +++ b/include/algorithm/reduce.h @@ -3,10 +3,8 @@ #include "internal/defines.h" -#if defined(XTD_CUDA_BACKEND) +#if defined(XTD_CUDA_BACKEND) || defined(XTD_HIP_BACKEND) #include -#elif defined(XTD_HIP_BACKEND) -#include #elif defined(XTD_SYCL_BACKEND) #include #include diff --git a/include/algorithm/sort.h b/include/algorithm/sort.h index 63d8333..2df3f92 100644 --- a/include/algorithm/sort.h +++ b/include/algorithm/sort.h @@ -3,10 +3,8 @@ #include "internal/defines.h" -#if defined(XTD_CUDA_BACKEND) +#if defined(XTD_CUDA_BACKEND) || defined(XTD_HIP_BACKEND) #include -#elif defined(XTD_HIP_BACKEND) -#include #elif defined(XTD_SYCL_BACKEND) #include #include