diff --git a/include/algorithm.h b/include/algorithm.h new file mode 100644 index 0000000..d7870d7 --- /dev/null +++ b/include/algorithm.h @@ -0,0 +1,6 @@ + +#pragma once + +#include "algorithm/extrema.h" +#include "algorithm/reduce.h" +#include "algorithm/sort.h" diff --git a/include/algorithm/extrema.h b/include/algorithm/extrema.h new file mode 100644 index 0000000..45a7aba --- /dev/null +++ b/include/algorithm/extrema.h @@ -0,0 +1,193 @@ + +#pragma once + +#include "internal/defines.h" + +#if defined(XTD_CUDA_BACKEND) || defined(XTD_HIP_BACKEND) +#include +#elif defined(XTD_SYCL_BACKEND) +#include +#include +#else +#include +#endif + +namespace xtd { + + template + XTD_HOST_FUNCTION inline constexpr ForwardIterator min_element(ForwardIterator first, + ForwardIterator last) { +#if defined(XTD_CUDA_BACKEND) + return thrust::min_element(thrust::device, first, last); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::min_element(thrust::hip::par, first, last); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::min_element(oneapi::dpl::execution::dpcpp_default, first, last); +#else + return std::min_element(first, last); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr ForwardIterator min_element(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last) { +#if defined(XTD_CUDA_BACKEND) + return thrust::min_element(std::forward(policy), first, last); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::min_element(std::forward(policy), first, last); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::min_element(std::forward(policy), first, last); +#else + return std::min_element(std::forward(policy), first, last); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr ForwardIterator min_element(ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) { +#if defined(XTD_CUDA_BACKEND) + return thrust::min_element(thrust::device, first, last, comp); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::min_element(thrust::hip::par, first, last, comp); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::min_element(oneapi::dpl::execution::dpcpp_default, first, last, comp); +#else + return std::min_element(first, last, comp); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr ForwardIterator min_element(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) { +#if defined(XTD_CUDA_BACKEND) + return thrust::min_element(std::forward(policy), first, last, comp); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::min_element(std::forward(policy), first, last, comp); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::min_element(std::forward(policy), first, last, comp); +#else + return std::min_element(std::forward(policy), first, last, comp); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr ForwardIterator max_element(ForwardIterator first, + ForwardIterator last) { +#if defined(XTD_CUDA_BACKEND) + return thrust::max_element(thrust::device, first, last); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::max_element(thrust::hip::par, first, last); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::max_element(oneapi::dpl::execution::dpcpp_default, first, last); +#else + return std::max_element(first, last); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr ForwardIterator max_element(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last) { +#if defined(XTD_CUDA_BACKEND) + return thrust::max_element(std::forward(policy), first, last); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::max_element(std::forward(policy), first, last); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::max_element(std::forward(policy), first, last); +#else + return std::max_element(std::forward(policy), first, last); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr ForwardIterator max_element(ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) { +#if defined(XTD_CUDA_BACKEND) + return thrust::max_element(thrust::device, first, last, comp); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::max_element(thrust::hip::par, first, last, comp); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::max_element(oneapi::dpl::execution::dpcpp_default, first, last, comp); +#else + return std::max_element(first, last, comp); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr ForwardIterator max_element(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last, + BinaryPredicate comp) { +#if defined(XTD_CUDA_BACKEND) + return thrust::max_element(std::forward(policy), first, last, comp); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::max_element(std::forward(policy), first, last, comp); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::max_element(std::forward(policy), first, last, comp); +#else + return std::max_element(std::forward(policy), first, last, comp); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( + ForwardIterator first, ForwardIterator last) { +#if defined(XTD_CUDA_BACKEND) + return thrust::minmax_element(thrust::device, first, last); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::minmax_element(thrust::hip::par, first, last); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::minmax_element(oneapi::dpl::execution::dpcpp_default, first, last); +#else + return std::minmax_element(first, last); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( + ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last) { +#if defined(XTD_CUDA_BACKEND) + return thrust::minmax_element(std::forward(policy), first, last); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::minmax_element(std::forward(policy), first, last); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::minmax_element(std::forward(policy), first, last); +#else + return std::minmax_element(std::forward(policy), first, last); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( + ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { +#if defined(XTD_CUDA_BACKEND) + return thrust::minmax_element(thrust::device, first, last, comp); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::minmax_element(thrust::hip::par, first, last, comp); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::minmax_element(oneapi::dpl::execution::dpcpp_default, first, last, comp); +#else + return std::minmax_element(first, last, comp); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr std::pair minmax_element( + ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { +#if defined(XTD_CUDA_BACKEND) + return thrust::minmax_element(std::forward(policy), first, last, comp); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::minmax_element(std::forward(policy), first, last, comp); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::minmax_element(std::forward(policy), first, last, comp); +#else + return std::minmax_element(std::forward(policy), first, last, comp); +#endif + } + +} // namespace xtd diff --git a/include/algorithm/reduce.h b/include/algorithm/reduce.h new file mode 100644 index 0000000..e5def9e --- /dev/null +++ b/include/algorithm/reduce.h @@ -0,0 +1,107 @@ + +#pragma once + +#include "internal/defines.h" + +#if defined(XTD_CUDA_BACKEND) || defined(XTD_HIP_BACKEND) +#include +#elif defined(XTD_SYCL_BACKEND) +#include +#include +#else +#include +#endif + +namespace xtd { + + template + XTD_HOST_FUNCTION inline constexpr typename std::iterator_traits::value_type + reduce(InputIterator first, InputIterator last) { +#if defined(XTD_CUDA_BACKEND) + return thrust::reduce(thrust::device, first, last); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::reduce(thrust::hip::par, first, last); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::reduce(oneapi::dpl::execution::dpcpp_default, first, last); +#else + return std::reduce(first, last); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr typename std::iterator_traits::value_type + reduce(ExecutionPolicy&& policy, ForwardIterator first, ForwardIterator last) { +#if defined(XTD_CUDA_BACKEND) + return thrust::reduce(std::forward(policy), first, last); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::reduce(std::forward(policy), first, last); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::reduce(std::forward(policy), first, last); +#else + return std::reduce(std::forward(policy), first, last); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr T reduce(InputIterator first, InputIterator last, T init) { +#if defined(XTD_CUDA_BACKEND) + return thrust::reduce(thrust::device, first, last, init); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::reduce(thrust::hip::par, first, last, init); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::reduce(oneapi::dpl::execution::dpcpp_default, first, last, init); +#else + return std::reduce(first, last, init); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr T reduce(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last, + T init) { +#if defined(XTD_CUDA_BACKEND) + return thrust::reduce(std::forward(policy), first, last, init); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::reduce(std::forward(policy), first, last, init); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::reduce(std::forward(policy), first, last, init); +#else + return std::reduce(std::forward(policy), first, last, init); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr T reduce(InputIterator first, + InputIterator last, + T init, + BinaryOperation op) { +#if defined(XTD_CUDA_BACKEND) + return thrust::reduce(thrust::device, first, last, init, op); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::reduce(thrust::hip::par, first, last, init, op); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::reduce(oneapi::dpl::execution::dpcpp_default, first, last, init, op); +#else + return std::reduce(first, last, init, op); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr T reduce(ExecutionPolicy&& policy, + ForwardIterator first, + ForwardIterator last, + T init, + BinaryOperation op) { +#if defined(XTD_CUDA_BACKEND) + return thrust::reduce(std::forward(policy), first, last, init, op); +#elif defined(XTD_HIP_BACKEND) + return rocthrust::reduce(std::forward(policy), first, last, init, op); +#elif defined(XTD_SYCL_BACKEND) + return oneapi::dpl::reduce(std::forward(policy), first, last, init, op); +#else + return std::reduce(std::forward(policy), first, last, init, op); +#endif + } + +} // namespace xtd diff --git a/include/algorithm/sort.h b/include/algorithm/sort.h new file mode 100644 index 0000000..2df3f92 --- /dev/null +++ b/include/algorithm/sort.h @@ -0,0 +1,77 @@ + +#pragma once + +#include "internal/defines.h" + +#if defined(XTD_CUDA_BACKEND) || defined(XTD_HIP_BACKEND) +#include +#elif defined(XTD_SYCL_BACKEND) +#include +#include +#else +#include +#endif + +namespace xtd { + + template + XTD_HOST_FUNCTION inline constexpr void sort(RandomAccessIterator first, + RandomAccessIterator last) { +#if defined(XTD_CUDA_BACKEND) + thrust::sort(thrust::device, first, last); +#elif defined(XTD_HIP_BACKEND) + rocthrust::sort(thrustd::hip::par, first, last); +#elif defined(XTD_SYCL_BACKEND) + oneapi::dpl::sort(oneapi::dpl::execution::dpcpp_default, first, last); +#else + std::sort(first, last); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr void sort(ExecutionPolicy&& policy, + RandomAccessIterator first, + RandomAccessIterator last) { +#if defined(XTD_CUDA_BACKEND) + thrust::sort(std::forward(policy), first, last); +#elif defined(XTD_HIP_BACKEND) + rocthrust::sort(std::forward(policy), first, last); +#elif defined(XTD_SYCL_BACKEND) + oneapi::dpl::sort(std::forward(policy), first, last); +#else + std::sort(std::forward(policy), first, last); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr void sort(RandomAccessIterator first, + RandomAccessIterator last, + Compare comp) { +#if defined(XTD_CUDA_BACKEND) + thrust::sort(thrust::device, first, last, comp); +#elif defined(XTD_HIP_BACKEND) + rocthrust::sort(thrust::hip::par, first, last, comp); +#elif defined(XTD_SYCL_BACKEND) + oneapi::dpl::sort(oneapi::dpl::execution::dpcpp_default, first, last, comp); +#else + std::sort(first, last, comp); +#endif + } + + template + XTD_HOST_FUNCTION inline constexpr void sort(ExecutionPolicy&& policy, + RandomAccessIterator first, + RandomAccessIterator last, + Compare comp) { +#if defined(XTD_CUDA_BACKEND) + thrust::sort(std::forward(policy), first, last, comp); +#elif defined(XTD_HIP_BACKEND) + rocthrust::sort(std::forward(policy), first, last, comp); +#elif defined(XTD_SYCL_BACKEND) + oneapi::dpl::sort(std::forward(policy), first, last, comp); +#else + std::sort(std::forward(policy), first, last, comp); +#endif + } + +} // namespace xtd diff --git a/include/internal/defines.h b/include/internal/defines.h index 8be3730..9b229a8 100644 --- a/include/internal/defines.h +++ b/include/internal/defines.h @@ -10,9 +10,21 @@ #if defined(__CUDACC__) || defined(__HIPCC__) // CUDA or HIP/ROCm compiler #define XTD_DEVICE_FUNCTION __host__ __device__ +#define XTD_HOST_FUNCTION __host__ #else // SYCL or standard C++ code #define XTD_DEVICE_FUNCTION +#define XTD_HOST_FUNCTION +#endif + +#if defined(__CUDACC__) +#define XTD_CUDA_BACKEND +#elif defined(__HIPCC__) +#define XTD_HIP_BACKEND +#elif defined(__SYCL_COMPILER_VERSION) +#define XTD_SYCL_BACKEND +#else +#define XTD_SERIAL_BACKEND #endif // XTD_TARGET_... diff --git a/test/Makefile b/test/Makefile index 404e240..bdd7598 100644 --- a/test/Makefile +++ b/test/Makefile @@ -7,12 +7,12 @@ CXX := g++ GCC_TOOLCHAIN := $(abspath $(dir $(shell which $(CXX)))/..) GCC_TARGET := $(shell $(CXX) -dumpmachine) # Catch2 needs -Wno-unused-variable -HOST_CXXFLAGS := -O2 -fPIC -pthread -march=native -Wall -Wextra -Werror -Wfatal-errors -Wno-unused-variable +HOST_CXXFLAGS := -O2 -fPIC -pthread -march=native -Wall -Wextra -Wfatal-errors -Wno-unused-variable # Compiler flags supported by GCC but not by the LLVM-based compilers (clang, hipcc, icpx, etc.) LLVM_UNSUPPORTED_CXXFLAGS := --param vect-max-version-for-alias-checks=50 -Werror=format-contains-nul -Wno-non-template-friend -Werror=return-local-addr -Werror=unused-but-set-variable -CXXFLAGS := -std=c++17 $(HOST_CXXFLAGS) -g +CXXFLAGS := -std=c++20 $(HOST_CXXFLAGS) -g LDFLAGS := -O2 -fPIC -pthread -Wl,-E -lstdc++fs -ldl # CUDA @@ -25,17 +25,17 @@ else # CUDA platform at $(CUDA_BASE) CUDA_LIBDIR := $(CUDA_BASE)/lib64 CUDA_DEPS := $(CUDA_LIBDIR)/libcudart.so - CUDA_ARCH := 60 70 80 + CUDA_ARCH := 60 70 80 90 CUDA_CXXFLAGS := -I$(CUDA_BASE)/include CUDA_LDFLAGS := -L$(CUDA_LIBDIR) -lcudart -lcudadevrt CUDA_NVCC := $(CUDA_BASE)/bin/nvcc define CUFLAGS_template $(2)NVCC_FLAGS := $$(foreach ARCH,$(1),-gencode arch=compute_$$(ARCH),code=[sm_$$(ARCH),compute_$$(ARCH)]) -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored --expt-relaxed-constexpr --expt-extended-lambda --generate-line-info --source-in-ptx --display-error-number --threads $$(words $(1)) --cudart=shared - $(2)NVCC_COMMON := -std=c++17 -O3 -g $$($(2)NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS)' + $(2)NVCC_COMMON := -std=c++20 -O3 -g $$($(2)NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS)' $(2)CUDA_CUFLAGS := $$($(2)NVCC_COMMON) endef $(eval $(call CUFLAGS_template,$(CUDA_ARCH),)) - NVCC_COMMON := -std=c++17 -O3 -g $(NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS)' + NVCC_COMMON := -std=c++20 -O3 -g $(NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS)' CUDA_CUFLAGS := $(NVCC_COMMON) endif @@ -51,7 +51,7 @@ else ROCM_DEPS := $(ROCM_LIBDIR)/libamdhip64.so ROCM_ARCH := gfx900 gfx90a gfx1030 ROCM_HIPCC := $(ROCM_BASE)/bin/hipcc - HIPCC_CXXFLAGS := -fno-gpu-rdc $(foreach ARCH,$(ROCM_ARCH),--offload-arch=$(ARCH)) $(filter-out $(LLVM_UNSUPPORTED_CXXFLAGS),$(CXXFLAGS)) --target=$(GCC_TARGET) --gcc-toolchain=$(GCC_TOOLCHAIN) -I$(ROCM_BASE)/include/hip -Wno-unused-result + HIPCC_CXXFLAGS := -fno-gpu-rdc $(foreach ARCH,$(ROCM_ARCH),--offload-arch=$(ARCH)) $(filter-out $(LLVM_UNSUPPORTED_CXXFLAGS),$(CXXFLAGS)) --target=$(GCC_TARGET) --gcc-toolchain=$(GCC_TOOLCHAIN) -I$(ROCM_BASE)/include/hip -I$(ROCM_BASE)/include/thrust -Wno-unused-result HIPCC_LDFLAGS := $(LDFLAGS) --target=$(GCC_TARGET) --gcc-toolchain=$(GCC_TOOLCHAIN) endif @@ -67,7 +67,7 @@ else SYCL_CXX := $(SYCL_BASE)/bin/icpx SYCL_CPU_TARGET := -fsycl-targets=spir64_x86_64 SYCL_FLAGS := -fsycl -fp-model=precise - SYCL_CXXFLAGS := $(filter-out $(LLVM_UNSUPPORTED_CXXFLAGS),$(CXXFLAGS)) $(SYCL_FLAGS) -Wno-unused-variable + SYCL_CXXFLAGS := $(filter-out $(LLVM_UNSUPPORTED_CXXFLAGS),$(CXXFLAGS)) $(SYCL_FLAGS) -Wno-unused-variable -Wunused-parameter SYCL_LDFLAGS := # Check for Intel GPU existence diff --git a/test/max_element/max_element_t.cc b/test/max_element/max_element_t.cc new file mode 100644 index 0000000..42a70ba --- /dev/null +++ b/test/max_element/max_element_t.cc @@ -0,0 +1,42 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +TEST_CASE("max_elementCPU", "[max_element]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + SECTION("Default comparison") { + auto max = xtd::max_element(values.begin(), values.end()); + REQUIRE(*max == N - 1); + } + + SECTION("Greater comparison") { + auto max = xtd::max_element(values.begin(), values.end(), std::greater()); + REQUIRE(*max == 0); + } + + SECTION("Unseq execution policy") { + auto max = xtd::max_element(std::execution::unseq, values.begin(), values.end()); + REQUIRE(*max == N - 1); + } + + SECTION("Unseq execution policy with greater comparison") { + auto max = + xtd::max_element(std::execution::unseq, values.begin(), values.end(), std::greater()); + REQUIRE(*max == 0); + } +} diff --git a/test/max_element/max_element_t.cu b/test/max_element/max_element_t.cu new file mode 100644 index 0000000..61115e0 --- /dev/null +++ b/test/max_element/max_element_t.cu @@ -0,0 +1,48 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/cuda_check.h" +#include +#include +#include +#include +#include + +TEST_CASE("max_elementCUDA", "[max_element]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + int* d_values; + CUDA_CHECK(cudaMalloc(&d_values, N * sizeof(int))); + CUDA_CHECK(cudaMemcpy(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice)); + + SECTION("Default comparison") { + auto max_iter = xtd::max_element(d_values, d_values + N); + int max; + thrust::copy(thrust::device, max_iter, max_iter+1, &max); + REQUIRE(max == N - 1); + } + + SECTION("Greater comparison") { + auto max_iter = xtd::max_element(d_values, d_values + N, std::greater()); + int max; + thrust::copy(thrust::device, max_iter, max_iter+1, &max); + REQUIRE(max == 0); + } + + CUDA_CHECK(cudaFree(d_values)); +} diff --git a/test/max_element/max_element_t.hip.cc b/test/max_element/max_element_t.hip.cc new file mode 100644 index 0000000..82b2c7e --- /dev/null +++ b/test/max_element/max_element_t.hip.cc @@ -0,0 +1,45 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/hip_check.h" +#include +#include + +TEST_CASE("max_elementHIP", "[max_element]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + int* d_values; + HIP_CHECK(hipMalloc(&d_values, N * sizeof(int))); + HIP_CHECK(hipMemcpy(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice)); + + SECTION("Default comparison") { + auto max_iter = xtd::max_element(d_values, d_values + N); + int max; + thrust::copy(thrust::hip::par, max_iter, max_iter + 1, &max); + REQUIRE(max == N - 1); + } + + SECTION("Greater comparison") { + auto max_iter = xtd::max_element(d_values, d_values + N, std::greater()); + int max; + thrust::copy(thrust::hip::par, max_iter, max_iter + 1, &max); + REQUIRE(max == 0); + } + + HIP_CHECK(hipFree(d_values)); +} diff --git a/test/max_element/max_element_t.sycl.cc b/test/max_element/max_element_t.sycl.cc new file mode 100644 index 0000000..b28c4fd --- /dev/null +++ b/test/max_element/max_element_t.sycl.cc @@ -0,0 +1,47 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include + +TEST_CASE("sortSYCL", "[sort]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + +#ifdef ONEAPI_CPU + auto queue = sycl::queue{sycl::cpu_selector_v, sycl::property::queue::in_order()}; +#else + if (sycl::device::get_devices(sycl::info::device_type::gpu).size() == 0) { + std::cout << "No SYCL GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + auto queue = sycl::queue{sycl::gpu_selector_v, sycl::property::queue::in_order()}; +#endif + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + auto *d_values = sycl::malloc_device(N, queue); + queue.memcpy(d_values, values.data(), N * sizeof(int)).wait(); + + SECTION("Default comparison") { + auto max = xtd::max_element(d_values, d_values + N); + REQUIRE(*max == N - 1); + } + + SECTION("Greater comparison") { + auto max = xtd::max_element(d_values, d_values + N, std::greater{}); + REQUIRE(*max == 0); + } + + sycl::free(d_values, queue); +} diff --git a/test/min_element/min_element_t.cc b/test/min_element/min_element_t.cc new file mode 100644 index 0000000..36edadc --- /dev/null +++ b/test/min_element/min_element_t.cc @@ -0,0 +1,42 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +TEST_CASE("min_elementCPU", "[min_element]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + SECTION("Default comparison") { + auto min = xtd::min_element(values.begin(), values.end()); + REQUIRE(*min == 0); + } + + SECTION("Greater comparison") { + auto min = xtd::min_element(values.begin(), values.end(), std::greater()); + REQUIRE(*min == N - 1); + } + + SECTION("Unseq execution policy") { + auto min = xtd::min_element(std::execution::unseq, values.begin(), values.end()); + REQUIRE(*min == 0); + } + + SECTION("Unseq execution policy with greater comparison") { + auto min = + xtd::min_element(std::execution::unseq, values.begin(), values.end(), std::greater()); + REQUIRE(*min == N - 1); + } +} diff --git a/test/min_element/min_element_t.cu b/test/min_element/min_element_t.cu new file mode 100644 index 0000000..6fde3d4 --- /dev/null +++ b/test/min_element/min_element_t.cu @@ -0,0 +1,46 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/cuda_check.h" +#include +#include +#include + +TEST_CASE("min_elementCUDA", "[min_element]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + int* d_values; + CUDA_CHECK(cudaMalloc(&d_values, N * sizeof(int))); + CUDA_CHECK(cudaMemcpy(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice)); + + SECTION("Default comparison") { + auto min_iter = xtd::min_element(d_values, d_values + N); + int min; + thrust::copy(thrust::device, min_iter , min_iter + 1, &min); + REQUIRE(min == 0); + } + + SECTION("Greater comparison") { + auto min_iter = xtd::min_element(d_values, d_values + N, std::greater()); + int min; + thrust::copy(thrust::device, min_iter, min_iter + 1, &min); + REQUIRE(min == N - 1); + } + + CUDA_CHECK(cudaFree(d_values)); +} diff --git a/test/min_element/min_element_t.hip.cc b/test/min_element/min_element_t.hip.cc new file mode 100644 index 0000000..7292494 --- /dev/null +++ b/test/min_element/min_element_t.hip.cc @@ -0,0 +1,45 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/hip_check.h" +#include +#include + +TEST_CASE("min_elementHIP", "[min_element]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + int* d_values; + HIP_CHECK(hipMalloc(&d_values, N * sizeof(int))); + HIP_CHECK(hipMemcpy(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice)); + + SECTION("Default comparison") { + auto min_iter = xtd::min_element(d_values, d_values + N); + int min; + thrust::copy(thrust::hip::par, min_iter, min_iter + 1, &min); + REQUIRE(min == 0); + } + + SECTION("Greater comparison") { + auto min_iter = xtd::min_element(d_values, d_values + N, std::greater()); + int min; + thrust::copy(thrust::hip::par, min_iter, min_iter + 1, &min); + REQUIRE(min == N - 1); + } + + HIP_CHECK(hipFree(d_values)); +} diff --git a/test/min_element/min_element_t.sycl.cc b/test/min_element/min_element_t.sycl.cc new file mode 100644 index 0000000..bfca1d8 --- /dev/null +++ b/test/min_element/min_element_t.sycl.cc @@ -0,0 +1,47 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include + +TEST_CASE("sortSYCL", "[sort]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + +#ifdef ONEAPI_CPU + auto queue = sycl::queue{sycl::cpu_selector_v, sycl::property::queue::in_order()}; +#else + if (sycl::device::get_devices(sycl::info::device_type::gpu).size() == 0) { + std::cout << "No SYCL GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + auto queue = sycl::queue{sycl::gpu_selector_v, sycl::property::queue::in_order()}; +#endif + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + auto *d_values = sycl::malloc_device(N, queue); + queue.memcpy(d_values, values.data(), N * sizeof(int)).wait(); + + SECTION("Default comparison") { + auto min = xtd::min_element(d_values, d_values + N); + REQUIRE(*min == 0); + } + + SECTION("Greater comparison") { + auto min = xtd::min_element(d_values, d_values + N, std::greater{}); + REQUIRE(*min == N - 1); + } + + sycl::free(d_values, queue); +} diff --git a/test/reduce/reduce_t.cc b/test/reduce/reduce_t.cc new file mode 100644 index 0000000..5b7f89c --- /dev/null +++ b/test/reduce/reduce_t.cc @@ -0,0 +1,52 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +TEST_CASE("reduceCPU", "[reduce]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + + SECTION("Default reduction") { + auto red = xtd::reduce(values.begin(), values.end()); + REQUIRE(red == std::reduce(values.begin(), values.end())); + } + + SECTION("Less comparison") { + auto red = xtd::reduce(values.begin(), values.end(), -1, std::less()); + REQUIRE(red == std::reduce(values.begin(), values.end(), -1, std::less())); + } + + SECTION("Unseq execution policy") { + int red = xtd::reduce(std::execution::unseq, values.begin(), values.end()); + REQUIRE(red == std::reduce(values.begin(), values.end())); + } + + SECTION("Unseq execution policy with less comparison") { + auto red = + xtd::reduce(std::execution::unseq, values.begin(), values.end(), -1, std::less()); + REQUIRE(red == + std::reduce(std::execution::unseq, values.begin(), values.end(), -1, std::less())); + } + + SECTION("Reduction with initial value") { + auto red = xtd::reduce(values.begin(), values.end(), 1); + REQUIRE(red == std::reduce(values.begin(), values.end(), 1)); + } + + SECTION("Reduction with initial value and unseq policy") { + auto red = xtd::reduce(std::execution::unseq, values.begin(), values.end(), 1); + REQUIRE(red == std::reduce(std::execution::unseq, values.begin(), values.end(), 1)); + } +} diff --git a/test/reduce/reduce_t.cu b/test/reduce/reduce_t.cu new file mode 100644 index 0000000..7810df6 --- /dev/null +++ b/test/reduce/reduce_t.cu @@ -0,0 +1,49 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/cuda_check.h" +#include +#include +#include + +TEST_CASE("reduceCUDA", "[reduce]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + + int* d_values; + CUDA_CHECK(cudaMallocAsync(&d_values, N * sizeof(int), stream)); + CUDA_CHECK( + cudaMemcpyAsync(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice, stream)); + + SECTION("Default reduction") { + auto red = xtd::reduce(d_values, d_values + N); + REQUIRE(red == std::reduce(values.begin(), values.end())); + } + + SECTION("Less comparison") { + auto red = xtd::reduce(d_values, d_values + N, -1, std::less()); + REQUIRE(red == std::reduce(values.begin(), values.end(), -1, std::less())); + } + + SECTION("Reduction with initial value") { + auto red = xtd::reduce(d_values, d_values + N, 1); + REQUIRE(red == std::reduce(values.begin(), values.end(), 1)); + } +} diff --git a/test/reduce/reduce_t.hip.cc b/test/reduce/reduce_t.hip.cc new file mode 100644 index 0000000..575a603 --- /dev/null +++ b/test/reduce/reduce_t.hip.cc @@ -0,0 +1,48 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/hip_check.h" +#include +#include + +TEST_CASE("reduceHIP", "[reduce]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + int* d_values; + HIP_CHECK(hipMallocAsync(&d_values, N * sizeof(int), stream)); + HIP_CHECK( + hipMemcpyAsync(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice, stream)); + + SECTION("Default reduction") { + auto red = xtd::reduce(d_values, d_values + N); + REQUIRE(red == std::reduce(values.begin(), values.end())); + } + + SECTION("Less comparison") { + auto red = xtd::reduce(d_values, d_values + N, -1, std::less()); + REQUIRE(red == std::reduce(values.begin(), values.end(), -1, std::less())); + } + + SECTION("Reduction with initial value") { + auto red = xtd::reduce(d_values, d_values + N, 1); + REQUIRE(red == std::reduce(values.begin(), values.end(), 1)); + } +} diff --git a/test/reduce/reduce_t.sycl.cc b/test/reduce/reduce_t.sycl.cc new file mode 100644 index 0000000..ada03d8 --- /dev/null +++ b/test/reduce/reduce_t.sycl.cc @@ -0,0 +1,52 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include + +TEST_CASE("reduceSYCL", "[reduce]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + +#ifdef ONEAPI_CPU + auto queue = sycl::queue{sycl::cpu_selector_v, sycl::property::queue::in_order()}; +#else + if (sycl::device::get_devices(sycl::info::device_type::gpu).size() == 0) { + std::cout << "No SYCL GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + auto queue = sycl::queue{sycl::gpu_selector_v, sycl::property::queue::in_order()}; +#endif + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + auto *d_values = sycl::malloc_device(N, queue); + queue.memcpy(d_values, values.data(), N * sizeof(int)).wait(); + + SECTION("Default reduction") { + auto red = xtd::reduce(d_values, d_values + N); + REQUIRE(red == std::reduce(values.begin(), values.end())); + } + + SECTION("Less comparison") { + auto red = xtd::reduce(d_values, d_values + N, -1, std::less()); + REQUIRE(red == std::reduce(values.begin(), values.end(), -1, std::less())); + } + + SECTION("Reduction with initial value") { + auto red = xtd::reduce(d_values, d_values + N, 1); + REQUIRE(red == std::reduce(values.begin(), values.end(), 1)); + } + + sycl::free(d_values, queue); +} diff --git a/test/sort/sort_t.cc b/test/sort/sort_t.cc new file mode 100644 index 0000000..d0a95b1 --- /dev/null +++ b/test/sort/sort_t.cc @@ -0,0 +1,41 @@ + +#include +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +TEST_CASE("sortCPU", "[sort]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + SECTION("Default comparison") { + xtd::sort(values.begin(), values.end()); + REQUIRE(std::ranges::equal(values, std::views::iota(0, N))); + } + + SECTION("Greater comparison") { + xtd::sort(values.begin(), values.end(), std::greater()); + REQUIRE(std::ranges::equal(values, std::views::iota(0, N) | std::views::reverse)); + } + + SECTION("Unseq execution policy") { + xtd::sort(std::execution::unseq, values.begin(), values.end()); + REQUIRE(std::ranges::equal(values, std::views::iota(0, N))); + } + + SECTION("Unseq execution policy with greater comparison") { + xtd::sort(std::execution::unseq, values.begin(), values.end(), std::greater()); + REQUIRE(std::ranges::equal(values, std::views::iota(0, N) | std::views::reverse)); + } +} diff --git a/test/sort/sort_t.cu b/test/sort/sort_t.cu new file mode 100644 index 0000000..ec34bf8 --- /dev/null +++ b/test/sort/sort_t.cu @@ -0,0 +1,52 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/cuda_check.h" +#include + +TEST_CASE("sortCUDA", "[sort]") { + int deviceCount; + cudaError_t cudaStatus = cudaGetDeviceCount(&deviceCount); + + if (cudaStatus != cudaSuccess || deviceCount == 0) { + std::cout << "No NVIDIA GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + CUDA_CHECK(cudaSetDevice(0)); + + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + int* d_values; + CUDA_CHECK(cudaMalloc(&d_values, N * sizeof(int))); + CUDA_CHECK(cudaMemcpy(d_values, values.data(), N * sizeof(int), cudaMemcpyHostToDevice)); + + SECTION("Default comparison") { + xtd::sort(d_values, d_values + N); + CUDA_CHECK(cudaMemcpy(values.data(), d_values, N * sizeof(int), cudaMemcpyDeviceToHost)); + + REQUIRE(std::ranges::equal(values, std::views::iota(0, N))); + } + + SECTION("Greater comparison") { + xtd::sort(d_values, d_values + N, std::greater{}); + CUDA_CHECK(cudaMemcpy(values.data(), d_values, N * sizeof(int), cudaMemcpyDeviceToHost)); + + REQUIRE(std::ranges::equal(values, std::views::iota(0, N) | std::views::reverse)); + } + + CUDA_CHECK(cudaFree(d_values)); +} diff --git a/test/sort/sort_t.hip.cc b/test/sort/sort_t.hip.cc new file mode 100644 index 0000000..90c25c2 --- /dev/null +++ b/test/sort/sort_t.hip.cc @@ -0,0 +1,52 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include "common/hip_check.h" +#include + +TEST_CASE("sortHIP", "[sort]") { + int deviceCount; + hipError_t hipStatus = hipGetDeviceCount(&deviceCount); + + if (hipStatus != hipSuccess || deviceCount == 0) { + std::cout << "No AMD GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + HIP_CHECK(hipSetDevice(0)); + + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + int* d_values; + HIP_CHECK(hipMalloc(&d_values, N * sizeof(int))); + HIP_CHECK(hipMemcpy(d_values, values.data(), N * sizeof(int), hipMemcpyHostToDevice)); + + SECTION("Default comparison") { + xtd::sort(d_values, d_values + N); + HIP_CHECK(hipMemcpy(values.data(), d_values, N * sizeof(int), hipMemcpyDeviceToHost)); + + REQUIRE(std::ranges::equal(values, std::views::iota(0, N))); + } + + SECTION("Greater comparison") { + xtd::sort(d_values, d_values + N, std::greater{}); + HIP_CHECK(hipMemcpy(values.data(), d_values, N * sizeof(int), hipMemcpyDeviceToHost)); + + REQUIRE(std::ranges::equal(values, std::views::iota(0, N) | std::views::reverse)); + } + + HIP_CHECK(hipFree(d_values)); +} diff --git a/test/sort/sort_t.sycl.cc b/test/sort/sort_t.sycl.cc new file mode 100644 index 0000000..878dcfe --- /dev/null +++ b/test/sort/sort_t.sycl.cc @@ -0,0 +1,51 @@ + +#include +#include +#include +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "algorithm.h" + +#include + +TEST_CASE("sortSYCL", "[sort]") { + const int N = 100; + std::random_device rd; + std::mt19937 rng(rd()); + +#ifdef ONEAPI_CPU + auto queue = sycl::queue{sycl::cpu_selector_v, sycl::property::queue::in_order()}; +#else + if (sycl::device::get_devices(sycl::info::device_type::gpu).size() == 0) { + std::cout << "No SYCL GPUs found, the test will be skipped." << std::endl; + exit(EXIT_SUCCESS); + } + auto queue = sycl::queue{sycl::gpu_selector_v, sycl::property::queue::in_order()}; +#endif + + std::vector values(N); + std::iota(values.begin(), values.end(), 0); + std::shuffle(values.begin(), values.end(), rng); + + auto *d_values = sycl::malloc_device(N, queue); + queue.memcpy(d_result, values.data(), N * sizeof(int)).wait(); + + SECTION("Default comparison") { + xtd::sort(d_values, d_values + N); + queue.memcpy(values.data(), d_values, N * sizeof(int)).wait(); + + REQUIRE(std::ranges::equal(values, std::views::iota(0, N))); + } + + SECTION("Greater comparison") { + xtd::sort(d_values, d_values + N, std::greater{}); + queue.memcpy(values.data(), d_values, N * sizeof(int)).wait(); + + REQUIRE(std::ranges::equal(values, std::views::iota(0, N) | std::views::reverse)); + } + + sycl::free(d_values, queue); +}