// Patch summary (git unified diff touching four files under libs/core/algorithms).
//
// NOTE(review): in this copy the line breaks are collapsed and angle-bracketed
// text (include targets, template parameter lists/arguments) appears to be
// missing, e.g. bare "#include" and "template" with nothing after them. The
// text below is therefore not applicable as-is; confirm against the real patch.
//
// 1. include/hpx/parallel/algorithms/detail/find.hpp
//    - adds one #include (target not visible here).
//    - adds a static_assert at the top of the overload whose first parameter
//      is sequential_find_t; it requires
//      hpx::is_sequenced_execution_policy::value ||
//      hpx::is_unsequenced_execution_policy::value before the existing
//      util::loop_pred call.
//
// 2. include/hpx/parallel/unseq/simd_helpers.hpp
//    - adds the comment "f(Iter) -> bool" above unseq_first_n.
//
// 3. include/hpx/parallel/util/loop.hpp
//    - adds a static constexpr unseq_call(base_idx, it, num, f): under
//      HPX_VECTORIZE it runs i over [0, num), invoking
//      f(*(it + i), base_idx++) each iteration, and returns it + num.
//    - adds two tag_invoke overloads for hpx::parallel::util::loop_idx_n_t
//      taking (base_idx, it, count, CancelToken& tok, f). Each returns `it`
//      unchanged when tok.was_cancelled(base_idx) is true, otherwise forwards
//      to detail::loop_idx_n::unseq_call. The token is not passed on, so the
//      shown code checks cancellation only once, before the loop starts.
//      NOTE(review): the two overloads read identically here; presumably they
//      differ in template arguments that are missing from this copy - verify.
//
// 4. tests/unit/algorithms/util/test_simd_helpers.cpp
//    - in test_unseq_first_n1_dispatch2, replaces
//      "std::vector v(length, static_cast(0));" with "std::vector v(length);".
diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/find.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/find.hpp index 8d80fe055f3c..38d9a325276b 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/find.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/find.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include #include @@ -34,6 +35,8 @@ namespace hpx::parallel::detail { sequential_find_t, Iterator first, Sentinel last, T const& value, Proj proj = Proj()) { + static_assert(hpx::is_sequenced_execution_policy::value || + hpx::is_unsequenced_execution_policy::value); return util::loop_pred( first, last, [&value, &proj](auto const& curr) { return HPX_INVOKE(proj, *curr) == value; diff --git a/libs/core/algorithms/include/hpx/parallel/unseq/simd_helpers.hpp b/libs/core/algorithms/include/hpx/parallel/unseq/simd_helpers.hpp index cf7bf8380e86..906d21072acd 100644 --- a/libs/core/algorithms/include/hpx/parallel/unseq/simd_helpers.hpp +++ b/libs/core/algorithms/include/hpx/parallel/unseq/simd_helpers.hpp @@ -25,6 +25,7 @@ namespace hpx::parallel::util { Compiler and Hardware should also support vector operations for IterDiff, else we see slower performance when compared to sequential version */ + // f(Iter) -> bool template Iter unseq_first_n(Iter const first, IterDiff const n, F&& f) noexcept { diff --git a/libs/core/algorithms/include/hpx/parallel/util/loop.hpp b/libs/core/algorithms/include/hpx/parallel/util/loop.hpp index 56d642f23f10..621efb722ad3 100644 --- a/libs/core/algorithms/include/hpx/parallel/util/loop.hpp +++ b/libs/core/algorithms/include/hpx/parallel/util/loop.hpp @@ -900,6 +900,20 @@ namespace hpx::parallel::util { return call(base_idx, it, num, HPX_FORWARD(F, f)); } + + template + HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter unseq_call( + std::size_t base_idx, Iter it, std::size_t num, F&& f) + { + // clang-format off + HPX_VECTORIZE + for 
(std::size_t i = 0; i < num; i++) // -V112 + { + HPX_INVOKE(f, *(it + i), base_idx++); + } + // clang-format on + return it + num; + } }; } // namespace detail @@ -931,6 +945,36 @@ namespace hpx::parallel::util { } }; + template )> + HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter tag_invoke( + hpx::parallel::util::loop_idx_n_t, + std::size_t base_idx, Iter it, std::size_t count, CancelToken& tok, + F&& f) + { + if (tok.was_cancelled(base_idx)) + return it; + + return detail::loop_idx_n::unseq_call(base_idx, it, count, + HPX_FORWARD(F, f)); + } + + template )> + HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter tag_invoke( + hpx::parallel::util::loop_idx_n_t, + std::size_t base_idx, Iter it, std::size_t count, CancelToken& tok, + F&& f) + { + if (tok.was_cancelled(base_idx)) + return it; + + return detail::loop_idx_n::unseq_call(base_idx, it, count, + HPX_FORWARD(F, f)); + } + #if !defined(HPX_COMPUTE_DEVICE_CODE) template inline constexpr loop_idx_n_t loop_idx_n = diff --git a/libs/core/algorithms/tests/unit/algorithms/util/test_simd_helpers.cpp b/libs/core/algorithms/tests/unit/algorithms/util/test_simd_helpers.cpp index e2927525257c..ab512b4c8316 100644 --- a/libs/core/algorithms/tests/unit/algorithms/util/test_simd_helpers.cpp +++ b/libs/core/algorithms/tests/unit/algorithms/util/test_simd_helpers.cpp @@ -23,7 +23,7 @@ void test_unseq_first_n1_dispatch2(std::size_t length, std::size_t first_index) { first_index = first_index % length; - std::vector v(length, static_cast(0)); + std::vector v(length); std::size_t i = 0; std::for_each(v.begin(), v.end(), [&](T& t) {