|
| 1 | +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_60 |
| 2 | +// RUN: %CPU_RUN_PLACEHOLDER %t.out |
| 3 | +// RUN: %GPU_RUN_PLACEHOLDER %t.out |
| 4 | + |
| 5 | +// TODO: accelerator may not support atomics required by the current |
| 6 | +// implementation. Enable testing when implementation is fixed. |
| 7 | +// RUNx: %ACC_RUN_PLACEHOLDER %t.out |
| 8 | + |
| 9 | +// This test performs basic checks of parallel_for(range<3>, reduction, func) |
| 10 | +// with reductions initialized with a one element buffer. Additionally, some |
| 11 | +// reducers will not be written to. |
| 12 | + |
| 13 | +#include "reduction_utils.hpp" |
| 14 | + |
| 15 | +using namespace sycl; |
| 16 | + |
| 17 | +int NumErrors = 0; |
| 18 | + |
| 19 | +template <typename T> class SkipEvenName; |
| 20 | +template <typename T> class SkipOddName; |
| 21 | +template <typename T> class SkipAllName; |
| 22 | + |
| 23 | +template <typename Name, typename T, typename... ArgTys> |
| 24 | +void tests(ArgTys &&...Args) { |
| 25 | + NumErrors += test<SkipEvenName<Name>, T>(std::forward<ArgTys>(Args)..., |
| 26 | + property_list{}, SkipEvenOp{}); |
| 27 | + NumErrors += test<SkipOddName<Name>, T>(std::forward<ArgTys>(Args)..., |
| 28 | + property_list{}, SkipOddOp{}); |
| 29 | + NumErrors += test<SkipAllName<Name>, T>(std::forward<ArgTys>(Args)..., |
| 30 | + property_list{}, SkipAllOp{}); |
| 31 | +} |
| 32 | + |
| 33 | +int main() { |
| 34 | + queue Q; |
| 35 | + printDeviceInfo(Q); |
| 36 | + size_t MaxWGSize = |
| 37 | + Q.get_device().get_info<info::device::max_work_group_size>(); |
| 38 | + |
| 39 | + tests<class A1, int>(Q, 0, 99, std::plus<>{}, range<3>{1, 1, 1}); |
| 40 | + tests<class A2, int>(Q, 0, 99, std::plus<>{}, range<3>{2, 2, 2}); |
| 41 | + tests<class A3, int>(Q, 0, 99, std::plus<>{}, range<3>{2, 3, 4}); |
| 42 | + |
| 43 | + tests<class A4, int64_t>(Q, 0, 99, std::plus<>{}, |
| 44 | + range<3>{1, 1, MaxWGSize + 1}); |
| 45 | + tests<class A5, int64_t>(Q, 0, 99, std::plus<>{}, |
| 46 | + range<3>{1, MaxWGSize + 1, 1}); |
| 47 | + tests<class A6, int64_t>(Q, 0, 99, std::plus<>{}, |
| 48 | + range<3>{MaxWGSize + 1, 1, 1}); |
| 49 | + |
| 50 | + tests<class A7, int64_t>(Q, 0, 99, std::plus<>{}, |
| 51 | + range<3>{2, 5, MaxWGSize * 2}); |
| 52 | + tests<class A8, int64_t>(Q, 0, 99, std::plus<>{}, |
| 53 | + range<3>{3, MaxWGSize * 3, 2}); |
| 54 | + tests<class A9, int64_t>(Q, 0, 99, std::plus<>{}, |
| 55 | + range<3>{MaxWGSize * 3, 8, 4}); |
| 56 | + |
| 57 | + tests<class B1, CustomVec<long long>>(Q, 0, 99, CustomVecPlus<long long>{}, |
| 58 | + range<3>{2, 33, MaxWGSize}); |
| 59 | + tests<class B2, CustomVec<long long>>(Q, 99, CustomVecPlus<long long>{}, |
| 60 | + range<3>{2, 33, MaxWGSize}); |
| 61 | + |
| 62 | + tests<class C1, int>(Q, 99, PlusWithoutIdentity<int>{}, range<3>{1, 1, 1}); |
| 63 | + tests<class C2, int>(Q, 99, PlusWithoutIdentity<int>{}, range<3>{2, 2, 2}); |
| 64 | + tests<class C3, int>(Q, 99, PlusWithoutIdentity<int>{}, range<3>{2, 3, 4}); |
| 65 | + |
| 66 | + tests<class C4, int64_t>(Q, 99, PlusWithoutIdentity<int64_t>{}, |
| 67 | + range<3>{1, 1, MaxWGSize + 1}); |
| 68 | + tests<class C5, int64_t>(Q, 99, PlusWithoutIdentity<int64_t>{}, |
| 69 | + range<3>{1, MaxWGSize + 1, 1}); |
| 70 | + tests<class C6, int64_t>(Q, 99, PlusWithoutIdentity<int64_t>{}, |
| 71 | + range<3>{MaxWGSize + 1, 1, 1}); |
| 72 | + |
| 73 | + tests<class C7, int64_t>(Q, 99, PlusWithoutIdentity<int64_t>{}, |
| 74 | + range<3>{2, 5, MaxWGSize * 2}); |
| 75 | + tests<class C8, int64_t>(Q, 99, PlusWithoutIdentity<int64_t>{}, |
| 76 | + range<3>{3, MaxWGSize * 3, 2}); |
| 77 | + tests<class C9, int64_t>(Q, 99, PlusWithoutIdentity<int64_t>{}, |
| 78 | + range<3>{MaxWGSize * 3, 8, 4}); |
| 79 | + |
| 80 | + printFinalStatus(NumErrors); |
| 81 | + return NumErrors; |
| 82 | +} |
0 commit comments