forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[pstl] Initial implementation of OpenMP backend, on behalf of Christo…
…pher Nelson [email protected] Phabricator Review: https://reviews.llvm.org/D99836 A couple of parallel patterns still remain serial - "Parallel partial sort", and "Parallel transform scan" - there are //TODOs in the code.
- Loading branch information
1 parent
e741890
commit 6069a6a
Showing
19 changed files
with
1,057 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,3 +15,7 @@ D: Created the initial implementation. | |
N: Thomas Rodgers | ||
E: [email protected] | ||
D: Identifier name transformation for inclusion in a Standard C++ library. | ||
|
||
N: Christopher Nelson | ||
E: [email protected] | ||
D: Add support for an OpenMP backend. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// -*- C++ -*- | ||
// -*-===----------------------------------------------------------------------===// | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_FOR_H | ||
#define _PSTL_INTERNAL_OMP_PARALLEL_FOR_H | ||
|
||
#include <cstddef> | ||
|
||
#include "util.h" | ||
|
||
namespace __pstl | ||
{ | ||
namespace __omp_backend | ||
{ | ||
|
||
template <class _Index, class _Fp> | ||
void | ||
__parallel_for_body(_Index __first, _Index __last, _Fp __f) | ||
{ | ||
// initial partition of the iteration space into chunks | ||
auto __policy = __omp_backend::__chunk_partitioner(__first, __last); | ||
|
||
// To avoid over-subscription we use taskloop for the nested parallelism | ||
_PSTL_PRAGMA(omp taskloop untied mergeable) | ||
for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk) | ||
{ | ||
__omp_backend::__process_chunk(__policy, __first, __chunk, __f); | ||
} | ||
} | ||
|
||
//------------------------------------------------------------------------ | ||
// Notation: | ||
// Evaluation of brick f[i,j) for each subrange [i,j) of [first, last) | ||
//------------------------------------------------------------------------ | ||
|
||
template <class _ExecutionPolicy, class _Index, class _Fp> | ||
void | ||
__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) | ||
{ | ||
if (omp_in_parallel()) | ||
{ | ||
// we don't create a nested parallel region in an existing parallel | ||
// region: just create tasks | ||
__pstl::__omp_backend::__parallel_for_body(__first, __last, __f); | ||
} | ||
else | ||
{ | ||
// in any case (nested or non-nested) one parallel region is created and | ||
// only one thread creates a set of tasks | ||
_PSTL_PRAGMA(omp parallel) | ||
_PSTL_PRAGMA(omp single nowait) { __pstl::__omp_backend::__parallel_for_body(__first, __last, __f); } | ||
} | ||
} | ||
|
||
} // namespace __omp_backend | ||
} // namespace __pstl | ||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_FOR_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// -*- C++ -*- | ||
// -*-===----------------------------------------------------------------------===// | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H | ||
#define _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H | ||
|
||
#include "util.h" | ||
|
||
namespace __pstl | ||
{ | ||
namespace __omp_backend | ||
{ | ||
|
||
// Calls __f on every element of [__first, __last). The range length is
// measured once, then an OpenMP taskloop runs over element positions; each
// iteration walks a copy of __first forward to its own position.
template <class _ForwardIterator, class _Fp>
void
__parallel_for_each_body(_ForwardIterator __first, _ForwardIterator __last, _Fp __f)
{
    using DifferenceType = typename std::iterator_traits<_ForwardIterator>::difference_type;

    // TODO: Think of an approach to remove the std::distance call
    DifferenceType __count = std::distance(__first, __last);

    _PSTL_PRAGMA(omp taskloop untied mergeable)
    for (DifferenceType __pos = 0; __pos < __count; ++__pos)
    {
        // TODO: Think of an approach to remove the increment here each time.
        auto __it = __first;
        std::advance(__it, __pos);
        __f(*__it);
    }
}
|
||
template <class _ExecutionPolicy, class _ForwardIterator, class _Fp> | ||
void | ||
__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Fp __f) | ||
{ | ||
if (omp_in_parallel()) | ||
{ | ||
// we don't create a nested parallel region in an existing parallel | ||
// region: just create tasks | ||
__pstl::__omp_backend::__parallel_for_each_body(__first, __last, __f); | ||
} | ||
else | ||
{ | ||
// in any case (nested or non-nested) one parallel region is created and | ||
// only one thread creates a set of tasks | ||
_PSTL_PRAGMA(omp parallel) | ||
_PSTL_PRAGMA(omp single nowait) { __pstl::__omp_backend::__parallel_for_each_body(__first, __last, __f); } | ||
} | ||
} | ||
|
||
} // namespace __omp_backend | ||
} // namespace __pstl | ||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
// -*- C++ -*- | ||
// -*-===----------------------------------------------------------------------===// | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H | ||
#define _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H | ||
|
||
#include "util.h" | ||
|
||
namespace __pstl | ||
{ | ||
namespace __omp_backend | ||
{ | ||
|
||
// Invokes __f1 and __f2 as two separate OpenMP tasks. Both tasks are created
// inside one taskgroup, so the function returns only after the taskgroup
// region has ended.
template <typename _F1, typename _F2>
void
__parallel_invoke_body(_F1&& __f1, _F2&& __f2)
{
    _PSTL_PRAGMA(omp taskgroup)
    {
        // First callable, forwarded with its original value category.
        _PSTL_PRAGMA(omp task untied mergeable)
        {
            std::forward<_F1>(__f1)();
        }

        // Second callable, forwarded with its original value category.
        _PSTL_PRAGMA(omp task untied mergeable)
        {
            std::forward<_F2>(__f2)();
        }
    }
}
|
||
template <class _ExecutionPolicy, typename _F1, typename _F2> | ||
void | ||
__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) | ||
{ | ||
if (omp_in_parallel()) | ||
{ | ||
__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); | ||
} | ||
else | ||
{ | ||
_PSTL_PRAGMA(omp parallel) | ||
_PSTL_PRAGMA(omp single nowait) | ||
__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); | ||
} | ||
} | ||
|
||
} // namespace __omp_backend | ||
} // namespace __pstl | ||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
// -*- C++ -*- | ||
// -*-===----------------------------------------------------------------------===// | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H | ||
#define _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H | ||
|
||
#include "util.h" | ||
|
||
namespace __pstl | ||
{ | ||
namespace __omp_backend | ||
{ | ||
|
||
template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _RandomAccessIterator3, | ||
typename _Compare, typename _LeafMerge> | ||
void | ||
__parallel_merge_body(std::size_t __size_x, std::size_t __size_y, _RandomAccessIterator1 __xs, | ||
_RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, | ||
_RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) | ||
{ | ||
|
||
if (__size_x + __size_y <= __omp_backend::__default_chunk_size) | ||
{ | ||
__leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp); | ||
return; | ||
} | ||
|
||
_RandomAccessIterator1 __xm; | ||
_RandomAccessIterator2 __ym; | ||
|
||
if (__size_x < __size_y) | ||
{ | ||
__ym = __ys + (__size_y / 2); | ||
__xm = std::upper_bound(__xs, __xe, *__ym, __comp); | ||
} | ||
else | ||
{ | ||
__xm = __xs + (__size_x / 2); | ||
__ym = std::lower_bound(__ys, __ye, *__xm, __comp); | ||
} | ||
|
||
auto __zm = __zs + (__xm - __xs) + (__ym - __ys); | ||
|
||
_PSTL_PRAGMA(omp task untied mergeable default(none) | ||
firstprivate(__xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge)) | ||
__parallel_merge_body(__xm - __xs, __ym - __ys, __xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge); | ||
|
||
_PSTL_PRAGMA(omp task untied mergeable default(none) | ||
firstprivate(__xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge)) | ||
__parallel_merge_body(__xe - __xm, __ye - __ym, __xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge); | ||
|
||
_PSTL_PRAGMA(omp taskwait) | ||
} | ||
|
||
template <class _ExecutionPolicy, typename _RandomAccessIterator1, typename _RandomAccessIterator2, | ||
typename _RandomAccessIterator3, typename _Compare, typename _LeafMerge> | ||
void | ||
__parallel_merge(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, | ||
_RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, | ||
_LeafMerge __leaf_merge) | ||
|
||
{ | ||
std::size_t __size_x = __xe - __xs; | ||
std::size_t __size_y = __ye - __ys; | ||
|
||
/* | ||
* Run the merge in parallel by chunking it up. Use the smaller range (if any) as the iteration range, and the | ||
* larger range as the search range. | ||
*/ | ||
|
||
if (omp_in_parallel()) | ||
{ | ||
__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge); | ||
} | ||
else | ||
{ | ||
_PSTL_PRAGMA(omp parallel) | ||
{ | ||
_PSTL_PRAGMA(omp single nowait) | ||
__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge); | ||
} | ||
} | ||
} | ||
|
||
} // namespace __omp_backend | ||
} // namespace __pstl | ||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
// -*- C++ -*- | ||
// -*-===----------------------------------------------------------------------===// | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H | ||
#define _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H | ||
|
||
#include "util.h" | ||
|
||
namespace __pstl | ||
{ | ||
namespace __omp_backend | ||
{ | ||
|
||
template <class _RandomAccessIterator, class _Value, typename _RealBody, typename _Reduction> | ||
_Value | ||
__parallel_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity, | ||
_RealBody __real_body, _Reduction __reduce) | ||
{ | ||
auto __middle = __first + ((__last - __first) / 2); | ||
_Value __v1(__identity), __v2(__identity); | ||
__parallel_invoke_body( | ||
[&]() { __v1 = __parallel_reduce_body(__first, __middle, __identity, __real_body, __reduce); }, | ||
[&]() { __v2 = __parallel_reduce_body(__middle, __last, __identity, __real_body, __reduce); }); | ||
|
||
return __reduce(__v1, __v2); | ||
} | ||
|
||
//------------------------------------------------------------------------ | ||
// Notation: | ||
// r(i,j,init) returns reduction of init with reduction over [i,j) | ||
// c(x,y) combines values x and y that were the result of r | ||
//------------------------------------------------------------------------ | ||
|
||
template <class _ExecutionPolicy, class _RandomAccessIterator, class _Value, typename _RealBody, typename _Reduction> | ||
_Value | ||
__parallel_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity, | ||
_RealBody __real_body, _Reduction __reduction) | ||
{ | ||
// We don't create a nested parallel region in an existing parallel region: | ||
// just create tasks. | ||
if (omp_in_parallel()) | ||
{ | ||
return __pstl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction); | ||
} | ||
|
||
// In any case (nested or non-nested) one parallel region is created and only | ||
// one thread creates a set of tasks. | ||
_Value __res = __identity; | ||
|
||
_PSTL_PRAGMA(omp parallel) | ||
_PSTL_PRAGMA(omp single nowait) | ||
{ | ||
__res = __pstl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction); | ||
} | ||
|
||
return __res; | ||
} | ||
|
||
} // namespace __omp_backend | ||
} // namespace __pstl | ||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H |
Oops, something went wrong.