36 commits
cd5065e
initial commit for launch loop optimization
artv3 Nov 25, 2025
484ff1a
add structs to store gpu thread/block info in launch ctx
artv3 Nov 25, 2025
18f332b
add cuda variant and add build guards for cpu
artv3 Dec 2, 2025
21f6184
Merge branch 'develop' into artv3/launch-loop-opt
artv3 Dec 2, 2025
73f224a
rework to support dim3 copy in ctx
artv3 Dec 11, 2025
8a02fee
Merge branch 'artv3/launch-loop-opt' of https://github.com/LLNL/RAJA …
artv3 Dec 11, 2025
1fbe50b
minor clean up pass
artv3 Dec 11, 2025
672889e
make format
artv3 Dec 11, 2025
5908a20
Update include/RAJA/pattern/launch/launch_core.hpp
artv3 Dec 11, 2025
316e019
Merge branch 'develop' into artv3/launch-loop-opt
rhornung67 Dec 15, 2025
4d9f800
clean up pass
artv3 Dec 18, 2025
d9ce271
update with develop and fix merge conflicts
artv3 Dec 18, 2025
85aef5a
fix build error
artv3 Dec 18, 2025
0469302
take develop submodule
artv3 Dec 18, 2025
4a695f2
cuda backend
artv3 Dec 18, 2025
f91a498
make style
artv3 Dec 18, 2025
d21c41f
omp backend
artv3 Dec 18, 2025
40a5c1b
seq backend + make style
artv3 Dec 18, 2025
e0f4825
clean up pass
artv3 Dec 18, 2025
96e99d5
Update include/RAJA/pattern/launch/launch_context_policy.hpp
artv3 Dec 18, 2025
a9f0cca
minor clean up
artv3 Dec 18, 2025
7d4595b
minor clean up
artv3 Dec 18, 2025
c23f76f
Merge branch 'artv3/launch-loop-opt' of github.com:LLNL/RAJA into art…
artv3 Dec 18, 2025
c990a4f
revert changes to example
artv3 Dec 18, 2025
f7939fd
remove specialization from launch policy
artv3 Dec 18, 2025
c24331c
make work for function pointers
artv3 Dec 18, 2025
0518138
store dim3 based on launch context type - hip
artv3 Dec 19, 2025
d5da29a
rework omp backend
artv3 Dec 19, 2025
af88dbb
update sequential backend
artv3 Dec 19, 2025
21ad0a8
get things building for cuda -- need a good clean up pass
artv3 Dec 19, 2025
646a95b
cuda clean up pass
artv3 Dec 19, 2025
597641b
clean up ordering in hip launch
artv3 Dec 19, 2025
5403737
clean up ordering
artv3 Dec 19, 2025
e41e970
make style
artv3 Dec 19, 2025
7c95430
use constexpr for getting dim values
artv3 Dec 19, 2025
bfe72de
merge develop, fix conflict
artv3 Jan 19, 2026
17 changes: 15 additions & 2 deletions include/RAJA/pattern/launch/launch_core.hpp
@@ -185,15 +185,28 @@ class LaunchContext

   void* shared_mem_ptr;

+  const size_t thread_id[3];
+  const size_t block_dim[3];
+
 #if defined(RAJA_ENABLE_SYCL)
   mutable ::sycl::nd_item<3>* itm;
 #endif

-  RAJA_HOST_DEVICE LaunchContext()
-      : shared_mem_offset(0),
-        shared_mem_ptr(nullptr)
+  RAJA_HOST_DEVICE LaunchContext()
+      : shared_mem_offset(0),
+        shared_mem_ptr(nullptr),
+        thread_id{1, 1, 1},
+        block_dim{1, 1, 1}
   {}
+
+  RAJA_HOST_DEVICE LaunchContext(const size_t tx, const size_t ty, const size_t tz,
+                                 const size_t bx, const size_t by, const size_t bz)
+      : shared_mem_offset(0),
+        shared_mem_ptr(nullptr),
+        thread_id{tx, ty, tz},
+        block_dim{bx, by, bz}
+  {}

   // TODO handle alignment
   template<typename T>
   RAJA_HOST_DEVICE T* getSharedMemory(size_t bytes)
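Taken together, the hunk leaves LaunchContext with roughly the following shape. This is a condensed sketch for orientation only, not the full RAJA header (RAJA_HOST_DEVICE qualifiers, shared-memory helpers, and the SYCL member are elided):

```cpp
#include <cstddef>

// Condensed sketch of the updated class (not the full RAJA header).
// The two new arrays cache the launching thread's index and the block
// dimensions so loop policies can read them from the context instead
// of querying threadIdx/blockDim directly.
class LaunchContext
{
public:
  size_t shared_mem_offset;
  void* shared_mem_ptr;

  const size_t thread_id[3];
  const size_t block_dim[3];

  // Default (host-side) construction: degenerate 1x1x1 extents.
  LaunchContext()
      : shared_mem_offset(0), shared_mem_ptr(nullptr),
        thread_id{1, 1, 1}, block_dim{1, 1, 1}
  {}

  // Device-side construction from the kernel's thread/block indices.
  LaunchContext(size_t tx, size_t ty, size_t tz,
                size_t bx, size_t by, size_t bz)
      : shared_mem_offset(0), shared_mem_ptr(nullptr),
        thread_id{tx, ty, tz}, block_dim{bx, by, bz}
  {}
};
```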
40 changes: 38 additions & 2 deletions include/RAJA/policy/hip/launch.hpp
@@ -32,7 +32,8 @@ template<typename BODY, typename ReduceParams>
 __global__ void launch_new_reduce_global_fcn(const BODY body_in,
                                              ReduceParams reduce_params)
 {
-  LaunchContext ctx;
+  LaunchContext ctx(threadIdx.x, threadIdx.y, threadIdx.z,
+                    blockDim.x, blockDim.y, blockDim.z);

   using RAJA::internal::thread_privatize;
   auto privatizer = thread_privatize(body_in);
@@ -137,7 +138,8 @@ __launch_bounds__(num_threads, 1) __global__
     void launch_new_reduce_global_fcn_fixed(const BODY body_in,
                                             ReduceParams reduce_params)
 {
-  LaunchContext ctx;
+  LaunchContext ctx(threadIdx.x, threadIdx.y, threadIdx.z,
+                    blockDim.x, blockDim.y, blockDim.z);

   using RAJA::internal::thread_privatize;
   auto privatizer = thread_privatize(body_in);
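Both reduce kernels now seed the context the same way. The pattern, reduced to its essentials (a sketch: launch_kernel and body are illustrative names, not RAJA API):

```cpp
// Sketch of the seeding pattern shared by both kernels above.
// launch_kernel / body are illustrative names, not part of RAJA.
template<typename BODY>
__global__ void launch_kernel(const BODY body)
{
  // Each thread captures its own indices and the block extents once;
  // loop policies later read ctx.thread_id / ctx.block_dim instead of
  // touching threadIdx / blockDim themselves.
  LaunchContext ctx(threadIdx.x, threadIdx.y, threadIdx.z,
                    blockDim.x, blockDim.y, blockDim.z);
  body(ctx);
}
```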
@@ -239,6 +241,40 @@ struct LaunchExecute<RAJA::policy::hip::hip_launch_t<async, nthreads>>
   }
 };

+template<named_dim DIM>
+struct hip_ctx_thread_loop;
+
+using hip_ctx_thread_loop_x = hip_ctx_thread_loop<named_dim::x>;
+using hip_ctx_thread_loop_y = hip_ctx_thread_loop<named_dim::y>;
+using hip_ctx_thread_loop_z = hip_ctx_thread_loop<named_dim::z>;
+
+template<typename SEGMENT, named_dim DIM>
+struct LoopExecute<hip_ctx_thread_loop<DIM>, SEGMENT>
+{
+  template<typename BODY>
+  static RAJA_INLINE RAJA_DEVICE void exec(LaunchContext const& ctx,
+                                           SEGMENT const& segment,
+                                           BODY const& body)
+  {
+    const int len = segment.end() - segment.begin();
+    constexpr int int_dim = static_cast<int>(DIM);
+
+    // Block-stride loop: start at this thread's index in dimension DIM
+    // and advance by the block extent stored in the launch context.
+    for (int i = ctx.thread_id[int_dim]; i < len;
+         i += ctx.block_dim[int_dim])
+    {
+      body(*(segment.begin() + i));
+    }
+  }
+};
+
 /*
    HIP generic loop implementations
 */
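A usage sketch, assuming the new policy slots into RAJA::loop the same way existing policies such as hip_thread_x_loop do (the unqualified policy name, and the DAXPY body with N, a, x, y, n_blocks, block_size, are assumptions for illustration):

```cpp
using launch_policy = RAJA::LaunchPolicy<RAJA::hip_launch_t<false>>;

// Hedged usage sketch: drive a DAXPY with the context-backed loop
// policy, mirroring existing RAJA::launch + RAJA::loop usage.
RAJA::launch<launch_policy>(
    RAJA::LaunchParams(RAJA::Teams(n_blocks), RAJA::Threads(block_size)),
    [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {
      RAJA::loop<hip_ctx_thread_loop_x>(
          ctx, RAJA::RangeSegment(0, N), [&](int i) {
            y[i] += a * x[i];  // block-stride traversal of [0, N)
          });
    });
```

The design point of the PR: by copying threadIdx/blockDim into the context at kernel entry, the loop implementation reads plain context members, which also lets the same LoopExecute shape work when the body is a function pointer rather than a lambda.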