Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
64c32c7
cleaned up diff
rohanchanani Jan 9, 2026
45d9973
Updated api
rohanchanani Jan 21, 2026
fd2fe62
API that builds
rohanchanani Jan 29, 2026
0987dc8
Finished image API
rohanchanani Feb 2, 2026
460710d
builds with new APIs (ops themselves are slightly broken)
rohanchanani Feb 2, 2026
d7e9e47
Added ifdef REALM_USE_CUDA guards to gpu deppart
rohanchanani Feb 3, 2026
d82566d
renamed suggested to required and provided target proc instead of mem…
rohanchanani Feb 9, 2026
0d92106
deleted default alignment
rohanchanani Feb 10, 2026
59ad878
removed ft from byfield estimate template
rohanchanani Feb 11, 2026
b2f64a9
renamed gpu deppart requirement functions
rohanchanani Feb 12, 2026
9f7be25
Added default initializations to DeppartBufferRequirements
rohanchanani Feb 12, 2026
a72be3e
updated 1d image range
rohanchanani Feb 18, 2026
c9325ae
working multidimensional, no fixed buffer
rohanchanani Feb 19, 2026
761cd1b
working multidimensional
rohanchanani Feb 19, 2026
c05776f
byfield tiled
rohanchanani Feb 20, 2026
2182a04
Added host fallback
rohanchanani Feb 23, 2026
1fc6368
benchmarks done for byfield and image
rohanchanani Feb 23, 2026
3434f6a
implemented cpu bvh
rohanchanani Mar 9, 2026
7a0c30c
preparing to run on perlmutter
rohanchanani Mar 11, 2026
83cb1d6
trying full benchmark
rohanchanani Mar 11, 2026
a55e5c6
bumped upper bounds
rohanchanani Mar 11, 2026
669b69a
fixed construct input rectlist
rohanchanani Mar 11, 2026
17003b1
fixed overflow
rohanchanani Mar 11, 2026
dc8d574
fixed overflow
rohanchanani Mar 11, 2026
0e836f0
removed prints
rohanchanani Mar 11, 2026
27771ca
picked better host memories
rohanchanani Mar 12, 2026
07e354a
for flecsii
rohanchanani Mar 19, 2026
15628d9
Export CPU_BVH for shared builds
rohanchanani Mar 24, 2026
2301eb2
Restore feature-gated source selection
rohanchanani Mar 24, 2026
d63ddee
Merge remote-tracking branch 'upstream/main' into review-tiling
rohanchanani Mar 25, 2026
04df586
deppart: add pinned host pool and NVTX tracing
rohanchanani Mar 25, 2026
de04613
Revert "deppart: add pinned host pool and NVTX tracing"
rohanchanani Apr 8, 2026
be8544f
added .codex to gitignore
rohanchanani Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ install/
.idea/
.vscode/
.cursor/
.codex

# clangd LSP cache
.cache/
Expand Down
20 changes: 20 additions & 0 deletions cmake/deppart_tmpl.cu.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright 2025 Stanford University, NVIDIA Corporation
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Wrapper translation unit filled in by CMake's configure_file: the two
// defines below carry the dimension pair for this instantiation, and the
// include pulls in the templated GPU implementation for the chosen source.
#cmakedefine INST_N1 @INST_N1@
#cmakedefine INST_N2 @INST_N2@
#include "@SRCFILE@_gpu_tmpl.cu"
18 changes: 15 additions & 3 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ set(REALM_SOURCES
nodeset.cc
operation.cc
proc_impl.cc
realm_assert.cc
repl_heap.cc
rsrv_impl.cc
runtime_impl.cc
Expand All @@ -64,6 +63,7 @@ set(REALM_SOURCES
deppart/partitions.cc
deppart/setops.cc
deppart/sparsity_impl.cc
deppart/untemplated_gpu_kernels.cu
numa/numa_module.cc
numa/numasysif.cc
procset/procset_module.cc
Expand Down Expand Up @@ -120,7 +120,7 @@ if(REALM_USE_UCX)
endif()

if(REALM_USE_GASNETEX)
if (NOT REALM_ENABLE_GASNETEX_WRAPPER)
if(NOT REALM_ENABLE_GASNETEX_WRAPPER)
list(APPEND REALM_SOURCES gasnet1/gasnet1_module.cc gasnet1/gasnetmsg.cc)
endif()
list(APPEND REALM_SOURCES gasnetex/gasnetex_module.cc gasnetex/gasnetex_internal.cc)
Expand All @@ -145,7 +145,7 @@ configure_file(
@ONLY
)

# generate per-dimension object files for deppart stuff
# Generate per-dimension object files for CPU deppart.
foreach(INST_N1 RANGE 1 ${REALM_MAX_DIM})
foreach(INST_N2 RANGE 1 ${REALM_MAX_DIM})
foreach(SRCFILE realm/deppart/image realm/deppart/preimage realm/deppart/byfield)
Expand All @@ -157,6 +157,18 @@ foreach(INST_N1 RANGE 1 ${REALM_MAX_DIM})
endforeach()
endforeach()

# Generate per-dimension wrapper sources for GPU deppart: one .cu file per
# (INST_N1, INST_N2) pair in 1..REALM_MAX_DIM for each of byfield, image and
# preimage, each appended to REALM_SOURCES.
# NOTE(review): these .cu files are appended unconditionally in this hunk -
# confirm that CUDA feature gating is applied elsewhere.
foreach(INST_N1 RANGE 1 ${REALM_MAX_DIM})
foreach(INST_N2 RANGE 1 ${REALM_MAX_DIM})
foreach(SRCFILE realm/deppart/byfield realm/deppart/image realm/deppart/preimage)
# wrapper path in the build tree, named <SRCFILE>_gpu_<N1>_<N2>.cu
set(_result_file "${CMAKE_CURRENT_BINARY_DIR}/${SRCFILE}_gpu_${INST_N1}_${INST_N2}.cu")
# use cmake's configure_file for a portable way of creating wrapper source files;
# the template is filled in from the current INST_N1 / INST_N2 / SRCFILE values
configure_file("${PROJECT_SOURCE_DIR}/cmake/deppart_tmpl.cu.in" "${_result_file}")
list(APPEND REALM_SOURCES "${_result_file}")
endforeach()
endforeach()
endforeach()

set(REALM_SOURCES
${REALM_SOURCES}
PARENT_SCOPE
Expand Down
3 changes: 3 additions & 0 deletions src/realm/cuda/cuda_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ namespace Realm {
get_null_task_stream(void) const; // needed by librealm_kokkos.so
GPUStream *get_next_task_stream(bool create = false);
GPUStream *get_next_d2d_stream();
GPUStream *get_deppart_stream() const;

void launch_batch_affine_fill_kernel(void *fill_info, size_t dim, size_t elemSize,
size_t volume, GPUStream *stream);
Expand Down Expand Up @@ -489,6 +490,8 @@ namespace Realm {
GPUStream *host_to_device_stream = nullptr;
GPUStream *device_to_host_stream = nullptr;
GPUStream *device_to_device_stream = nullptr;
GPUStream *deppart_stream = nullptr;

std::vector<GPUStream *> device_to_device_streams;
std::vector<GPUStream *> peer_to_peer_streams; // indexed by target
std::vector<GPUStream *> task_streams;
Expand Down
7 changes: 7 additions & 0 deletions src/realm/cuda/cuda_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,11 @@ namespace Realm {
return device_to_device_streams[d2d_stream_index];
}

// Returns this GPU's deppart_stream member exactly as stored; nothing is
// created or modified by this call.
GPUStream *GPU::get_deppart_stream() const
{
  GPUStream *const stream = deppart_stream;
  return stream;
}

static void launch_kernel(const Realm::Cuda::GPU::GPUFuncInfo &func_info,
void *params, size_t num_elems, GPUStream *stream)
{
Expand Down Expand Up @@ -2040,6 +2045,7 @@ namespace Realm {

host_to_device_stream = new GPUStream(this, worker);
device_to_host_stream = new GPUStream(this, worker);
deppart_stream = new GPUStream(this, worker);

CUdevice dev;
int numSMs;
Expand Down Expand Up @@ -2164,6 +2170,7 @@ namespace Realm {
// destroy streams
delete host_to_device_stream;
delete device_to_host_stream;
delete deppart_stream;

delete_container_contents(device_to_device_streams);

Expand Down
Loading
Loading