Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ if(BUILD_CUDA AND BUILD_TBB)
target_link_libraries(exec_reco_stdexec PRIVATE CoroutineTests CUDA::cudart TBB::tbb STDEXEC::stdexec)
add_executable(exec_delegate_stdexec exec_delegate.cpp)
target_link_libraries(exec_delegate_stdexec PRIVATE CoroutineTests CUDA::cudart TBB::tbb STDEXEC::stdexec)
add_executable(exec_event_poll_stdexec exec_event_poll.cpp)
target_link_libraries(exec_event_poll_stdexec PRIVATE CoroutineTests CUDA::cudart TBB::tbb STDEXEC::stdexec)
endif()
if(BUILD_CAPY)
add_executable(capy_reco capy_reco.cpp)
Expand Down
2 changes: 1 addition & 1 deletion examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,6 @@ These examples are a modification of [reconstruction examples](#reconstruction)

## Event poll

Link: [alien_event_poll.cpp](alien_event_poll.cpp), [capy_event_poll.cpp](capy_event_poll.cpp)
Link: [alien_event_poll.cpp](alien_event_poll.cpp), [exec_event_poll.cpp](exec_event_poll.cpp), [capy_event_poll.cpp](capy_event_poll.cpp)

These examples are a variant of the [delegate examples](#delegate) in which completion of CUDA operations is awaited by repeatedly querying the event state, instead of using a callback as in the earlier examples. All the queries are executed by one specific thread.
13 changes: 7 additions & 6 deletions examples/alien_event_poll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,15 @@ struct Retry {
void await_resume() const noexcept {}
};

// Coroutine that waits for `event` by polling it with cudaEventQuery.
// While the query reports cudaErrorNotReady, the coroutine co_awaits Retry{}
// and then queries again. The final status is handed to ERROR_CHECK_CUDA.
// `parent` is forwarded to log() for every message emitted here.
// NOTE(review): this span appears to be rendered diff output, so the
// pre-change and post-change forms of the signature and of each log line sit
// back to back (old line first, then its replacement) — confirm against the
// actual file before reusing it as code.
tool::Task<void> poll(cudaEvent_t event) {
tool::Task<void> poll(cudaEvent_t event, std::string_view parent) {
auto status = cudaSuccess;
log() << "Polling for event completion..." << std::endl;
log(parent) << "Polling for event completion..." << std::endl;
// Any status other than cudaErrorNotReady (success or a real error) ends the loop.
while ((status = cudaEventQuery(event)) == cudaErrorNotReady) {
log() << "Event not ready, retrying..." << std::endl;
log(parent) << "Event not ready, retrying..." << std::endl;
// Retry is defined above this view; presumably it suspends so the query is re-run later — TODO confirm.
co_await Retry{};
}
ERROR_CHECK_CUDA(status);
log(parent) << "Event completed successfully" << std::endl;
}

subtool::Task<DeviceBuffer<int>> clusterization(
Expand All @@ -83,7 +84,7 @@ subtool::Task<DeviceBuffer<int>> clusterization(
ERROR_CHECK_CUDA(cudaEventRecord(event, stream));
}));

co_await schedule_on(delegation_scheduler, poll(event));
co_await schedule_on(delegation_scheduler, poll(event, self));

auto nClusters = 0;
for (auto v : h_cells)
Expand Down Expand Up @@ -136,7 +137,7 @@ subtool::Task<DeviceBuffer<int>> seeding(
ERROR_CHECK_CUDA(cudaEventRecord(event, stream));
}));

co_await schedule_on(delegation_scheduler, poll(event));
co_await schedule_on(delegation_scheduler, poll(event, self));

int nSeeds = 0;
for (auto v : h_clusters)
Expand Down Expand Up @@ -200,7 +201,7 @@ tool::Task<tool::StatusCode> reconstruct(
ERROR_CHECK_CUDA(cudaEventRecord(event, stream));
}));

co_await schedule_on(delegation_scheduler, poll(event));
co_await schedule_on(delegation_scheduler, poll(event, self));

log(self) << "Finishing reconstruction" << std::endl;
co_return tool::StatusCode::SUCCESS;
Expand Down
16 changes: 10 additions & 6 deletions examples/capy_event_poll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,15 @@ struct Retry {
void await_resume() const noexcept {}
};

// Coroutine (boost::capy::task) that waits for `event` by polling it with
// cudaEventQuery. While the query reports cudaErrorNotReady, the coroutine
// co_awaits Retry{} and then queries again. The final status is handed to
// ERROR_CHECK_CUDA. `parent` is forwarded to log() for every message emitted
// here.
// NOTE(review): this span appears to be rendered diff output, so the
// pre-change and post-change forms of the signature and of each log line sit
// back to back (old line first, then its replacement) — confirm against the
// actual file before reusing it as code.
boost::capy::task<void> poll(cudaEvent_t event) {
boost::capy::task<void> poll(cudaEvent_t event, std::string_view parent) {
auto status = cudaSuccess;
log() << "Polling for event completion..." << std::endl;
log(parent) << "Polling for event completion..." << std::endl;
// Any status other than cudaErrorNotReady (success or a real error) ends the loop.
while ((status = cudaEventQuery(event)) == cudaErrorNotReady) {
log() << "Event not ready, retrying..." << std::endl;
log(parent) << "Event not ready, retrying..." << std::endl;
// Retry is defined above this view; presumably it suspends so the query is re-run later — TODO confirm.
co_await Retry{};
}
ERROR_CHECK_CUDA(status);
log(parent) << "Event completed successfully" << std::endl;
}

boost::capy::task<DeviceBuffer<int>> clusterization(
Expand All @@ -81,7 +82,8 @@ boost::capy::task<DeviceBuffer<int>> clusterization(
ERROR_CHECK_CUDA(cudaEventRecord(event, stream));
}));

co_await boost::capy::run(delegation_thread.get_executor())(poll(event));
co_await boost::capy::run(delegation_thread.get_executor())(
poll(event, self));

auto nClusters = 0;
for (auto v : h_cells)
Expand Down Expand Up @@ -129,7 +131,8 @@ boost::capy::task<DeviceBuffer<int>> seeding(
ERROR_CHECK_CUDA(cudaEventRecord(event, stream));
}));

co_await boost::capy::run(delegation_thread.get_executor())(poll(event));
co_await boost::capy::run(delegation_thread.get_executor())(
poll(event, self));

int nSeeds = 0;
for (auto v : h_clusters)
Expand Down Expand Up @@ -188,7 +191,8 @@ boost::capy::task<tools::StatusCode> reconstruct(
ERROR_CHECK_CUDA(cudaEventRecord(event, stream));
}));

co_await boost::capy::run(delegation_thread.get_executor())(poll(event));
co_await boost::capy::run(delegation_thread.get_executor())(
poll(event, self));

log(self) << "Finishing reconstruction" << std::endl;
co_return tools::StatusCode::SUCCESS;
Expand Down
Loading