Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions cmake/detray-compiler-options-cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,6 @@ if(PROJECT_IS_TOP_LEVEL)
detray_add_flag( CMAKE_CUDA_FLAGS "-Xcompiler /Zc:__cplusplus" )
endif()

# Set the CUDA architecture to build code for.
set(CMAKE_CUDA_ARCHITECTURES
"52"
CACHE STRING
"CUDA architectures to build device code for"
)

if("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
# Allow to use functions in device code that are constexpr, even if they are
# not marked with __device__.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "detray/materials/predefined_materials.hpp"
#include "detray/utils/log.hpp"
#include "detray/utils/ranges.hpp"
#include "detray/utils/type_registry.hpp"

// System include(s)
#include <sstream>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class concentric_cylinder2D {
const scalar_t tol = std::numeric_limits<scalar_t>::epsilon(),
const scalar_t /*edge_tol*/ = 0.f) const {

return (bounds[e_lower_z] - tol <= loc_p[1] &&
return (bounds[e_lower_z] <= loc_p[1] + tol &&
loc_p[1] <= bounds[e_upper_z] + tol);
}
/// @}
Expand Down
2 changes: 1 addition & 1 deletion core/include/detray/propagator/rk_stepper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ class rk_stepper final
scalar_type m_next_step_size{0.f};

/// Magnetic field view
const magnetic_field_t m_magnetic_field;
magnetic_field_t m_magnetic_field;
};

/// Take a step, using an adaptive Runge-Kutta algorithm.
Expand Down
10 changes: 10 additions & 0 deletions tests/benchmarks/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,13 @@ foreach(algebra ${algebra_plugins})
PRIVATE "-march=native" "-ftree-vectorize"
)
endforeach()

# Standalone CUDA propagation executable, built from "propagation_new.cpp" and
# linked against the array-algebra CUDA benchmark and core libraries, the
# vecmem CUDA library and the common test utilities.
detray_add_executable(cuda_propagation
"propagation_new.cpp"
LINK_LIBRARIES detray::benchmark_cuda_array detray::core_array vecmem::cuda detray::test_common
)

# Host compiler optimization flags for the executable declared above.
# NOTE(review): the target is referred to as "detray_cuda_propagation" here,
# presumably because detray_add_executable prefixes the name with "detray_" -
# confirm against the helper's definition.
# NOTE(review): "-march=native" tunes the host code for the build machine, so
# the resulting binary may not run on a different CPU.
target_compile_options(
detray_cuda_propagation
PRIVATE "-march=native" "-ftree-vectorize"
)
136 changes: 136 additions & 0 deletions tests/benchmarks/cuda/propagation_new.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/** Detray library, part of the ACTS project (R&D line)
*
* (c) 2024 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

// Project include(s)
#include "detray/navigation/navigator.hpp"
#include "detray/propagator/actors.hpp"
#include "detray/propagator/rk_stepper.hpp"
#include "detray/tracks/tracks.hpp"

// Detray benchmark include(s)
#include "detray/benchmarks/device/cuda/propagator.hpp"
#include "detray/benchmarks/propagation_benchmark_utils.hpp"
#include "detray/benchmarks/types.hpp"

// Detray test include(s)
#include "detray/test/common/bfield.hpp"
#include "detray/test/common/build_toy_detector.hpp"
#include "detray/test/common/track_generators.hpp"

// Vecmem include(s)
#include <vecmem/memory/cuda/device_memory_resource.hpp>
#include <vecmem/memory/cuda/host_memory_resource.hpp>
#include <vecmem/memory/host_memory_resource.hpp>

// System include(s)
#include <charconv>
#include <chrono>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <random>
#include <ratio>
#include <string>
#include <string_view>
#include <system_error>

using namespace detray;

/// Run a single CUDA propagation pass through the toy detector and report the
/// wall-clock time it took.
///
/// @param argc number of command line arguments
/// @param argv command line arguments; the optional first argument is the
///             number of tracks to propagate (a positive integer, defaults to
///             262144)
///
/// @returns EXIT_SUCCESS after the timing was printed, EXIT_FAILURE if the
///          track count argument is not a valid positive integer
int main(int argc, char** argv) {

    using metadata_t = benchmarks::toy_metadata;
    using toy_detector_t = detector<metadata_t>;
    using algebra_t = typename toy_detector_t::algebra_type;
    using scalar = dscalar<algebra_t>;
    using vector3 = dvector3D<algebra_t>;

    using free_track_parameters_t = free_track_parameters<algebra_t>;
    using uniform_gen_t =
        detail::random_numbers<scalar, std::uniform_real_distribution<scalar>>;
    using track_generator_t =
        random_track_generator<free_track_parameters_t, uniform_gen_t>;
    using field_bknd_t = bfield::const_bknd_t<benchmarks::scalar>;

    // vecmem::host_memory_resource host_mr;
    vecmem::cuda::host_memory_resource host_mr;  //< pinned memory
    vecmem::cuda::device_memory_resource dev_mr;

    //
    // Configuration
    //

    std::size_t n_tracks{262144u};
    if (argc > 1) {
        // Parse strictly: the whole argument has to be a positive integer.
        // (A plain 'atoi' would silently turn garbage into zero tracks and
        // wrap negative numbers around to a huge unsigned track count.)
        const std::string_view arg{argv[1]};
        const char* const arg_end = arg.data() + arg.size();
        const auto [parsed_end, err] =
            std::from_chars(arg.data(), arg_end, n_tracks);

        if (err != std::errc{} || parsed_end != arg_end || n_tracks == 0u) {
            std::cerr << "Invalid number of tracks: \"" << arg << "\"\n"
                      << "Usage: " << argv[0] << " [n_tracks > 0]"
                      << std::endl;
            return EXIT_FAILURE;
        }
    }

    // Constant magnetic field
    vector3 B{0.f, 0.f, 2.f * unit<scalar>::T};

    // Configure toy detector
    toy_det_config<scalar> toy_cfg{};
    toy_cfg.use_material_maps(false).n_brl_layers(4u).n_edc_layers(7u);

    std::cout << toy_cfg << std::endl;

    // Configure propagation
    propagation::config prop_cfg{};
    prop_cfg.navigation.search_window = {3u, 3u};

    std::cout << prop_cfg << std::endl;

    //
    // Prepare data
    //
    // Generate the track sample (fixed default seed)
    track_generator_t::configuration trk_cfg{};
    trk_cfg.n_tracks(n_tracks);
    trk_cfg.seed(detail::random_numbers<scalar>::default_seed());

    std::cout << trk_cfg << std::endl;

    track_generator_t trk_gen{trk_cfg};

    dvector<free_track_parameters_t> single_sample =
        detray::benchmarks::generate_tracks(&host_mr, trk_gen, true);

    const auto [toy_det, names] =
        build_toy_detector<algebra_t>(host_mr, toy_cfg);

    auto bfield = create_const_field<scalar>(B);

    pointwise_material_interactor<algebra_t>::state interactor_state{};
    parameter_resetter<algebra_t>::state resetter_state{};

    auto actor_states =
        detail::make_tuple<dtuple>(interactor_state, resetter_state);

    //
    // Run the propagation
    //
    std::cout << "\n----------------------\n"
              << "Propagation Test\n"
              << "----------------------\n\n";

    using navigator_t = navigator_type<metadata_t>;
    using stepper_t = stepper_type<metadata_t, field_bknd_t>;
    using actor_chain_t = default_chain<algebra_t>;

    prop_cfg.stepping.do_covariance_transport = true;
    cuda_propagation<navigator_t, stepper_t, actor_chain_t> propagator{
        prop_cfg};

    // Use a monotonic clock for the interval measurement, so that system
    // clock adjustments cannot distort the result.
    // NOTE(review): this measures the host-side duration of the call below;
    // it presumably includes device transfers and synchronization - confirm
    // against the 'cuda_propagation' implementation.
    using timer_clock = std::chrono::steady_clock;

    const timer_clock::time_point t1 = timer_clock::now();
    propagator(&dev_mr, &toy_det, &bfield, &single_sample, &actor_states);
    const timer_clock::time_point t2 = timer_clock::now();

    const std::chrono::duration<double, std::milli> total_time = t2 - t1;
    const double total_time_ms{total_time.count()};

    // Assumption: 1 event = 3000 truth tracks + 2 seeds per track
    std::cout << "It took: " << total_time_ms << "ms ("
              << total_time_ms / (static_cast<double>(n_tracks) / 3000.)
              << " ms/evt)" << std::endl;

    return EXIT_SUCCESS;
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ foreach(algebra ${algebra_plugins})
STATIC
"propagation_benchmark.hpp"
"propagation_benchmark.cu"
"propagator.hpp"
"propagator.cu"
)

add_library(
Expand All @@ -40,9 +42,15 @@ foreach(algebra ${algebra_plugins})
target_link_libraries(
detray_benchmark_cuda_${algebra}
PUBLIC
CUDA::cudart
vecmem::cuda
detray::benchmarks
detray::test_common
detray::core_${algebra}
)

# Build the device code of this benchmark library for CUDA architecture 75.
# NOTE(review): this hard-codes the architecture on the target, so a
# user-provided CMAKE_CUDA_ARCHITECTURES value will not apply to it - confirm
# that this is intended and not a leftover from local testing.
set_property(
TARGET detray_benchmark_cuda_${algebra}
PROPERTY CUDA_ARCHITECTURES 75
)
endforeach()
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ template <typename propagator_t>
void release_actor_states(
typename propagator_t::actor_chain_type::state_tuple *);

/// Device Propagation becnhmark
/// Device Propagation benchmark
template <typename propagator_t, typename bfield_bknd_t,
detray::benchmarks::propagation_opt kOPT =
detray::benchmarks::propagation_opt::e_unsync>
Expand Down
Loading
Loading