Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions benchmark/batched/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Top-level build script for the batched-clustering benchmarks.
# It configures the C++ toolchain defaults shared by every back-end
# subdirectory (cpu, cuda, hip, sycl).
cmake_minimum_required(VERSION 3.16.0)

# Declare the project explicitly. Without this call CMake emits a warning and
# silently falls back to an implicit `project(Project)`.
project(CLUEsteringBatchedBenchmark LANGUAGES CXX)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Place every benchmark executable directly in the top of the build tree.
# CMAKE_RUNTIME_OUTPUT_DIRECTORY supersedes the legacy EXECUTABLE_OUTPUT_PATH.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})

# Benchmarks default to an optimised build when no build type was requested.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# Debug builds: CLUE debug macro, libstdc++ assertions, extra warnings,
# profiling instrumentation (-pg) and AddressSanitizer.
string(
  APPEND
  CMAKE_CXX_FLAGS_DEBUG
  " -DCLUE_DEBUG -D_GLIBCXX_ASSERTIONS -O0 -Wall -Wextra -Wpedantic -Wshadow -Wimplicit-fallthrough -Wextra-semi -Wold-style-cast -g -pg -fsanitize=address"
)

# NOTE(review): this assignment replaces CMake's default Release flags instead
# of appending to them; the leading space in the string suggests
# string(APPEND ...) may have been intended -- confirm before changing.
set(CMAKE_CXX_FLAGS_RELEASE
    " -O2 -funroll-loops -funsafe-math-optimizations -ftree-vectorize -march=native"
)

# --- Third-party dependencies ------------------------------------------------
# Boost (>= 1.75.0) and Google Benchmark must already be installed.
find_package(Boost 1.75.0 REQUIRED)
find_package(benchmark REQUIRED)

# alpaka is optional on the system: use an installed package when one is
# found, otherwise download the 1.2.0 release archive and build it in-tree.
find_package(alpaka)
if(NOT alpaka_FOUND)
  include(FetchContent)

  FetchContent_Declare(
    alpaka
    URL https://github.com/alpaka-group/alpaka/archive/refs/tags/1.2.0.tar.gz
  )
  FetchContent_MakeAvailable(alpaka)
endif()

# --- Back-end subdirectories -------------------------------------------------
# The CPU benchmarks are always configured.
add_subdirectory(cpu)

# The GPU benchmarks are configured only when check_language() finds a
# compiler for the corresponding language. The subdirectory name is the
# lower-case language name (CUDA -> cuda, HIP -> hip).
include(CheckLanguage)
foreach(_gpu_language IN ITEMS CUDA HIP)
  check_language(${_gpu_language})
  if(CMAKE_${_gpu_language}_COMPILER)
    string(TOLOWER "${_gpu_language}" _gpu_subdir)
    add_subdirectory(${_gpu_subdir})
    unset(_gpu_subdir)
  endif()
endforeach()

# --- SYCL back-end -----------------------------------------------------------
# Candidate installation prefixes: an optional user-supplied SYCL_ROOT_DIR
# followed by a few conventional locations.
set(_sycl_search_dirs
    ${SYCL_ROOT_DIR}
    /usr/lib
    /usr/local/lib
    /opt/intel/oneapi/compiler/latest/linux
)

# Locate the icpx compiler driver, the SYCL headers and the SYCL runtime
# library under those prefixes.
find_program(
  SYCL_COMPILER
  NAMES icpx
  HINTS ${_sycl_search_dirs}
  PATH_SUFFIXES bin
)
find_path(
  SYCL_INCLUDE_DIR
  NAMES sycl/sycl.hpp
  HINTS ${_sycl_search_dirs}
  PATH_SUFFIXES include
)
find_path(
  SYCL_LIB_DIR
  NAMES libsycl.so
  HINTS ${_sycl_search_dirs}
  PATH_SUFFIXES lib
)

find_package(oneDPL)

# The SYCL benchmark is configured only when oneDPL and all three SYCL
# components were found.
if(oneDPL_FOUND
   AND SYCL_COMPILER
   AND SYCL_INCLUDE_DIR
   AND SYCL_LIB_DIR
)
  add_subdirectory(sycl)
endif()
48 changes: 48 additions & 0 deletions benchmark/batched/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# CPU benchmark executables.
#
# Every target is built from the same main.cpp; the targets differ only in the
# accelerator compile definition and, for TBB/OpenMP, one extra link library.

# clue_add_cpu_benchmark(<target> <accelerator_definition> [<extra_lib>...])
#
# Defines one CPU benchmark executable.
#   target                 - name of the executable target to create.
#   accelerator_definition - ALPAKA_ACC_*_ENABLED compile definition for the
#                            back-end this executable is built with.
#   extra_lib...           - optional additional libraries to link privately
#                            (e.g. TBB::tbb).
#
# The paths are relative to CMAKE_SOURCE_DIR, i.e. this assumes that
# benchmark/batched is configured as the top-level source directory.
#
# Example:
#   clue_add_cpu_benchmark(tbb.out ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED TBB::tbb)
function(clue_add_cpu_benchmark target accelerator_definition)
  add_executable(${target} ${CMAKE_SOURCE_DIR}/main.cpp)
  target_include_directories(
    ${target} PRIVATE ${CMAKE_SOURCE_DIR}/../../include
                      ${CMAKE_SOURCE_DIR}/../../benchmark)
  target_link_libraries(
    ${target} PRIVATE alpaka::alpaka Boost::boost ${ARGN}
                      benchmark::benchmark)
  target_compile_definitions(
    ${target} PRIVATE ALPAKA_HOST_ONLY ${accelerator_definition}
                      CLUE_ENABLE_CACHING_ALLOCATOR)
  # Set the output directory on every target (previously only openmp.out did)
  # so all executables land in the top of the build tree.
  set_target_properties(${target} PROPERTIES RUNTIME_OUTPUT_DIRECTORY
                                             ${CMAKE_BINARY_DIR})
endfunction()

# Back-ends with no extra dependency are always built.
clue_add_cpu_benchmark(serial.out ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED)
clue_add_cpu_benchmark(threads.out ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED)

# TBB back-end, built only when TBB is available.
find_package(TBB)
if(TBB_FOUND)
  clue_add_cpu_benchmark(tbb.out ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED TBB::tbb)
endif()

# OpenMP back-end, built only when a C++ OpenMP implementation is available.
find_package(OpenMP)
if(OpenMP_CXX_FOUND)
  clue_add_cpu_benchmark(openmp.out ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED
                         OpenMP::OpenMP_CXX)
endif()
20 changes: 20 additions & 0 deletions benchmark/batched/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# CUDA benchmark executable (cuda.out), built from the shared main.cpp.
enable_language(CUDA)

# The host compiler is intentionally not overridden here. The previous
# `set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CUDA_COMPILER})` pointed the host
# compiler at nvcc itself, and did so after enable_language(CUDA), where CMake
# documents the variable as read-only. To choose a host compiler, pass
# -DCMAKE_CUDA_HOST_COMPILER=<path> or set the CUDAHOSTCXX environment
# variable on the first configure.

# Default to CUDA C++20 unless the user already selected a standard.
if(NOT DEFINED CMAKE_CUDA_STANDARD)
  set(CMAKE_CUDA_STANDARD 20)
  set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()

# main.cpp has a .cpp extension, so it must be marked explicitly as CUDA
# source for this directory's target.
set_source_files_properties(${CMAKE_SOURCE_DIR}/main.cpp PROPERTIES LANGUAGE
                                                                    CUDA)
add_executable(cuda.out ${CMAKE_SOURCE_DIR}/main.cpp)
target_include_directories(
  cuda.out PRIVATE ${CMAKE_SOURCE_DIR}/../../include
                   ${CMAKE_SOURCE_DIR}/../../benchmark)
target_link_libraries(cuda.out PRIVATE alpaka::alpaka Boost::boost
                                       benchmark::benchmark)
target_compile_definitions(cuda.out PRIVATE ALPAKA_ACC_GPU_CUDA_ENABLED
                                            CLUE_ENABLE_CACHING_ALLOCATOR)
target_compile_options(cuda.out PRIVATE --expt-relaxed-constexpr)
# Code is generated for the fixed architecture list 75, 80 and 90.
set_target_properties(cuda.out PROPERTIES CUDA_SEPARABLE_COMPILATION ON
                                          CUDA_ARCHITECTURES "75;80;90")
12 changes: 12 additions & 0 deletions benchmark/batched/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# HIP benchmark executable (hip.out), built from the shared main.cpp.
enable_language(HIP)
find_package(HIP)

# main.cpp has a .cpp extension, so it is marked explicitly as HIP source
# before the target is created.
set_source_files_properties(
  ${CMAKE_SOURCE_DIR}/main.cpp
  PROPERTIES LANGUAGE HIP
)

add_executable(hip.out ${CMAKE_SOURCE_DIR}/main.cpp)
target_include_directories(
  hip.out
  PRIVATE ${CMAKE_SOURCE_DIR}/../../include
          ${CMAKE_SOURCE_DIR}/../../benchmark
)
target_link_libraries(
  hip.out
  PRIVATE alpaka::alpaka
          Boost::boost
          benchmark::benchmark
)
target_compile_definitions(
  hip.out
  PRIVATE ALPAKA_ACC_GPU_HIP_ENABLED
          CLUE_ENABLE_CACHING_ALLOCATOR
)
50 changes: 50 additions & 0 deletions benchmark/batched/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

#include "CLUEstering/CLUEstering.hpp"
#include "utils/generation.hpp"
#include <benchmark/benchmark.h>

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <ranges>
#include <string>
#include <vector>

// Benchmark: cluster a set of events one at a time.
//
// Setup (not timed): every per-event CSV file
// "../../data/small_event_<i>.csv", i in [0, n_events), is read into host
// memory, and a single device buffer is allocated.
// Timed region: for each benchmark iteration, a fresh Clusterer is built and
// run once per event.
static void BM_SingleEvents(benchmark::State& state) {
  // Number of per-event input files loaded by this benchmark.
  constexpr unsigned n_events = 1000u;

  auto queue = clue::get_queue(0u);

  std::vector<clue::PointsHost<2>> host_points;
  // The final size is known up front; avoid repeated reallocation while
  // loading the events.
  host_points.reserve(n_events);
  std::ranges::transform(std::views::iota(0u, n_events),
                         std::back_inserter(host_points),
                         [&](const auto i) {
                           return clue::read_csv<2>(
                               queue, "../../data/small_event_" + std::to_string(i) + ".csv");
                         });

  // NOTE(review): the device buffer is sized from the first event only and is
  // reused for every event -- this assumes no event holds more points than
  // event 0; confirm against the data set.
  clue::PointsDevice<2> d_points(queue, host_points[0].size());
  const auto dc = 1.5f, rhoc = 10.f, outlier = 1.5f;

  for (auto _ : state) {
    for (auto& h_points : host_points) {
      clue::Clusterer<2> algo(queue, dc, rhoc, outlier);
      algo.make_clusters(queue, h_points, d_points);
    }
  }
}

// Benchmark: cluster all events in one batched call.
//
// Setup (not timed): the combined file "../../data/small_events_batch.csv" is
// read into host memory, a device buffer of the same size is allocated, and
// the per-event sizes are described as 1000 equal batches.
// Timed region: for each benchmark iteration, a fresh Clusterer is built and
// the batched overload of make_clusters is run once.
static void BM_Batched(benchmark::State& state) {
  // Clustering parameters handed to the Clusterer constructor.
  const float dc = 1.5f;
  const float rhoc = 10.f;
  const float outlier = 1.5f;

  auto queue = clue::get_queue(0u);

  clue::PointsHost<2> h_points = clue::read_csv<2>(queue, "../../data/small_events_batch.csv");
  const std::size_t n_points = h_points.size();
  clue::PointsDevice<2> d_points(queue, n_points);

  // NOTE(review): integer division -- if n_points is not a multiple of 1000
  // the batch sizes sum to less than n_points; confirm the input guarantees
  // divisibility.
  std::vector<std::size_t> batch_event_sizes(1000, n_points / 1000);

  for (auto _ : state) {
    clue::Clusterer<2> algo(queue, dc, rhoc, outlier);
    algo.make_clusters(queue, h_points, d_points, batch_event_sizes);
  }
}

// Register both benchmarks with a fixed count of 100 iterations each, so the
// per-event and batched variants run the same number of iterations.
BENCHMARK(BM_SingleEvents)->Iterations(100);
BENCHMARK(BM_Batched)->Iterations(100);
// Expands to the program's main(), which runs the registered benchmarks.
BENCHMARK_MAIN();
Binary file added data/batched_data.tar
Binary file not shown.
Loading
Loading