diff --git a/DataFormats/Portable/test/BuildFile.xml b/DataFormats/Portable/test/BuildFile.xml index 0cd7f68a927d1..732c24f696ff0 100644 --- a/DataFormats/Portable/test/BuildFile.xml +++ b/DataFormats/Portable/test/BuildFile.xml @@ -32,3 +32,12 @@ + + + + + + + + + diff --git a/DataFormats/Portable/test/alpaka/device_methods.dev.cc b/DataFormats/Portable/test/alpaka/device_methods.dev.cc index 48b7f90a606d6..b48565afe05f5 100644 --- a/DataFormats/Portable/test/alpaka/device_methods.dev.cc +++ b/DataFormats/Portable/test/alpaka/device_methods.dev.cc @@ -32,17 +32,27 @@ GENERATE_SOA_LAYOUT(SoAPositionTemplate, SOA_ELEMENT_METHODS( - SOA_HOST_DEVICE void normalise() { + SOA_HOST_DEVICE SOA_INLINE void normalise() { float norm_position = square_norm_position(); if (norm_position > 0.0f) { x() /= norm_position; y() /= norm_position; z() /= norm_position; - }}; + }} + + template + SOA_HOST_DEVICE SOA_INLINE void add(OtherView& otherSoA) { + x() = x() + otherSoA.x(0); + y() = y() + otherSoA.y(0); + z() = z() + otherSoA.z(0); + } + ), SOA_CONST_ELEMENT_METHODS( - SOA_HOST_DEVICE float square_norm_position() const { return sqrt(x() * x() + y() * y() + z() * z()); }; + + SOA_HOST_DEVICE SOA_INLINE float square_norm_position() const { return sqrt(x() * x() + y() * y() + z() * z()); }; + ), SOA_SCALAR(int, detectorType)) @@ -69,16 +79,34 @@ struct FillSoA { // Kernel for normalising the positions struct NormalisePositions { template - ALPAKA_FN_ACC void operator()(TAcc const& acc, PositionView positionView) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, PositionView& positionView) const { for (auto local_idx : cms::alpakatools::uniform_elements(acc, positionView.metadata().size())) { positionView[local_idx].normalise(); } } }; + + +// Kernel for finding the sum of first element and the current element +struct Addition { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, PositionView& positionView) const { + for (auto local_idx : 
cms::alpakatools::uniform_elements(acc, positionView.metadata().size())) { + if (local_idx != 0) { + positionView[local_idx].add(positionView); + } + } + } + }; int main(int argc, char** argv) { + int i=0; + std::chrono::time_point start, end; + std::vector inner_repetitions(100); + double sum, average; + auto const& devices = cms::alpakatools::devices(); if (devices.empty()) { std::cout << "No devices available for the " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) << " backend, " @@ -87,27 +115,28 @@ int main(int argc, char** argv) { auto devHost = alpaka::getDevByIdx(alpaka::PlatformCpu{}, 0u); - for (auto const& device : cms::alpakatools::devices()) { + for (auto const& device : cms::alpakatools::devices()) { std::cout << "Running on " << alpaka::getName(device) << std::endl; Queue queue(device); // common number of elements for the SoAs - const std::size_t elems = 50000000; + const std::size_t elems = parse_or_default(argc > 1 ? argv[1] : nullptr, 1000); /* argv[1] if valid, else 1000; a fallback of 0 would benchmark an empty collection */ // Portable Collections PortableCollection positionCollection(elems, queue); - SoAPositionView& positionCollectionView = positionCollection.view(); + SoAPositionView positionCollectionView = positionCollection.view(); + // SoAPositionConstView positionCollectionConstView = positionCollection.const_view(); // fill up // 1) Block size: argv[1] if valid, else 64 - const std::size_t blockSize = parse_or_default(argc > 1 ? argv[1] : nullptr, 64); + const std::size_t blockSize = parse_or_default(argc > 2 ? argv[2] : nullptr, 512); // 2) Default blocks: cover all elements for the chosen block size const std::size_t defaultBlocks = cms::alpakatools::divide_up_by(elems, blockSize); // 3) Number of blocks: argv[2] if valid (>0), else defaultBlocks - std::size_t numberOfBlocks = parse_or_default(argc > 2 ? argv[2] : nullptr, defaultBlocks); + std::size_t numberOfBlocks = parse_or_default(argc > 3 ? 
argv[3] : nullptr, defaultBlocks); // (Optional) guard: never let it be 0 if (numberOfBlocks == 0) numberOfBlocks = defaultBlocks; @@ -117,28 +146,70 @@ int main(int argc, char** argv) { alpaka::exec(queue, workDiv, FillSoA{}, positionCollectionView); alpaka::wait(queue); - auto start = std::chrono::high_resolution_clock::now(); + start = std::chrono::high_resolution_clock::now(); - // normalise - alpaka::exec(queue, workDiv, NormalisePositions{}, positionCollectionView); - alpaka::wait(queue); + for (int j = 0 ; j < 20 ; j++) { + // normalise + alpaka::exec(queue, workDiv, NormalisePositions{}, positionCollectionView); + alpaka::wait(queue); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration elapsed = end - start; - std::cout << "Total execution time: " << elapsed.count() << " seconds\n"; + // element addition + alpaka::exec(queue, workDiv, Addition{}, positionCollectionView); + alpaka::wait(queue); + + // fill + alpaka::exec(queue, workDiv, FillSoA{}, positionCollectionView); + alpaka::wait(queue); + } + + end = std::chrono::high_resolution_clock::now(); + + for(i=0; i<101; i++) { + + start = std::chrono::high_resolution_clock::now(); + + for (int j = 0 ; j < 10 ; j++) { + // normalise + alpaka::exec(queue, workDiv, NormalisePositions{}, positionCollectionView); + alpaka::wait(queue); + + // element addition + alpaka::exec(queue, workDiv, Addition{}, positionCollectionView); + alpaka::wait(queue); + + // fill + alpaka::exec(queue, workDiv, FillSoA{}, positionCollectionView); + alpaka::wait(queue); + } + + end = std::chrono::high_resolution_clock::now(); + + std::chrono::duration elapsed = (end - start) * 1000; + + if (i > 0) + inner_repetitions[i-1] = elapsed.count(); + } + + // Calculate the sum of all elements + sum = std::accumulate(inner_repetitions.begin(), inner_repetitions.end(), 0.0); + + // Calculate the average + average = sum / inner_repetitions.size(); + + std::cout << "Average execution time: " << average << " ms\n"; 
PortableHostCollection positionHostCollection(elems, queue); alpaka::memcpy(queue, positionHostCollection.buffer(), positionCollection.buffer()); alpaka::wait(queue); // check norm == 1 - const SoAPositionConstView& positionViewHostCollection = positionHostCollection.const_view(); - for (size_t i = 0; i < elems; i++) { - float norm = positionViewHostCollection[i].square_norm_position(); - if (std::abs(norm - 1.0f) > 1.e-5f) { - std::cout << "Error in normalisation at element " << i << " : " << norm << std::endl; - } - } + // const SoAPositionConstView& positionViewHostCollection = positionHostCollection.const_view(); + // for (size_t i = 0; i < elems; i++) { + // float norm = positionViewHostCollection[i].square_norm_position(); + // if (std::abs(norm - 1.0f) > 1.e-5f) { + // std::cout << "Error in normalisation at element " << i << " : " << norm << std::endl; + // } + // } std::cout << "Normalisation check completed" << std::endl; diff --git a/DataFormats/Portable/test/alpaka/device_test_methods.dev.cc b/DataFormats/Portable/test/alpaka/device_test_methods.dev.cc new file mode 100644 index 0000000000000..945c066ad11fd --- /dev/null +++ b/DataFormats/Portable/test/alpaka/device_test_methods.dev.cc @@ -0,0 +1,214 @@ +// Device custom methods benchmark +#include +#include +#include +#include +#include + +#include + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +// Safe parser: returns fallback if s is null, invalid, or <= 0 +static inline std::size_t parse_or_default(const char* s, std::size_t fallback) { + if (!s) return fallback; + char* end = nullptr; + errno = 0; + unsigned long v = std::strtoul(s, &end, 10); + if (errno != 0 || end == s || 
*end != '\0' || v == 0) return fallback; + return static_cast(v); +} + +GENERATE_SOA_LAYOUT(SoAPositionTemplate, + SOA_COLUMN(float, x), + SOA_COLUMN(float, y), + SOA_COLUMN(float, z), + + SOA_SCALAR(int, detectorType)) + +using SoAPosition = SoAPositionTemplate<>; +using SoAPositionView = SoAPosition::View; +using SoAPositionConstView = SoAPosition::ConstView; + +// Kernel for filling the SoA +struct FillSoA { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, PositionView positionView) const { + if (cms::alpakatools::once_per_grid(acc)) + positionView.detectorType() = 1; + + for (auto local_idx : cms::alpakatools::uniform_elements(acc, positionView.metadata().size())) { + positionView[local_idx].x() = static_cast(local_idx) + 1.f; + positionView[local_idx].y() = (static_cast(local_idx) + 1.f) * 2.0f; + positionView[local_idx].z() = (static_cast(local_idx) + 1.f) * 3.0f; + } + } +}; + +template +SOA_HOST_DEVICE SOA_INLINE float square_norm_position(PositionView& positionView, int local_idx) { + return sqrt(positionView[local_idx].x() * positionView[local_idx].x() + positionView[local_idx].y() * positionView[local_idx].y() + positionView[local_idx].z() * positionView[local_idx].z()); +} + +// Kernel for normalising the positions +struct NormalisePositions { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, PositionView& positionView) const { + for (auto local_idx : cms::alpakatools::uniform_elements(acc, positionView.metadata().size())) { + float norm_position = square_norm_position(positionView, local_idx); + if (norm_position > 0.0f) { + positionView[local_idx].x() /= norm_position; + positionView[local_idx].y() /= norm_position; + positionView[local_idx].z() /= norm_position; + } + } + } + }; + + +// Kernel for finding the sum of first element and the current element +struct Addition { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, PositionView& positionView) const { + for (auto local_idx : cms::alpakatools::uniform_elements(acc, 
positionView.metadata().size())) { + if (local_idx != 0) { + positionView[local_idx].x() += positionView[0].x(); + positionView[local_idx].y() += positionView[0].y(); + positionView[local_idx].z() += positionView[0].z(); + } + } + } + }; + + +int main(int argc, char** argv) { + + int i=0; + std::chrono::time_point start, end; + std::vector inner_repetitions(100); + double sum, average; + + auto const& devices = cms::alpakatools::devices(); + if (devices.empty()) { + std::cout << "No devices available for the " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) << " backend, " + "the test will be skipped." << std::endl; + } + + auto devHost = alpaka::getDevByIdx(alpaka::PlatformCpu{}, 0u); + + for (auto const& device : cms::alpakatools::devices()) { + std::cout << "Running on " << alpaka::getName(device) << std::endl; + + Queue queue(device); + + // common number of elements for the SoAs: argv[1] if valid, else 1000 + const std::size_t elems = parse_or_default(argc > 1 ? argv[1] : nullptr, 1000); + + // Portable Collections + PortableCollection positionCollection(elems, queue); + SoAPositionView positionCollectionView = positionCollection.view(); + // SoAPositionConstView positionCollectionConstView = positionCollection.const_view(); + + // fill up + // 1) Block size: argv[2] if valid, else 512 + const std::size_t blockSize = parse_or_default(argc > 2 ? argv[2] : nullptr, 512); + + // 2) Default blocks: cover all elements for the chosen block size + const std::size_t defaultBlocks = cms::alpakatools::divide_up_by(elems, blockSize); + + // 3) Number of blocks: argv[3] if valid (>0), else defaultBlocks + std::size_t numberOfBlocks = parse_or_default(argc > 3 ? 
argv[3] : nullptr, defaultBlocks); + + // (Optional) guard: never let it be 0 + if (numberOfBlocks == 0) numberOfBlocks = defaultBlocks; + + const auto workDiv = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + + alpaka::exec(queue, workDiv, FillSoA{}, positionCollectionView); + alpaka::wait(queue); + + start = std::chrono::high_resolution_clock::now(); + + for (int j = 0 ; j < 20 ; j++) { + // normalise + alpaka::exec(queue, workDiv, NormalisePositions{}, positionCollectionView); + alpaka::wait(queue); + + // element addition + alpaka::exec(queue, workDiv, Addition{}, positionCollectionView); + alpaka::wait(queue); + + // fill + alpaka::exec(queue, workDiv, FillSoA{}, positionCollectionView); + alpaka::wait(queue); + } + + end = std::chrono::high_resolution_clock::now(); + + for(i=0; i<101; i++) { + + start = std::chrono::high_resolution_clock::now(); + + for (int j = 0 ; j < 10 ; j++) { + // normalise + alpaka::exec(queue, workDiv, NormalisePositions{}, positionCollectionView); + alpaka::wait(queue); + + // element addition + alpaka::exec(queue, workDiv, Addition{}, positionCollectionView); + alpaka::wait(queue); + + // fill + alpaka::exec(queue, workDiv, FillSoA{}, positionCollectionView); + alpaka::wait(queue); + } + + end = std::chrono::high_resolution_clock::now(); + + std::chrono::duration elapsed = (end - start) * 1000; + + if (i > 0) + inner_repetitions[i-1] = elapsed.count(); + } + + // Calculate the sum of all elements + sum = std::accumulate(inner_repetitions.begin(), inner_repetitions.end(), 0.0); + + // Calculate the average + average = sum / inner_repetitions.size(); + + std::cout << "Average execution time: " << average << " ms\n"; + + PortableHostCollection positionHostCollection(elems, queue); + alpaka::memcpy(queue, positionHostCollection.buffer(), positionCollection.buffer()); + alpaka::wait(queue); + + // const SoAPositionConstView& positionViewHostCollection = positionHostCollection.const_view(); + // for (size_t i = 0; i < elems; 
i++) { + // std::cout << "New value for element " << i << " : " << positionViewHostCollection[i].x() << std::endl; + // std::cout << "New value for element " << i << " : " << positionViewHostCollection[i].y() << std::endl; + // std::cout << "New value for element " << i << " : " << positionViewHostCollection[i].z() << std::endl; + // } + + // check norm == 1 + // const SoAPositionConstView& positionViewHostCollection = positionHostCollection.const_view(); + // for (size_t i = 0; i < elems; i++) { + // float norm = positionViewHostCollection[i].square_norm_position(); + // if (std::abs(norm - 1.0f) > 1.e-5f) { + // std::cout << "Error in normalisation at element " << i << " : " << norm << std::endl; + // } + // } + + std::cout << "Normalisation check completed" << std::endl; + + } + + return 0; + + } diff --git a/DataFormats/Portable/test/results/Standard_vs_SoA_methods_cpu.png b/DataFormats/Portable/test/results/Standard_vs_SoA_methods_cpu.png new file mode 100644 index 0000000000000..826f86d1323c1 Binary files /dev/null and b/DataFormats/Portable/test/results/Standard_vs_SoA_methods_cpu.png differ diff --git a/DataFormats/Portable/test/results/Standard_vs_SoA_methods_gpu.png b/DataFormats/Portable/test/results/Standard_vs_SoA_methods_gpu.png new file mode 100644 index 0000000000000..77b1778a1de4f Binary files /dev/null and b/DataFormats/Portable/test/results/Standard_vs_SoA_methods_gpu.png differ diff --git a/DataFormats/Portable/test/results/plots.py b/DataFormats/Portable/test/results/plots.py new file mode 100644 index 0000000000000..bdca5142fd4d4 --- /dev/null +++ b/DataFormats/Portable/test/results/plots.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +import sys, os, csv +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt + +DEFAULT_CSVS = [ + "/data/user/mmichail/hackathon_19/CMSSW_15_1_0_pre5/src/DataFormats/Portable/test/results/results_cpu.csv", + 
"/data/user/mmichail/hackathon_19/CMSSW_15_1_0_pre5/src/DataFormats/Portable/test/results/results_test_cpu.csv", +] + +# DEFAULT_CSVS = [ +# "/data/user/mmichail/hackathon_19/CMSSW_15_1_0_pre5/src/DataFormats/Portable/test/results/results_gpu.csv", +# "/data/user/mmichail/hackathon_19/CMSSW_15_1_0_pre5/src/DataFormats/Portable/test/results/results_test_gpu.csv", +# ] + +REQUIRED_COLS = {"element_size", "mean", "std"} + +def infer_label(path): + name = os.path.splitext(os.path.basename(path))[0] + return "Standard methods" if "test" in name else "SoA methods" + +def read_csv(path): + sizes, means, stds = [], [], [] + with open(path, newline="") as f: + r = csv.DictReader(f) + if not REQUIRED_COLS.issubset(r.fieldnames or []): + raise ValueError(f"{os.path.basename(path)} must have columns: element_size, mean, std. Found: {r.fieldnames}") + for row in r: + sizes.append(int(row["element_size"])) + means.append(float(row["mean"])) + stds.append(float(row["std"])) + # sort by element size to make lines meaningful + order = sorted(range(len(sizes)), key=lambda i: sizes[i]) + sizes = [sizes[i] for i in order] + means = [means[i] for i in order] + stds = [stds[i] for i in order] + return sizes, means, stds + +def main(): + csv_paths = sys.argv[1:] if len(sys.argv) > 1 else DEFAULT_CSVS + + # Read all datasets + datasets = [] + for p in csv_paths: + sizes, means, stds = read_csv(p) + label = os.path.splitext(os.path.basename(p))[0] # e.g., results_avg_std_cpu + datasets.append((sizes, means, stds, infer_label(p))) + + if not datasets: + print("No CSVs provided.", file=sys.stderr) + sys.exit(1) + + fig, ax = plt.subplots() + ax.set_xscale("log", base=10) + + + # distinct markers per series (matplotlib will pick colors) + markers = ["o", "s", "^", "D", "v", "P", "X", "*"] + + # Build union of xticks across all series + all_sizes = sorted({s for sizes,_,_,_ in datasets for s in sizes}) + xticks = [1] + all_sizes # keep the “fake 0” at x=1 like before + ax.set_xticks(xticks) + 
ax.set_xticklabels(["0"] + [str(x) for x in all_sizes]) + + # Plot all series + for idx, (sizes, means, stds, label) in enumerate(datasets): + marker = markers[idx % len(markers)] + ax.errorbar(sizes, means, yerr=stds, fmt=f"-{marker}", capsize=4, label=label) + + pad = 10 + ax.set_xlim(1, max(all_sizes) * pad) + + ax.set_xlabel("Element No") + ax.set_ylabel("Average time (ms)") + ax.set_title("Standard methods vs SoA methods in CPU") + # ax.set_title("Standard methods vs SoA methods in GPU") + ax.grid(True, which="major") + ax.minorticks_off() + ax.legend() + + fig.tight_layout() + + # Name output by joining base names + out_png = os.path.join(os.path.dirname(csv_paths[0]), "Standard_vs_SoA_methods_cpu.png") + # out_png = os.path.join(os.path.dirname(csv_paths[0]), "Standard_vs_SoA_methods_gpu.png") + fig.savefig(out_png, dpi=150) + print(f"Saved plot: {out_png}") + +if __name__ == "__main__": + main() diff --git a/DataFormats/Portable/test/results/results_cpu.csv b/DataFormats/Portable/test/results/results_cpu.csv new file mode 100644 index 0000000000000..c0b01479a13b8 --- /dev/null +++ b/DataFormats/Portable/test/results/results_cpu.csv @@ -0,0 +1,9 @@ +element_size,mean,std +10,0.001233840,0.000018737 +100,0.007388929,0.000049974 +1000,0.069053830,0.000210581 +10000,0.684512000,0.001337960 +100000,6.838830000,0.004478107 +250000,17.100290000,0.006028903 +750000,51.329260000,0.015528483 +1000000,68.438890000,0.021186497 diff --git a/DataFormats/Portable/test/results/results_gpu.csv b/DataFormats/Portable/test/results/results_gpu.csv new file mode 100644 index 0000000000000..0109a44744afe --- /dev/null +++ b/DataFormats/Portable/test/results/results_gpu.csv @@ -0,0 +1,9 @@ +element_size,mean,std +10,0.187063200,0.001209227 +100,0.187814100,0.000751258 +1000,0.200782600,0.011476074 +10000,0.212477400,0.000417979 +100000,0.244108300,0.000813546 +250000,0.284834400,0.009816557 +750000,0.442813300,0.001007789 +1000000,0.519937200,0.000539323 diff --git 
a/DataFormats/Portable/test/results/results_test_cpu.csv b/DataFormats/Portable/test/results/results_test_cpu.csv new file mode 100644 index 0000000000000..a9070a4029b39 --- /dev/null +++ b/DataFormats/Portable/test/results/results_test_cpu.csv @@ -0,0 +1,9 @@ +element_size,mean,std +10,0.001260796,0.000070552 +100,0.007423318,0.000058612 +1000,0.069557150,0.000703586 +10000,0.688618400,0.001922951 +100000,6.897364000,0.012733163 +250000,17.218790000,0.011957095 +750000,51.618680000,0.011555451 +1000000,68.822000000,0.040243343 diff --git a/DataFormats/Portable/test/results/results_test_gpu.csv b/DataFormats/Portable/test/results/results_test_gpu.csv new file mode 100644 index 0000000000000..ed6823dc9dde1 --- /dev/null +++ b/DataFormats/Portable/test/results/results_test_gpu.csv @@ -0,0 +1,9 @@ +element_size,mean,std +10,0.189737700,0.002318113 +100,0.188666800,0.002463134 +1000,0.191237200,0.002568878 +10000,0.191252100,0.000814894 +100000,0.221208100,0.000459619 +250000,0.289717900,0.000860913 +750000,0.444420800,0.000630217 +1000000,0.522203000,0.001118425 diff --git a/DataFormats/Portable/test/run/run.sh b/DataFormats/Portable/test/run/run.sh new file mode 100755 index 0000000000000..22870979189e7 --- /dev/null +++ b/DataFormats/Portable/test/run/run.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -euo pipefail + +BUILD_JOBS="${1:-20}" +GPU="${2:-0}" + +export CUDA_VISIBLE_DEVICES="$GPU" +export HIP_VISIBLE_DEVICES="$GPU" + +SIZE_LIST=(10 100 1000 10000 100000 250000 750000 1000000) + +CMSSW_BASE="/data/user/mmichail/hackathon_19/CMSSW_15_1_0_pre5" +SRC_DIR="$CMSSW_BASE/src/DataFormats/Portable" +RUN_DIR="$CMSSW_BASE/test/el8_amd64_gcc12" +OUT_DIR="$SRC_DIR/test/results" +OUT_CSV="$OUT_DIR/results_cpu.csv" +#OUT_CSV="$OUT_DIR/results_cpu.csv" + +mkdir -p "$OUT_DIR" + +# Optional build (uncomment if you want to rebuild each time) +#cd "$SRC_DIR" && scram b -j "$BUILD_JOBS" + +# Load CMSSW runtime +cd "$CMSSW_BASE" && eval "$(scram runtime -sh)" + +# Run 
experiments +cd "$RUN_DIR" || exit 1 +# [[ -x ./Device_methodsCudaAsync ]] || { echo "ERROR: Device_methodsCudaAsync not found"; exit 1; } +[[ -x ./Device_methodsSerialSync ]] || { echo "ERROR: Device_methodsSerialSync not found"; exit 1; } + +echo "element_size,mean,std" > "$OUT_CSV" + +for size in "${SIZE_LIST[@]}"; do + tmp="$(mktemp)" + for i in {0..10}; do + # out="$(./Device_methodsCudaAsync "$size" 2>&1)" + out="$(./Device_methodsSerialSync "$size" 2>&1)" + val="$(grep -m1 -E 'Average execution time:' <<<"$out" | awk '{print $(NF-1)}')" || true # under pipefail a missing match must leave val empty, not abort the script + [[ -n "${val:-}" ]] && echo "$val" >> "$tmp" + done + + mean="$(tail -n +2 "$tmp" | awk '{s+=$1; n++} END{if(n) printf("%.9f", s/n); else print "NaN"}')" + std="$(tail -n +2 "$tmp" | awk '{s+=$1; ss+=$1*$1; n++} END{if(n>1) printf("%.9f", sqrt((ss - s*s/n)/(n-1))); else print "NaN"}')" + + echo "$size,$mean,$std" >> "$OUT_CSV" + rm -f "$tmp" +done + +echo "Wrote: $OUT_CSV" diff --git a/DataFormats/Portable/test/run/run_test.sh b/DataFormats/Portable/test/run/run_test.sh new file mode 100755 index 0000000000000..f1017ba659ebe --- /dev/null +++ b/DataFormats/Portable/test/run/run_test.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +set -euo pipefail + +BUILD_JOBS="${1:-20}" +GPU="${2:-0}" + +export CUDA_VISIBLE_DEVICES="$GPU" +export HIP_VISIBLE_DEVICES="$GPU" + +SIZE_LIST=(10 100 1000 10000 100000 250000 750000 1000000) + +CMSSW_BASE="/data/user/mmichail/hackathon_19/CMSSW_15_1_0_pre5" +SRC_DIR="$CMSSW_BASE/src/DataFormats/Portable" +RUN_DIR="$CMSSW_BASE/test/el8_amd64_gcc12" +OUT_DIR="$SRC_DIR/test/results" +# OUT_CSV="$OUT_DIR/results_test_gpu.csv" +OUT_CSV="$OUT_DIR/results_test_cpu.csv" + +mkdir -p "$OUT_DIR" + +# Optional build (uncomment if you want to rebuild each time) +#cd "$SRC_DIR" && scram b -j "$BUILD_JOBS" + +# Load CMSSW runtime +cd "$CMSSW_BASE" && eval "$(scram runtime -sh)" + +# Run experiments +cd "$RUN_DIR" || exit 1 +# [[ -x ./Device_test_methodsCudaAsync ]] || { echo "ERROR: Device_test_methodsCudaAsync not 
found"; exit 1; } +[[ -x ./Device_test_methodsSerialSync ]] || { echo "ERROR: Device_test_methodsSerialSync not found"; exit 1; } + +echo "element_size,mean,std" > "$OUT_CSV" + +for size in "${SIZE_LIST[@]}"; do + tmp="$(mktemp)" + for i in {0..10}; do + # out="$(./Device_test_methodsCudaAsync "$size" 2>&1)" + out="$(./Device_test_methodsSerialSync "$size" 2>&1)" + val="$(grep -m1 -E 'Average execution time:' <<<"$out" | awk '{print $(NF-1)}')" || true # under pipefail a missing match must leave val empty, not abort the script + [[ -n "${val:-}" ]] && echo "$val" >> "$tmp" + done + + mean="$(tail -n +2 "$tmp" | awk '{s+=$1; n++} END{if(n) printf("%.9f", s/n); else print "NaN"}')" + std="$(tail -n +2 "$tmp" | awk '{s+=$1; ss+=$1*$1; n++} END{if(n>1) printf("%.9f", sqrt((ss - s*s/n)/(n-1))); else print "NaN"}')" + + echo "$size,$mean,$std" >> "$OUT_CSV" + rm -f "$tmp" +done + +echo "Wrote: $OUT_CSV"