diff --git a/DataFormats/Portable/test/BuildFile.xml b/DataFormats/Portable/test/BuildFile.xml
index 0cd7f68a927d1..732c24f696ff0 100644
--- a/DataFormats/Portable/test/BuildFile.xml
+++ b/DataFormats/Portable/test/BuildFile.xml
@@ -32,3 +32,12 @@
+
+
+
+
+
+
+
+
+
diff --git a/DataFormats/Portable/test/alpaka/device_methods.dev.cc b/DataFormats/Portable/test/alpaka/device_methods.dev.cc
index 48b7f90a606d6..b48565afe05f5 100644
--- a/DataFormats/Portable/test/alpaka/device_methods.dev.cc
+++ b/DataFormats/Portable/test/alpaka/device_methods.dev.cc
@@ -32,17 +32,27 @@ GENERATE_SOA_LAYOUT(SoAPositionTemplate,
SOA_ELEMENT_METHODS(
- SOA_HOST_DEVICE void normalise() {
+ SOA_HOST_DEVICE SOA_INLINE void normalise() {
float norm_position = square_norm_position();
if (norm_position > 0.0f) {
x() /= norm_position;
y() /= norm_position;
z() /= norm_position;
- }};
+ }}
+
+ template
+ SOA_HOST_DEVICE SOA_INLINE void add(OtherView& otherSoA) {
+ x() = x() + otherSoA.x(0);
+ y() = y() + otherSoA.y(0);
+ z() = z() + otherSoA.z(0);
+ }
+
),
SOA_CONST_ELEMENT_METHODS(
- SOA_HOST_DEVICE float square_norm_position() const { return sqrt(x() * x() + y() * y() + z() * z()); };
+
+ SOA_HOST_DEVICE SOA_INLINE float square_norm_position() const { return sqrt(x() * x() + y() * y() + z() * z()); };
+
),
SOA_SCALAR(int, detectorType))
@@ -69,16 +79,34 @@ struct FillSoA {
// Kernel for normalising the positions
struct NormalisePositions {
template
- ALPAKA_FN_ACC void operator()(TAcc const& acc, PositionView positionView) const {
+ ALPAKA_FN_ACC void operator()(TAcc const& acc, PositionView& positionView) const {
for (auto local_idx : cms::alpakatools::uniform_elements(acc, positionView.metadata().size())) {
positionView[local_idx].normalise();
}
}
};
+
+
+// Kernel adding element 0 of the view to every other element through the SoA
+// element method add(). Element 0 itself is skipped, so the value every thread
+// reads is never written by this kernel.
+struct Addition {
+  template <typename TAcc, typename PositionView>
+  ALPAKA_FN_ACC void operator()(TAcc const& acc, PositionView& positionView) const {
+    for (auto local_idx : cms::alpakatools::uniform_elements(acc, positionView.metadata().size())) {
+      if (local_idx == 0) {
+        continue;
+      }
+      positionView[local_idx].add(positionView);
+    }
+  }
+};
int main(int argc, char** argv) {
+ int i=0;
+ std::chrono::time_point start, end;
+ std::vector inner_repetitions(100);
+ double sum, average;
+
auto const& devices = cms::alpakatools::devices();
if (devices.empty()) {
std::cout << "No devices available for the " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) << " backend, "
@@ -87,27 +115,28 @@ int main(int argc, char** argv) {
auto devHost = alpaka::getDevByIdx(alpaka::PlatformCpu{}, 0u);
- for (auto const& device : cms::alpakatools::devices()) {
+ for (auto const& device : cms::alpakatools::devices()) {
std::cout << "Running on " << alpaka::getName(device) << std::endl;
Queue queue(device);
// common number of elements for the SoAs
- const std::size_t elems = 50000000;
+    // Fall back to 1000 elements, never 0: an empty collection would make
+    // divide_up_by() below return zero blocks, and the zero-blocks guard would
+    // then reset numberOfBlocks to that same zero.
+    const std::size_t elems = parse_or_default(argc > 1 ? argv[1] : nullptr, 1000);
// Portable Collections
PortableCollection positionCollection(elems, queue);
- SoAPositionView& positionCollectionView = positionCollection.view();
+ SoAPositionView positionCollectionView = positionCollection.view();
+ // SoAPositionConstView positionCollectionConstView = positionCollection.const_view();
// fill up
// 1) Block size: argv[1] if valid, else 64
- const std::size_t blockSize = parse_or_default(argc > 1 ? argv[1] : nullptr, 64);
+ const std::size_t blockSize = parse_or_default(argc > 2 ? argv[2] : nullptr, 512);
// 2) Default blocks: cover all elements for the chosen block size
const std::size_t defaultBlocks = cms::alpakatools::divide_up_by(elems, blockSize);
// 3) Number of blocks: argv[2] if valid (>0), else defaultBlocks
- std::size_t numberOfBlocks = parse_or_default(argc > 2 ? argv[2] : nullptr, defaultBlocks);
+ std::size_t numberOfBlocks = parse_or_default(argc > 3 ? argv[3] : nullptr, defaultBlocks);
// (Optional) guard: never let it be 0
if (numberOfBlocks == 0) numberOfBlocks = defaultBlocks;
@@ -117,28 +146,70 @@ int main(int argc, char** argv) {
alpaka::exec(queue, workDiv, FillSoA{}, positionCollectionView);
alpaka::wait(queue);
- auto start = std::chrono::high_resolution_clock::now();
+ start = std::chrono::high_resolution_clock::now();
- // normalise
- alpaka::exec(queue, workDiv, NormalisePositions{}, positionCollectionView);
- alpaka::wait(queue);
+ for (int j = 0 ; j < 20 ; j++) {
+ // normalise
+ alpaka::exec(queue, workDiv, NormalisePositions{}, positionCollectionView);
+ alpaka::wait(queue);
- auto end = std::chrono::high_resolution_clock::now();
- std::chrono::duration elapsed = end - start;
- std::cout << "Total execution time: " << elapsed.count() << " seconds\n";
+ // element addition
+ alpaka::exec(queue, workDiv, Addition{}, positionCollectionView);
+ alpaka::wait(queue);
+
+ // fill
+ alpaka::exec(queue, workDiv, FillSoA{}, positionCollectionView);
+ alpaka::wait(queue);
+ }
+
+ end = std::chrono::high_resolution_clock::now();
+
+ for(i=0; i<101; i++) {
+
+ start = std::chrono::high_resolution_clock::now();
+
+ for (int j = 0 ; j < 10 ; j++) {
+ // normalise
+ alpaka::exec(queue, workDiv, NormalisePositions{}, positionCollectionView);
+ alpaka::wait(queue);
+
+ // element addition
+ alpaka::exec(queue, workDiv, Addition{}, positionCollectionView);
+ alpaka::wait(queue);
+
+ // fill
+ alpaka::exec(queue, workDiv, FillSoA{}, positionCollectionView);
+ alpaka::wait(queue);
+ }
+
+ end = std::chrono::high_resolution_clock::now();
+
+ std::chrono::duration elapsed = (end - start) * 1000;
+
+ if (i > 0)
+ inner_repetitions[i-1] = elapsed.count();
+ }
+
+ // Calculate the sum of all elements
+ sum = std::accumulate(inner_repetitions.begin(), inner_repetitions.end(), 0.0);
+
+ // Calculate the average
+ average = sum / inner_repetitions.size();
+
+ std::cout << "Average execution time: " << average << " ms\n";
PortableHostCollection positionHostCollection(elems, queue);
alpaka::memcpy(queue, positionHostCollection.buffer(), positionCollection.buffer());
alpaka::wait(queue);
// check norm == 1
- const SoAPositionConstView& positionViewHostCollection = positionHostCollection.const_view();
- for (size_t i = 0; i < elems; i++) {
- float norm = positionViewHostCollection[i].square_norm_position();
- if (std::abs(norm - 1.0f) > 1.e-5f) {
- std::cout << "Error in normalisation at element " << i << " : " << norm << std::endl;
- }
- }
+ // const SoAPositionConstView& positionViewHostCollection = positionHostCollection.const_view();
+ // for (size_t i = 0; i < elems; i++) {
+ // float norm = positionViewHostCollection[i].square_norm_position();
+ // if (std::abs(norm - 1.0f) > 1.e-5f) {
+ // std::cout << "Error in normalisation at element " << i << " : " << norm << std::endl;
+ // }
+ // }
std::cout << "Normalisation check completed" << std::endl;
diff --git a/DataFormats/Portable/test/alpaka/device_test_methods.dev.cc b/DataFormats/Portable/test/alpaka/device_test_methods.dev.cc
new file mode 100644
index 0000000000000..945c066ad11fd
--- /dev/null
+++ b/DataFormats/Portable/test/alpaka/device_test_methods.dev.cc
@@ -0,0 +1,214 @@
+// Device custom methods benchmark
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "DataFormats/SoATemplate/interface/SoALayout.h"
+#include "DataFormats/Portable/interface/PortableCollection.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
+
+using namespace ALPAKA_ACCELERATOR_NAMESPACE;
+
+// Parse a strictly positive decimal integer from a command-line argument.
+// Returns `fallback` when `s` is null, does not start with a digit (which also
+// rejects signs and leading whitespace), has trailing characters, overflows
+// `unsigned long`, or evaluates to zero.
+static inline std::size_t parse_or_default(const char* s, std::size_t fallback) {
+  // strtoul() accepts "-N" and returns the negated value as a huge unsigned
+  // number, so insist on a leading digit before handing the string over.
+  if (!s || *s < '0' || *s > '9')
+    return fallback;
+  char* end = nullptr;
+  errno = 0;  // overflow is only reported through errno
+  const unsigned long v = std::strtoul(s, &end, 10);
+  if (errno != 0 || end == s || *end != '\0' || v == 0)
+    return fallback;
+  return static_cast<std::size_t>(v);
+}
+
+// SoA layout used by this benchmark: float columns x, y, z and one int scalar,
+// detectorType (written without an element index in FillSoA). No custom
+// element methods are declared; the kernels below access the columns directly.
+GENERATE_SOA_LAYOUT(SoAPositionTemplate,
+ SOA_COLUMN(float, x),
+ SOA_COLUMN(float, y),
+ SOA_COLUMN(float, z),
+
+ SOA_SCALAR(int, detectorType))
+
+// Default instantiation of the layout and its View / ConstView types.
+using SoAPosition = SoAPositionTemplate<>;
+using SoAPositionView = SoAPosition::View;
+using SoAPositionConstView = SoAPosition::ConstView;
+
+// Kernel filling the SoA: element i receives (x, y, z) = (i + 1, 2 (i + 1), 3 (i + 1))
+// and the scalar detectorType is set to 1.
+struct FillSoA {
+  template <typename TAcc, typename PositionView>
+  ALPAKA_FN_ACC void operator()(TAcc const& acc, PositionView positionView) const {
+    // scalar write guarded by once_per_grid(), presumably so that a single
+    // thread of the grid performs it
+    if (cms::alpakatools::once_per_grid(acc)) {
+      positionView.detectorType() = 1;
+    }
+
+    for (auto local_idx : cms::alpakatools::uniform_elements(acc, positionView.metadata().size())) {
+      // shared term of the three coordinates, computed once per element
+      const float base = static_cast<float>(local_idx) + 1.f;
+      positionView[local_idx].x() = base;
+      positionView[local_idx].y() = base * 2.0f;
+      positionView[local_idx].z() = base * 3.0f;
+    }
+  }
+};
+
+// Euclidean norm sqrt(x^2 + y^2 + z^2) of element `local_idx` of the view.
+// NOTE: despite the name, the returned value is the norm itself, not its square.
+template <typename PositionView>
+SOA_HOST_DEVICE SOA_INLINE float square_norm_position(PositionView& positionView, int local_idx) {
+  // read each coordinate once instead of indexing the view six times
+  const float x = positionView[local_idx].x();
+  const float y = positionView[local_idx].y();
+  const float z = positionView[local_idx].z();
+  return sqrt(x * x + y * y + z * z);
+}
+
+// Kernel scaling every position to unit length. Elements whose norm is not
+// strictly positive are left untouched, which avoids a division by zero.
+struct NormalisePositions {
+  template <typename TAcc, typename PositionView>
+  ALPAKA_FN_ACC void operator()(TAcc const& acc, PositionView& positionView) const {
+    for (auto local_idx : cms::alpakatools::uniform_elements(acc, positionView.metadata().size())) {
+      const float norm_position = square_norm_position(positionView, local_idx);
+      if (norm_position > 0.0f) {
+        positionView[local_idx].x() /= norm_position;
+        positionView[local_idx].y() /= norm_position;
+        positionView[local_idx].z() /= norm_position;
+      }
+    }
+  }
+};
+
+
+// Kernel adding the coordinates of element 0 to every other element.
+// Element 0 itself is skipped, so the values every thread reads are never
+// written by this kernel.
+struct Addition {
+  template <typename TAcc, typename PositionView>
+  ALPAKA_FN_ACC void operator()(TAcc const& acc, PositionView& positionView) const {
+    for (auto local_idx : cms::alpakatools::uniform_elements(acc, positionView.metadata().size())) {
+      if (local_idx == 0) {
+        continue;
+      }
+      positionView[local_idx].x() += positionView[0].x();
+      positionView[local_idx].y() += positionView[0].y();
+      positionView[local_idx].z() += positionView[0].z();
+    }
+  }
+};
+
+
+// Benchmark driver.
+//
+// Usage: <executable> [elements [blockSize [numberOfBlocks]]]
+//   elements        number of SoA elements      (default 1000)
+//   blockSize       block size of the work division (default 512)
+//   numberOfBlocks  number of blocks            (default: enough to cover all elements)
+//
+// For every available device: fill the collection, run 20 untimed warm-up
+// passes, then time 101 repetitions of 10 passes each. The first timed
+// repetition is discarded and the mean duration of the remaining 100
+// repetitions (each one being 10 passes) is printed in milliseconds as
+// "Average execution time: <value> ms".
+//
+// NOTE(review): the explicit template arguments below (Platform, Acc1D, the
+// collection parameters) follow the usual CMSSW alpaka test pattern; confirm
+// them against the intended sources.
+int main(int argc, char** argv) {
+  constexpr int kWarmupPasses = 20;
+  constexpr int kPassesPerRepetition = 10;
+  constexpr int kRepetitions = 100;
+
+  auto const& devices = cms::alpakatools::devices<Platform>();
+  if (devices.empty()) {
+    std::cout << "No devices available for the " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) << " backend, "
+      "the test will be skipped." << std::endl;
+    return 0;
+  }
+
+  for (auto const& device : devices) {
+    std::cout << "Running on " << alpaka::getName(device) << std::endl;
+
+    Queue queue(device);
+
+    // 1) Number of elements: argv[1] if valid (>0), else 1000
+    const std::size_t elems = parse_or_default(argc > 1 ? argv[1] : nullptr, 1000);
+
+    // Portable Collections
+    PortableCollection<SoAPosition, Device> positionCollection(elems, queue);
+    SoAPositionView positionCollectionView = positionCollection.view();
+
+    // 2) Block size: argv[2] if valid (>0), else 512
+    const std::size_t blockSize = parse_or_default(argc > 2 ? argv[2] : nullptr, 512);
+
+    // 3) Default blocks: cover all elements for the chosen block size
+    const std::size_t defaultBlocks = cms::alpakatools::divide_up_by(elems, blockSize);
+
+    // 4) Number of blocks: argv[3] if valid (>0), else defaultBlocks.
+    //    No extra zero guard is needed: elems and blockSize are both >= 1, so
+    //    defaultBlocks is >= 1, and parse_or_default never returns 0 otherwise.
+    const std::size_t numberOfBlocks = parse_or_default(argc > 3 ? argv[3] : nullptr, defaultBlocks);
+
+    const auto workDiv = cms::alpakatools::make_workdiv<Acc1D>(numberOfBlocks, blockSize);
+
+    // One benchmark pass: normalise, add element 0 to the others, then refill
+    // so that every pass starts from the same data. Each kernel is waited for
+    // before the next one is enqueued.
+    auto runPass = [&]() {
+      alpaka::exec<Acc1D>(queue, workDiv, NormalisePositions{}, positionCollectionView);
+      alpaka::wait(queue);
+
+      alpaka::exec<Acc1D>(queue, workDiv, Addition{}, positionCollectionView);
+      alpaka::wait(queue);
+
+      alpaka::exec<Acc1D>(queue, workDiv, FillSoA{}, positionCollectionView);
+      alpaka::wait(queue);
+    };
+
+    // initial fill
+    alpaka::exec<Acc1D>(queue, workDiv, FillSoA{}, positionCollectionView);
+    alpaka::wait(queue);
+
+    // warm-up, not timed
+    for (int j = 0; j < kWarmupPasses; j++) {
+      runPass();
+    }
+
+    // timed repetitions; repetition 0 is measured but not stored
+    std::vector<double> inner_repetitions(kRepetitions);
+    for (int i = 0; i < kRepetitions + 1; i++) {
+      const auto start = std::chrono::high_resolution_clock::now();
+
+      for (int j = 0; j < kPassesPerRepetition; j++) {
+        runPass();
+      }
+
+      const auto end = std::chrono::high_resolution_clock::now();
+
+      // duration expressed directly in milliseconds
+      const std::chrono::duration<double, std::milli> elapsed = end - start;
+
+      if (i > 0) {
+        inner_repetitions[i - 1] = elapsed.count();
+      }
+    }
+
+    const double sum = std::accumulate(inner_repetitions.begin(), inner_repetitions.end(), 0.0);
+    const double average = sum / inner_repetitions.size();
+
+    std::cout << "Average execution time: " << average << " ms\n";
+
+    // copy the device buffer back to the host (outside the timed region)
+    PortableHostCollection<SoAPosition> positionHostCollection(elems, queue);
+    alpaka::memcpy(queue, positionHostCollection.buffer(), positionCollection.buffer());
+    alpaka::wait(queue);
+
+    // No normalisation check is performed: the last kernel of every pass is
+    // FillSoA, so at this point the buffer holds the freshly filled values,
+    // not normalised ones.
+    std::cout << "Benchmark completed" << std::endl;
+  }
+
+  return 0;
+}
diff --git a/DataFormats/Portable/test/results/Standard_vs_SoA_methods_cpu.png b/DataFormats/Portable/test/results/Standard_vs_SoA_methods_cpu.png
new file mode 100644
index 0000000000000..826f86d1323c1
Binary files /dev/null and b/DataFormats/Portable/test/results/Standard_vs_SoA_methods_cpu.png differ
diff --git a/DataFormats/Portable/test/results/Standard_vs_SoA_methods_gpu.png b/DataFormats/Portable/test/results/Standard_vs_SoA_methods_gpu.png
new file mode 100644
index 0000000000000..77b1778a1de4f
Binary files /dev/null and b/DataFormats/Portable/test/results/Standard_vs_SoA_methods_gpu.png differ
diff --git a/DataFormats/Portable/test/results/plots.py b/DataFormats/Portable/test/results/plots.py
new file mode 100644
index 0000000000000..bdca5142fd4d4
--- /dev/null
+++ b/DataFormats/Portable/test/results/plots.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+import sys, os, csv
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
+# Directory holding this script; the result CSVs are stored alongside it, so
+# the defaults are resolved from here and work in any checkout.
+_RESULTS_DIR = os.path.dirname(os.path.abspath(__file__))
+
+# Inputs used when no CSV path is given on the command line (CPU results).
+DEFAULT_CSVS = [
+    os.path.join(_RESULTS_DIR, "results_cpu.csv"),
+    os.path.join(_RESULTS_DIR, "results_test_cpu.csv"),
+]
+
+# GPU alternative:
+# DEFAULT_CSVS = [
+#     os.path.join(_RESULTS_DIR, "results_gpu.csv"),
+#     os.path.join(_RESULTS_DIR, "results_test_gpu.csv"),
+# ]
+
+# Column names every input CSV must provide.
+REQUIRED_COLS = {"element_size", "mean", "std"}
+
+def infer_label(path):
+    """Return the legend label for a results CSV, chosen from its file name.
+
+    A base name (extension stripped) containing "test" maps to
+    "Standard methods"; any other name maps to "SoA methods". Directory
+    components of `path` are ignored.
+    """
+    stem, _ext = os.path.splitext(os.path.basename(path))
+    if "test" in stem:
+        return "Standard methods"
+    return "SoA methods"
+
+def read_csv(path):
+    """Load one results CSV and return (sizes, means, stds) sorted by size.
+
+    The file must provide the columns listed in REQUIRED_COLS; otherwise a
+    ValueError naming the file and the columns found is raised.
+    `element_size` is parsed as int, `mean` and `std` as float.
+    """
+    with open(path, newline="") as handle:
+        reader = csv.DictReader(handle)
+        if not REQUIRED_COLS.issubset(reader.fieldnames or []):
+            raise ValueError(f"{os.path.basename(path)} must have columns: element_size, mean, std. Found: {reader.fieldnames}")
+        records = [
+            (int(rec["element_size"]), float(rec["mean"]), float(rec["std"]))
+            for rec in reader
+        ]
+
+    # sort by element size (stable) so the plotted line runs left to right
+    records.sort(key=lambda rec: rec[0])
+    sizes = [rec[0] for rec in records]
+    means = [rec[1] for rec in records]
+    stds = [rec[2] for rec in records]
+    return sizes, means, stds
+
+def main():
+    """Plot mean time against element count for every results CSV and save a PNG.
+
+    CSV paths come from the command line; without arguments DEFAULT_CSVS is
+    used. Each file becomes one error-bar series (mean +/- std) on a log-10
+    x axis, labelled by infer_label(). The image is written next to the first
+    CSV as "Standard_vs_SoA_methods_cpu.png". Exits with status 1 when there
+    is nothing to plot.
+    """
+    csv_paths = sys.argv[1:] if len(sys.argv) > 1 else DEFAULT_CSVS
+
+    # One (sizes, means, stds, label) tuple per input file
+    datasets = [(*read_csv(p), infer_label(p)) for p in csv_paths]
+
+    if not datasets:
+        print("No CSVs provided.", file=sys.stderr)
+        sys.exit(1)
+
+    # Union of the element counts across all series
+    all_sizes = sorted({s for sizes, _, _, _ in datasets for s in sizes})
+    if not all_sizes:
+        # header-only files: max(all_sizes) below would raise on an empty list
+        print("No data rows found in the provided CSVs.", file=sys.stderr)
+        sys.exit(1)
+
+    fig, ax = plt.subplots()
+    ax.set_xscale("log", base=10)
+
+    # distinct markers per series (matplotlib will pick colors)
+    markers = ["o", "s", "^", "D", "v", "P", "X", "*"]
+
+    # keep the "fake 0" tick at x=1, the left edge of the log axis
+    xticks = [1] + all_sizes
+    ax.set_xticks(xticks)
+    ax.set_xticklabels(["0"] + [str(x) for x in all_sizes])
+
+    # Plot all series
+    for idx, (sizes, means, stds, label) in enumerate(datasets):
+        marker = markers[idx % len(markers)]
+        ax.errorbar(sizes, means, yerr=stds, fmt=f"-{marker}", capsize=4, label=label)
+
+    # leave one decade of room to the right of the largest size
+    pad = 10
+    ax.set_xlim(1, max(all_sizes) * pad)
+
+    ax.set_xlabel("Element No")
+    ax.set_ylabel("Average time (ms)")
+    ax.set_title("Standard methods vs SoA methods in CPU")
+    # ax.set_title("Standard methods vs SoA methods in GPU")
+    ax.grid(True, which="major")
+    ax.minorticks_off()
+    ax.legend()
+
+    fig.tight_layout()
+
+    # The output is written next to the first input CSV
+    out_png = os.path.join(os.path.dirname(csv_paths[0]), "Standard_vs_SoA_methods_cpu.png")
+    # out_png = os.path.join(os.path.dirname(csv_paths[0]), "Standard_vs_SoA_methods_gpu.png")
+    fig.savefig(out_png, dpi=150)
+    print(f"Saved plot: {out_png}")
+
+if __name__ == "__main__":
+    main()
diff --git a/DataFormats/Portable/test/results/results_cpu.csv b/DataFormats/Portable/test/results/results_cpu.csv
new file mode 100644
index 0000000000000..c0b01479a13b8
--- /dev/null
+++ b/DataFormats/Portable/test/results/results_cpu.csv
@@ -0,0 +1,9 @@
+element_size,mean,std
+10,0.001233840,0.000018737
+100,0.007388929,0.000049974
+1000,0.069053830,0.000210581
+10000,0.684512000,0.001337960
+100000,6.838830000,0.004478107
+250000,17.100290000,0.006028903
+750000,51.329260000,0.015528483
+1000000,68.438890000,0.021186497
diff --git a/DataFormats/Portable/test/results/results_gpu.csv b/DataFormats/Portable/test/results/results_gpu.csv
new file mode 100644
index 0000000000000..0109a44744afe
--- /dev/null
+++ b/DataFormats/Portable/test/results/results_gpu.csv
@@ -0,0 +1,9 @@
+element_size,mean,std
+10,0.187063200,0.001209227
+100,0.187814100,0.000751258
+1000,0.200782600,0.011476074
+10000,0.212477400,0.000417979
+100000,0.244108300,0.000813546
+250000,0.284834400,0.009816557
+750000,0.442813300,0.001007789
+1000000,0.519937200,0.000539323
diff --git a/DataFormats/Portable/test/results/results_test_cpu.csv b/DataFormats/Portable/test/results/results_test_cpu.csv
new file mode 100644
index 0000000000000..a9070a4029b39
--- /dev/null
+++ b/DataFormats/Portable/test/results/results_test_cpu.csv
@@ -0,0 +1,9 @@
+element_size,mean,std
+10,0.001260796,0.000070552
+100,0.007423318,0.000058612
+1000,0.069557150,0.000703586
+10000,0.688618400,0.001922951
+100000,6.897364000,0.012733163
+250000,17.218790000,0.011957095
+750000,51.618680000,0.011555451
+1000000,68.822000000,0.040243343
diff --git a/DataFormats/Portable/test/results/results_test_gpu.csv b/DataFormats/Portable/test/results/results_test_gpu.csv
new file mode 100644
index 0000000000000..ed6823dc9dde1
--- /dev/null
+++ b/DataFormats/Portable/test/results/results_test_gpu.csv
@@ -0,0 +1,9 @@
+element_size,mean,std
+10,0.189737700,0.002318113
+100,0.188666800,0.002463134
+1000,0.191237200,0.002568878
+10000,0.191252100,0.000814894
+100000,0.221208100,0.000459619
+250000,0.289717900,0.000860913
+750000,0.444420800,0.000630217
+1000000,0.522203000,0.001118425
diff --git a/DataFormats/Portable/test/run/run.sh b/DataFormats/Portable/test/run/run.sh
new file mode 100755
index 0000000000000..22870979189e7
--- /dev/null
+++ b/DataFormats/Portable/test/run/run.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+#
+# Benchmark the SoA-methods test binary (Device_methods<BACKEND>) over a list
+# of element counts and write the mean and standard deviation of the reported
+# "Average execution time" values to a CSV file.
+#
+# Usage: run.sh [BUILD_JOBS [GPU [BACKEND]]]
+#   BUILD_JOBS  parallel jobs for the optional rebuild             (default 20)
+#   GPU         index exported as CUDA/HIP_VISIBLE_DEVICES         (default 0)
+#   BACKEND     backend suffix of the binary; SerialSync writes
+#               results_cpu.csv, any other value results_gpu.csv   (default SerialSync)
+#
+# Environment: CMSSW_BASE and SCRAM_ARCH, when set, override the built-in defaults.
+set -euo pipefail
+
+BUILD_JOBS="${1:-20}"
+GPU="${2:-0}"
+BACKEND="${3:-SerialSync}"
+
+export CUDA_VISIBLE_DEVICES="$GPU"
+export HIP_VISIBLE_DEVICES="$GPU"
+
+SIZE_LIST=(10 100 1000 10000 100000 250000 750000 1000000)
+
+CMSSW_BASE="${CMSSW_BASE:-/data/user/mmichail/hackathon_19/CMSSW_15_1_0_pre5}"
+SCRAM_ARCH="${SCRAM_ARCH:-el8_amd64_gcc12}"
+SRC_DIR="$CMSSW_BASE/src/DataFormats/Portable"
+RUN_DIR="$CMSSW_BASE/test/$SCRAM_ARCH"
+OUT_DIR="$SRC_DIR/test/results"
+EXE="Device_methods${BACKEND}"
+if [[ "$BACKEND" == "SerialSync" ]]; then
+  OUT_CSV="$OUT_DIR/results_cpu.csv"
+else
+  OUT_CSV="$OUT_DIR/results_gpu.csv"
+fi
+
+mkdir -p "$OUT_DIR"
+
+# Optional build (uncomment if you want to rebuild each time)
+#cd "$SRC_DIR" && scram b -j "$BUILD_JOBS"
+
+# Load CMSSW runtime
+cd "$CMSSW_BASE" && eval "$(scram runtime -sh)"
+
+# Run experiments
+cd "$RUN_DIR"
+[[ -x "./$EXE" ]] || { echo "ERROR: $EXE not found" >&2; exit 1; }
+
+# One scratch file for the per-size samples, removed even if the script aborts
+tmp="$(mktemp)"
+trap 'rm -f "$tmp"' EXIT
+
+echo "element_size,mean,std" > "$OUT_CSV"
+
+for size in "${SIZE_LIST[@]}"; do
+  : > "$tmp"
+  # 11 runs per size; run 0 is a warm-up and its value is not recorded
+  for i in {0..10}; do
+    out="$("./$EXE" "$size" 2>&1)"
+    # A single awk instead of grep | awk: with pipefail a non-matching grep
+    # would abort the script before the empty-value check below.
+    val="$(awk '/Average execution time:/ {print $(NF-1); exit}' <<<"$out")"
+    if (( i > 0 )) && [[ -n "$val" ]]; then
+      echo "$val" >> "$tmp"
+    fi
+  done
+
+  mean="$(awk '{s+=$1; n++} END{if(n) printf("%.9f", s/n); else print "NaN"}' "$tmp")"
+  std="$(awk '{s+=$1; ss+=$1*$1; n++} END{if(n>1) printf("%.9f", sqrt((ss - s*s/n)/(n-1))); else print "NaN"}' "$tmp")"
+
+  echo "$size,$mean,$std" >> "$OUT_CSV"
+done
+
+echo "Wrote: $OUT_CSV"
diff --git a/DataFormats/Portable/test/run/run_test.sh b/DataFormats/Portable/test/run/run_test.sh
new file mode 100755
index 0000000000000..f1017ba659ebe
--- /dev/null
+++ b/DataFormats/Portable/test/run/run_test.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+#
+# Benchmark the standard-methods test binary (Device_test_methods<BACKEND>)
+# over a list of element counts and write the mean and standard deviation of
+# the reported "Average execution time" values to a CSV file.
+#
+# Usage: run_test.sh [BUILD_JOBS [GPU [BACKEND]]]
+#   BUILD_JOBS  parallel jobs for the optional rebuild                  (default 20)
+#   GPU         index exported as CUDA/HIP_VISIBLE_DEVICES              (default 0)
+#   BACKEND     backend suffix of the binary; SerialSync writes
+#               results_test_cpu.csv, any other value results_test_gpu.csv
+#               (default SerialSync)
+#
+# Environment: CMSSW_BASE and SCRAM_ARCH, when set, override the built-in defaults.
+set -euo pipefail
+
+BUILD_JOBS="${1:-20}"
+GPU="${2:-0}"
+BACKEND="${3:-SerialSync}"
+
+export CUDA_VISIBLE_DEVICES="$GPU"
+export HIP_VISIBLE_DEVICES="$GPU"
+
+SIZE_LIST=(10 100 1000 10000 100000 250000 750000 1000000)
+
+CMSSW_BASE="${CMSSW_BASE:-/data/user/mmichail/hackathon_19/CMSSW_15_1_0_pre5}"
+SCRAM_ARCH="${SCRAM_ARCH:-el8_amd64_gcc12}"
+SRC_DIR="$CMSSW_BASE/src/DataFormats/Portable"
+RUN_DIR="$CMSSW_BASE/test/$SCRAM_ARCH"
+OUT_DIR="$SRC_DIR/test/results"
+EXE="Device_test_methods${BACKEND}"
+if [[ "$BACKEND" == "SerialSync" ]]; then
+  OUT_CSV="$OUT_DIR/results_test_cpu.csv"
+else
+  OUT_CSV="$OUT_DIR/results_test_gpu.csv"
+fi
+
+mkdir -p "$OUT_DIR"
+
+# Optional build (uncomment if you want to rebuild each time)
+#cd "$SRC_DIR" && scram b -j "$BUILD_JOBS"
+
+# Load CMSSW runtime
+cd "$CMSSW_BASE" && eval "$(scram runtime -sh)"
+
+# Run experiments
+cd "$RUN_DIR"
+[[ -x "./$EXE" ]] || { echo "ERROR: $EXE not found" >&2; exit 1; }
+
+# One scratch file for the per-size samples, removed even if the script aborts
+tmp="$(mktemp)"
+trap 'rm -f "$tmp"' EXIT
+
+echo "element_size,mean,std" > "$OUT_CSV"
+
+for size in "${SIZE_LIST[@]}"; do
+  : > "$tmp"
+  # 11 runs per size; run 0 is a warm-up and its value is not recorded
+  for i in {0..10}; do
+    out="$("./$EXE" "$size" 2>&1)"
+    # A single awk instead of grep | awk: with pipefail a non-matching grep
+    # would abort the script before the empty-value check below.
+    val="$(awk '/Average execution time:/ {print $(NF-1); exit}' <<<"$out")"
+    if (( i > 0 )) && [[ -n "$val" ]]; then
+      echo "$val" >> "$tmp"
+    fi
+  done
+
+  mean="$(awk '{s+=$1; n++} END{if(n) printf("%.9f", s/n); else print "NaN"}' "$tmp")"
+  std="$(awk '{s+=$1; ss+=$1*$1; n++} END{if(n>1) printf("%.9f", sqrt((ss - s*s/n)/(n-1))); else print "NaN"}' "$tmp")"
+
+  echo "$size,$mean,$std" >> "$OUT_CSV"
+done
+
+echo "Wrote: $OUT_CSV"