diff --git a/DataFormats/Portable/BuildFile.xml b/DataFormats/Portable/BuildFile.xml new file mode 100644 index 0000000000000..7a845459f17c2 --- /dev/null +++ b/DataFormats/Portable/BuildFile.xml @@ -0,0 +1,3 @@ + + + diff --git a/DataFormats/Portable/README.md b/DataFormats/Portable/README.md new file mode 100644 index 0000000000000..167039a8fdebe --- /dev/null +++ b/DataFormats/Portable/README.md @@ -0,0 +1,38 @@ +## Define portable data formats that wrap SoA data structures and can be persisted to ROOT files + +### `PortableHostCollection` + +`PortableHostCollection` is a class template that wraps a SoA type `T` and an alpaka host buffer, which owns the +memory where the SoA is allocated. The content of the SoA is persistent, while the buffer itself is transient. +Specialisations of this template can be persisted, and can be read back also in "bare ROOT" mode, without any +dictionaries. +They have no implicit or explicit references to alpaka (neither as part of the class signature nor as part of its name). +This could make it possible to read them back with different portability solutions in the future. + +### `PortableDeviceCollection` + +`PortableDeviceCollection` is a class template that wraps a SoA type `T` and an alpaka device buffer, which +owns the memory where the SoA is allocated. +To avoid confusion and ODR-violations, the `PortableDeviceCollection` template cannot be used with the `Host` +device type. +Specialisations of this template are transient and cannot be persisted. + +### `ALPAKA_ACCELERATOR_NAMESPACE::PortableCollection` + +`ALPAKA_ACCELERATOR_NAMESPACE::PortableCollection` is a template alias that resolves to either +`PortableHostCollection` or `PortableDeviceCollection`, depending on the +backend. + +### `PortableCollection` + +`PortableCollection` is an alias template that resolves to `ALPAKA_ACCELERATOR_NAMESPACE::PortableCollection` +for the matching device. + + +## Notes + +Modules that are supposed to work with only host types (_e.g._ dealing with de/serialisation, data transfers, _etc._) +should explicitly use the `PortableHostCollection` types. + +Modules that implement portable interfaces (_e.g._ producers) should use the generic types based on +`ALPAKA_ACCELERATOR_NAMESPACE::PortableCollection` or `PortableCollection`. diff --git a/DataFormats/Portable/interface/PortableCollection.h b/DataFormats/Portable/interface/PortableCollection.h new file mode 100644 index 0000000000000..4d3b4a9b1a269 --- /dev/null +++ b/DataFormats/Portable/interface/PortableCollection.h @@ -0,0 +1,16 @@ +#ifndef DataFormats_Portable_interface_PortableCollection_h +#define DataFormats_Portable_interface_PortableCollection_h + +namespace traits { + + // trait for a generic SoA-based product + template + class PortableCollectionTrait; + +} // namespace traits + +// type alias for a generic SoA-based product +template +using PortableCollection = typename traits::PortableCollectionTrait::CollectionType; + +#endif // DataFormats_Portable_interface_PortableCollection_h diff --git a/DataFormats/Portable/interface/PortableDeviceCollection.h b/DataFormats/Portable/interface/PortableDeviceCollection.h new file mode 100644 index 0000000000000..64b3b3ae55b15 --- /dev/null +++ b/DataFormats/Portable/interface/PortableDeviceCollection.h @@ -0,0 +1,65 @@ +#ifndef DataFormats_Portable_interface_PortableDeviceCollection_h +#define DataFormats_Portable_interface_PortableDeviceCollection_h + +#include +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +// generic SoA-based product in device memory +template +class PortableDeviceCollection { + static_assert(not std::is_same_v, + "Use PortableHostCollection instead of PortableDeviceCollection"); + +public: + using Layout = T; + using View = typename Layout::View; + using ConstView = typename Layout::ConstView; + using Buffer = alpaka::Buf, uint32_t>; + using ConstBuffer = alpaka::ViewConst; + + PortableDeviceCollection() = default; + + PortableDeviceCollection(int32_t elements, TDev const &device) + : buffer_{alpaka::allocBuf( + device, alpaka::Vec, uint32_t>{Layout::computeDataSize(elements)})}, + layout_{buffer_->data(), elements}, + view_{layout_} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); + } + + ~PortableDeviceCollection() = default; + + // non-copyable + PortableDeviceCollection(PortableDeviceCollection const &) = delete; + PortableDeviceCollection &operator=(PortableDeviceCollection const &) = delete; + + // movable + PortableDeviceCollection(PortableDeviceCollection &&other) = default; + PortableDeviceCollection &operator=(PortableDeviceCollection &&other) = default; + + View &view() { return view_; } + ConstView const &view() const { return view_; } + ConstView const &const_view() const { return view_; } + + View &operator*() { return view_; } + ConstView const &operator*() const { return view_; } + + View *operator->() { return &view_; } + ConstView const *operator->() const { return &view_; } + + Buffer buffer() { return *buffer_; } + ConstBuffer buffer() const { return *buffer_; } + ConstBuffer const_buffer() const { return *buffer_; } + +private: + std::optional buffer_; //! + Layout layout_; // + View view_; //! +}; + +#endif // DataFormats_Portable_interface_PortableDeviceCollection_h diff --git a/DataFormats/Portable/interface/PortableHostCollection.h b/DataFormats/Portable/interface/PortableHostCollection.h new file mode 100644 index 0000000000000..28e5da86ccc00 --- /dev/null +++ b/DataFormats/Portable/interface/PortableHostCollection.h @@ -0,0 +1,83 @@ +#ifndef DataFormats_Portable_interface_PortableHostCollection_h +#define DataFormats_Portable_interface_PortableHostCollection_h + +#include + +#include + +#include "DataFormats/SoATemplate/interface/SoACommon.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" + +// generic SoA-based product in host memory +template +class PortableHostCollection { +public: + using Layout = T; + using View = typename Layout::View; + using ConstView = typename Layout::ConstView; + using Buffer = alpaka::Buf, uint32_t>; + using ConstBuffer = alpaka::ViewConst; + + PortableHostCollection() = default; + + PortableHostCollection(int32_t elements, alpaka_common::DevHost const &host) + // allocate pageable host memory + : buffer_{alpaka::allocBuf( + host, alpaka::Vec, uint32_t>{Layout::computeDataSize(elements)})}, + layout_{buffer_->data(), elements}, + view_{layout_} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); + } + + template + PortableHostCollection(int32_t elements, alpaka_common::DevHost const &host, TDev const &device) + // allocate pinned host memory, accessible by the given device + : buffer_{alpaka::allocMappedBuf( + host, device, alpaka::Vec, uint32_t>{Layout::computeDataSize(elements)})}, + layout_{buffer_->data(), elements}, + view_{layout_} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); + } + + ~PortableHostCollection() = default; + + // non-copyable + PortableHostCollection(PortableHostCollection const &) = delete; + PortableHostCollection &operator=(PortableHostCollection const &) = delete; + + // movable + PortableHostCollection(PortableHostCollection &&other) = default; + PortableHostCollection &operator=(PortableHostCollection &&other) = default; + + View &view() { return view_; } + ConstView const &view() const { return view_; } + ConstView const &const_view() const { return view_; } + + View &operator*() { return view_; } + ConstView const &operator*() const { return view_; } + + View *operator->() { return &view_; } + ConstView const *operator->() const { return &view_; } + + Buffer buffer() { return *buffer_; } + ConstBuffer buffer() const { return *buffer_; } + ConstBuffer const_buffer() const { return *buffer_; } + + // part of the ROOT read streamer + static void ROOTReadStreamer(PortableHostCollection *newObj, Layout const &layout) { + newObj->~PortableHostCollection(); + // use the global "host" object returned by alpaka_common::host() + new (newObj) PortableHostCollection(layout.metadata().size(), alpaka_common::host()); + newObj->layout_.ROOTReadStreamer(layout); + } + +private: + std::optional buffer_; //! + Layout layout_; // + View view_; //! +}; + +#endif // DataFormats_Portable_interface_PortableHostCollection_h diff --git a/DataFormats/Portable/interface/alpaka/PortableCollection.h b/DataFormats/Portable/interface/alpaka/PortableCollection.h new file mode 100644 index 0000000000000..d9cfaf5c66bed --- /dev/null +++ b/DataFormats/Portable/interface/alpaka/PortableCollection.h @@ -0,0 +1,42 @@ +#ifndef DataFormats_Portable_interface_alpaka_PortableDeviceCollection_h +#define DataFormats_Portable_interface_alpaka_PortableDeviceCollection_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableCollection.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + +#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + // ... or any other CPU-based accelerators + + // generic SoA-based product in host memory + template + using PortableCollection = ::PortableHostCollection; + +#else + + // generic SoA-based product in device memory + template + using PortableCollection = ::PortableDeviceCollection; + +#endif // ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace traits { + + // specialise the trait for the device provided by the ALPAKA_ACCELERATOR_NAMESPACE + template + class PortableCollectionTrait { + using CollectionType = ALPAKA_ACCELERATOR_NAMESPACE::PortableCollection; + }; + +} // namespace traits + +#endif // DataFormats_Portable_interface_alpaka_PortableDeviceCollection_h diff --git a/DataFormats/PortableTestObjects/BuildFile.xml b/DataFormats/PortableTestObjects/BuildFile.xml new file mode 100644 index 0000000000000..1ea43eb400166 --- /dev/null +++ b/DataFormats/PortableTestObjects/BuildFile.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/DataFormats/PortableTestObjects/README.md b/DataFormats/PortableTestObjects/README.md new file mode 100644 index 0000000000000..1814c02dfd42d --- /dev/null +++ b/DataFormats/PortableTestObjects/README.md @@ -0,0 +1,10 @@ +## Define the portable SoA-based data formats + +Notes: + - define a full dictionary for `portabletest::TestSoA` and `portabletest::TestHostCollection` + - do not define a dictionary for `alpaka_serial_sync::portabletest::TestDeviceCollection`, + because it is the same class as `portabletest::TestHostCollection`; + - define the dictionary for `alpaka_cuda_async::portabletest::TestDeviceCollection` + as _transient_ only; + - the dictionary for `alpaka_cuda_async::portabletest::TestDeviceCollection` should + be defined in a separate library, to factor out the CUDA dependency. diff --git a/DataFormats/PortableTestObjects/interface/TestHostCollection.h b/DataFormats/PortableTestObjects/interface/TestHostCollection.h new file mode 100644 index 0000000000000..f7f4ffd64b7d8 --- /dev/null +++ b/DataFormats/PortableTestObjects/interface/TestHostCollection.h @@ -0,0 +1,14 @@ +#ifndef DataFormats_PortableTestObjects_interface_TestHostCollection_h +#define DataFormats_PortableTestObjects_interface_TestHostCollection_h + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/PortableTestObjects/interface/TestSoA.h" + +namespace portabletest { + + // SoA with x, y, z, id fields in host memory + using TestHostCollection = PortableHostCollection; + +} // namespace portabletest + +#endif // DataFormats_PortableTestObjects_interface_TestHostCollection_h diff --git a/DataFormats/PortableTestObjects/interface/TestSoA.h b/DataFormats/PortableTestObjects/interface/TestSoA.h new file mode 100644 index 0000000000000..3d2152d866ba1 --- /dev/null +++ b/DataFormats/PortableTestObjects/interface/TestSoA.h @@ -0,0 +1,22 @@ +#ifndef DataFormats_PortableTestObjects_interface_TestSoA_h +#define DataFormats_PortableTestObjects_interface_TestSoA_h + +#include "DataFormats/SoATemplate/interface/SoACommon.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoAView.h" + +namespace portabletest { + + // SoA layout with x, y, z, id fields + GENERATE_SOA_LAYOUT(TestSoALayout, + // columns: one value per element + SOA_COLUMN(double, x), + SOA_COLUMN(double, y), + SOA_COLUMN(double, z), + SOA_COLUMN(int32_t, id)) + + using TestSoA = TestSoALayout<>; + +} // namespace portabletest + +#endif // DataFormats_PortableTestObjects_interface_TestSoA_h diff --git a/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h b/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h new file mode 100644 index 0000000000000..3109bb15462f0 --- /dev/null +++ b/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h @@ -0,0 +1,22 @@ +#ifndef DataFormats_PortableTestObjects_interface_alpaka_TestDeviceCollection_h +#define DataFormats_PortableTestObjects_interface_alpaka_TestDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/PortableTestObjects/interface/TestSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + namespace portabletest { + + // import the top-level portabletest namespace + using namespace ::portabletest; + + // SoA with x, y, z, id fields in device global memory + using TestDeviceCollection = PortableCollection; + + } // namespace portabletest + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // DataFormats_PortableTestObjects_interface_alpaka_TestDeviceCollection_h diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_cuda.h b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..bcda8c64ea284 --- /dev/null +++ b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda.h @@ -0,0 +1,3 @@ +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/PortableTestObjects/interface/TestSoA.h" +#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h" diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..4a4167944b259 --- /dev/null +++ b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml @@ -0,0 +1,4 @@ + + + + diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_serial.h b/DataFormats/PortableTestObjects/src/alpaka/classes_serial.h new file mode 100644 index 0000000000000..9405bbfd336e2 --- /dev/null +++ b/DataFormats/PortableTestObjects/src/alpaka/classes_serial.h @@ -0,0 +1,3 @@ +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/PortableTestObjects/interface/TestHostCollection.h" +#include "DataFormats/PortableTestObjects/interface/TestSoA.h" diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_serial_def.xml b/DataFormats/PortableTestObjects/src/alpaka/classes_serial_def.xml new file mode 100644 index 0000000000000..cb12768035e4b --- /dev/null +++ b/DataFormats/PortableTestObjects/src/alpaka/classes_serial_def.xml @@ -0,0 +1,16 @@ + + + + + + + + diff --git a/DataFormats/PortableTestObjects/src/classes.h b/DataFormats/PortableTestObjects/src/classes.h new file mode 100644 index 0000000000000..46c07862b7db8 --- /dev/null +++ b/DataFormats/PortableTestObjects/src/classes.h @@ -0,0 +1 @@ +#include "DataFormats/PortableTestObjects/interface/TestSoA.h" diff --git a/DataFormats/PortableTestObjects/src/classes_def.xml b/DataFormats/PortableTestObjects/src/classes_def.xml new file mode 100644 index 0000000000000..21b727eee18c6 --- /dev/null +++ b/DataFormats/PortableTestObjects/src/classes_def.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/DataFormats/SoATemplate/BuildFile.xml b/DataFormats/SoATemplate/BuildFile.xml new file mode 100644 index 0000000000000..8267b0602e13a --- /dev/null +++ b/DataFormats/SoATemplate/BuildFile.xml @@ -0,0 +1,2 @@ + + diff --git a/DataFormats/SoATemplate/README.md b/DataFormats/SoATemplate/README.md new file mode 100644 index 0000000000000..ba1b00e70233d --- /dev/null +++ b/DataFormats/SoATemplate/README.md @@ -0,0 +1,222 @@ +# Structure of array (SoA) generation + +The two header files [`SoALayout.h`](SoALayout.h) and [`SoAView.h`](SoAView.h) define preprocessor macros that +allow generating SoA classes. The SoA classes generate multiple, aligned column from a memory buffer. The memory +buffer is allocated separately by the user, and can be located in a memory space different from the local one (for +example, a SoA located in a GPU device memory can be fully pre-defined on the host and the resulting structure is +passed to the GPU kernel). + +This columnar storage allows efficient memory access by GPU kernels (coalesced access on cache line aligned data) +and possibly vectorization. + +Additionally, templation of the layout and view classes allows compile-time variations of accesses and checks: +verification of alignment and corresponding compiler hinting, cache strategy (non-coherent, streaming with immediate +invalidation), range checking. + +Macro generation allows generating code that provides a clear and concise access of data when used. The code +generation uses the Boost Preprocessing library. + +## Layout + +`SoALayout` is a macro generated templated class that subdivides a provided buffer into a collection of columns, +Eigen columns and scalars. The buffer is expected to be aligned with a selectable alignment defaulting to the CUDA +GPU cache line (128 bytes). All columns and scalars within a `SoALayout` will be individually aligned, leaving +padding at the end of each if necessary. Eigen columns have each component of the vector or matrix properly aligned +in individual column (by defining the stride between components). Only compile-time sized Eigen vectors and matrices +are supported. Scalar members are members of layout with one element, irrespective of the size of the layout. + +Static utility functions automatically compute the byte size of a layout, taking into account all its columns and +alignment. + +## View + +`SoAView` is a macro generated templated class allowing access to columns defined in one or multiple `SoALayout`s or +`SoAViews`. The view can be generated in a constant and non-constant flavors. All view flavors provide with the same +interface where scalar elements are accessed with an `operator()`: `soa.scalar()` while columns (Eigen or not) are +accessed via a array of structure (AoS) -like syntax: `soa[index].x()`. The "struct" object returned by `operator[]` +can be used as a shortcut: `auto si = soa[index]; si.z() = si.x() + zi.y();` + +A view can be instanciated by being passed the layout(s) and view(s) it is defined against, or column by column. + +Layout classes also define a `View` and `ConstView` subclass that provide access to each column and +scalar of the layout. In addition to those fully parametrized templates, two others levels of parametrization are +provided: `ViewTemplate`, `ViewViewTemplateFreeParams` and respectively `ConstViewTemplate`, +`ConstViewTemplateFreeParams`. The parametrization of those templates is explained in the [Template +parameters section](#template-parameters). + +## Metadata subclass + +In order to no clutter the namespace of the generated class, a subclass name `Metadata` is generated. It is +instanciated with the `metadata()` member function and contains various utility functions, like `size()` (number +of elements in the SoA), `byteSize()`, `byteAlignment()`, `data()` (a pointer to the buffer). A `nextByte()` +function computes the first byte of a structure right after a layout, allowing using a single buffer for multiple +layouts. + +## ROOT serialization and de-serialization + +Layouts can be serialized and de-serialized with ROOT. In order to generate the ROOT dictionary, separate +`clases_def.xml` and `classes.h` should be prepared. `classes.h` ensures the inclusion of the proper header files to +get the definition of the serialized classes, and `classes_def.xml` needs to define the fixed list of members that +ROOT should ignore, plus the list of all the columns. [An example is provided below.](#examples) + +Serialization of Eigen data is not yet supported. + +## Template parameters + +The template shared by layouts and parameters are: +- Byte aligment (defaulting to the nVidia GPU cache line size (128 bytes)) +- Alignment enforcement (`relaxed` or `enforced`). When enforced, the alignment will be checked at construction + time, and the accesses are done with compiler hinting (using the widely supported `__builtin_assume_aligned` + intrinsic). + +In addition, the views also provide access parameters: +- Restrict qualify: add restrict hints to read accesses, so that the compiler knows it can relax accesses to the + data and assume it will not change. On nVidia GPUs, this leads to the generation of instruction using the faster + non-coherent cache. +- Range checking: add index checking on each access. As this is a compile time parameter, the cost of the feature at + run time is null if turned off. When turned on, the accesses will be slowed down by checks. Uppon error detection, + an exception is launched (on the CPU side) or the kernel is made to crash (on the GPU side). This feature can help + the debugging of index issues at runtime, but of course requires a recompilation. + +The trivial views subclasses come in a variety of parametrization levels: `View` uses the same byte +alignement and alignment enforcement as the layout, and defaults (off) for restrict qualifying and range checking. +`ViewTemplate` template allows setting of restrict qualifying and range checking, while +`ViewTemplateFreeParams` allows full re-customization of the template parameters. + +## Using SoA layouts and views with GPUs + +Instanciation of views and layouts is preferably done on the CPU side. The view object is lightweight, with only one +pointer per column, plus the global number of elements. Extra view class can be generated to restrict this number of +pointers to the strict minimum in scenarios where only a subset of columns are used in a given GPU kernel. + +## Examples + +A layout can be defined as: + +```C++ +#include "DataFormats/SoALayout.h" + +GENERATE_SOA_LAYOUT(SoA1LayoutTemplate, + // predefined static scalars + // size_t size; + // size_t alignment; + + // columns: one value per element + SOA_COLUMN(double, x), + SOA_COLUMN(double, y), + SOA_COLUMN(double, z), + SOA_EIGEN_COLUMN(Eigen::Vector3d, a), + SOA_EIGEN_COLUMN(Eigen::Vector3d, b), + SOA_EIGEN_COLUMN(Eigen::Vector3d, r), + SOA_COLUMN(uint16_t, color), + SOA_COLUMN(int32_t, value), + SOA_COLUMN(double *, py), + SOA_COLUMN(uint32_t, count), + SOA_COLUMN(uint32_t, anotherCount), + + // scalars: one value for the whole structure + SOA_SCALAR(const char *, description), + SOA_SCALAR(uint32_t, someNumber) +); + +// Default template parameters are < +// size_t ALIGNMENT = cms::soa::CacheLineSize::defaultSize, +// bool ALIGNMENT_ENFORCEMENT = cms::soa::AlignmentEnforcement::relaxed +// > +using SoA1Layout = SoA1LayoutTemplate<>; + +using SoA1LayoutAligned = SoA1LayoutTemplate; +``` + +The buffer of the proper size is allocated, and the layout is populated with: + +```C++ +// Allocation of aligned +size_t elements = 100; +using AlignedBuffer = std::unique_ptr; +AlignedBuffer h_buf (reinterpret_cast(aligned_alloc(SoA1LayoutAligned::byteAlignment, SoA1LayoutAligned::computeDataSize(elements))), std::free); +SoA1LayoutAligned soaLayout(h_buf.get(), elements); +``` + +A view will derive its column types from one or multiple layouts. The macro generating the view takes a list of layouts or views it +gets is data from as a first parameter, and the selection of the columns the view will give access to as a second parameter. + +```C++ +// A 1 to 1 view of the layout (except for unsupported types). +GENERATE_SOA_VIEW(SoA1ViewTemplate, + SOA_VIEW_LAYOUT_LIST( + SOA_VIEW_LAYOUT(SoA1Layout, soa1) + ), + SOA_VIEW_VALUE_LIST( + SOA_VIEW_VALUE(soa1, x), + SOA_VIEW_VALUE(soa1, y), + SOA_VIEW_VALUE(soa1, z), + SOA_VIEW_VALUE(soa1, color), + SOA_VIEW_VALUE(soa1, value), + SOA_VIEW_VALUE(soa1, py), + SOA_VIEW_VALUE(soa1, count), + SOA_VIEW_VALUE(soa1, anotherCount), + SOA_VIEW_VALUE(soa1, description), + SOA_VIEW_VALUE(soa1, someNumber) + ) +); + +using SoA1View = SoA1ViewTemplate<>; + +SoA1View soaView(soaLayout); + +for (size_t i=0; i < soaLayout.metadata().size(); ++i) { + auto si = soaView[i]; + si.x() = si.y() = i; + soaView.someNumber() += i; +} +``` + +The mutable and const views with the exact same set of columns and their parametrized variants are provided from the layout as: + +```C++ +// (Pseudo-code) +struct SoA1Layout::View; + +template +struct SoA1Layout::ViewTemplate; + +template +struct SoA1Layout::ViewTemplateFreeParams; + +struct SoA1Layout::ConstView; + +template +struct SoA1Layout::ConstViewTemplate; + +template +struct SoA1Layout::ConstViewTemplateFreeParams; +``` + + + +## Current status and further improvements + +### Available features + +- The layout and views support scalars and columns, alignment and alignment enforcement and hinting (linked). +- Automatic `__restrict__` compiler hinting is supported and can be enabled where appropriate. +- Automatic creation of trivial views and const views derived from a single layout. +- Cache access style, which was explored, was abandoned as this not-yet-used feature interferes with `__restrict__` + support (which is already in used in existing code). It could be made available as a separate tool that can be used + directly by the module developer, orthogonally from SoA. +- Optional (compile time) range checking validates the index of every column access, throwing an exception on the + CPU side and forcing a segmentation fault to halt kernels. When not enabled, it has no impact on performance (code + not compiled) +- Eigen columns are also suported, with both const and non-const flavors. +- ROOT serialization and deserialization is supported. In CMSSW, it is planned to be used through the memory + managing `PortableCollection` family of classes. +- An `operator<<()` is provided to print the layout of an SoA to standard streams. diff --git a/DataFormats/SoATemplate/interface/SoACommon.h b/DataFormats/SoATemplate/interface/SoACommon.h new file mode 100644 index 0000000000000..5fbb986f136d0 --- /dev/null +++ b/DataFormats/SoATemplate/interface/SoACommon.h @@ -0,0 +1,717 @@ +#ifndef DataFormats_SoATemplate_interface_SoACommon_h +#define DataFormats_SoATemplate_interface_SoACommon_h + +/* + * Definitions of SoA common parameters for SoA class generators + */ + +#include +#include +#include +#include +#include + +#include + +#include "FWCore/Utilities/interface/typedefs.h" + +// CUDA attributes +#ifdef __CUDACC__ +#define SOA_HOST_ONLY __host__ +#define SOA_DEVICE_ONLY __device__ +#define SOA_HOST_DEVICE __host__ __device__ +#define SOA_INLINE __forceinline__ +#else +#define SOA_HOST_ONLY +#define SOA_DEVICE_ONLY +#define SOA_HOST_DEVICE +#define SOA_INLINE inline __attribute__((always_inline)) +#endif + +// Exception throwing (or willful crash in kernels) +#if defined(__CUDACC__) && defined(__CUDA_ARCH__) +#define SOA_THROW_OUT_OF_RANGE(A) \ + { \ + printf("%s\n", (A)); \ + __trap(); \ + } +#else +#define SOA_THROW_OUT_OF_RANGE(A) \ + { throw std::out_of_range(A); } +#endif + +/* declare "scalars" (one value shared across the whole SoA) and "columns" (one value per element) */ +#define _VALUE_TYPE_SCALAR 0 +#define _VALUE_TYPE_COLUMN 1 +#define _VALUE_TYPE_EIGEN_COLUMN 2 + +/* The size type need to be "hardcoded" in the template parameters for classes serialized by ROOT */ +#define CMS_SOA_BYTE_SIZE_TYPE std::size_t + +namespace cms::soa { + + // size_type for indices. Compatible with ROOT Int_t, but limited to 2G entries + using size_type = cms_int32_t; + // byte_size_type for byte counts. Not creating an artificial limit (and not ROOT serialized). + using byte_size_type = CMS_SOA_BYTE_SIZE_TYPE; + + enum class SoAColumnType { + scalar = _VALUE_TYPE_SCALAR, + column = _VALUE_TYPE_COLUMN, + eigen = _VALUE_TYPE_EIGEN_COLUMN + }; + + namespace RestrictQualify { + constexpr bool enabled = true; + constexpr bool disabled = false; + constexpr bool Default = disabled; + } // namespace RestrictQualify + + namespace RangeChecking { + constexpr bool enabled = true; + constexpr bool disabled = false; + constexpr bool Default = disabled; + } // namespace RangeChecking + + template + struct add_restrict {}; + + template + struct add_restrict { + using Value = T; + using Pointer = T* __restrict__; + using Reference = T& __restrict__; + using ConstValue = const T; + using PointerToConst = const T* __restrict__; + using ReferenceToConst = const T& __restrict__; + }; + + template + struct add_restrict { + using Value = T; + using Pointer = T*; + using Reference = T&; + using ConstValue = const T; + using PointerToConst = const T*; + using ReferenceToConst = const T&; + }; + + // Forward declarations + template + struct SoAConstParametersImpl; + + template + struct SoAParametersImpl; + + // Templated const parameter sets for scalars, columns and Eigen columns + template + struct SoAConstParametersImpl { + static constexpr SoAColumnType columnType = COLUMN_TYPE; + + using ValueType = T; + using ScalarType = T; + using TupleOrPointerType = const ValueType*; + + // default constructor + SoAConstParametersImpl() = default; + + // constructor from an address + SOA_HOST_DEVICE SOA_INLINE constexpr SoAConstParametersImpl(ValueType const* addr) : addr_(addr) {} + + // constructor from a non-const parameter set + SOA_HOST_DEVICE SOA_INLINE constexpr SoAConstParametersImpl(SoAParametersImpl const& o) + : addr_{o.addr_} {} + + static constexpr bool checkAlignment(ValueType* addr, byte_size_type alignment) { + return reinterpret_cast(addr) % alignment; + } + + public: + // scalar or column + ValueType const* addr_ = nullptr; + }; + + // Templated const parameter specialisation for Eigen columns + template + struct SoAConstParametersImpl { + static constexpr SoAColumnType columnType = SoAColumnType::eigen; + + using ValueType = T; + using ScalarType = typename T::Scalar; + using TupleOrPointerType = std::tuple; + + // default constructor + SoAConstParametersImpl() = default; + + // constructor from individual address and stride + SOA_HOST_DEVICE SOA_INLINE constexpr SoAConstParametersImpl(ScalarType const* addr, byte_size_type stride) + : addr_(addr), stride_(stride) {} + + // constructor from address and stride packed in a tuple + SOA_HOST_DEVICE SOA_INLINE constexpr SoAConstParametersImpl(TupleOrPointerType const& tuple) + : addr_(std::get<0>(tuple)), stride_(std::get<1>(tuple)) {} + + // constructor from a non-const parameter set + SOA_HOST_DEVICE SOA_INLINE constexpr SoAConstParametersImpl(SoAParametersImpl const& o) + : addr_{o.addr_}, stride_{o.stride_} {} + + static constexpr bool checkAlignment(TupleOrPointerType const& tuple, byte_size_type alignment) { + const auto& [addr, stride] = tuple; + return reinterpret_cast(addr) % alignment; + } + + public: + // address and stride + ScalarType const* addr_ = nullptr; + byte_size_type stride_ = 0; + }; + + // Matryoshka template to avoid commas inside macros + template + struct SoAConstParameters_ColumnType { + template + using DataType = SoAConstParametersImpl; + }; + + // Templated parameter sets for scalars, columns and Eigen columns + template + struct SoAParametersImpl { + static constexpr SoAColumnType columnType = COLUMN_TYPE; + + using ValueType = T; + using ScalarType = T; + using TupleOrPointerType = ValueType*; + + using ConstType = SoAConstParametersImpl; + friend ConstType; + + // default constructor + SoAParametersImpl() = default; + + // constructor from an address + SOA_HOST_DEVICE SOA_INLINE constexpr SoAParametersImpl(ValueType* addr) : addr_(addr) {} + + static constexpr bool checkAlignment(ValueType* addr, byte_size_type alignment) { + return reinterpret_cast(addr) % alignment; + } + + public: + // scalar or column + ValueType* addr_ = nullptr; + }; + + // Templated parameter specialisation for Eigen columns + template + struct SoAParametersImpl { + static constexpr SoAColumnType columnType = SoAColumnType::eigen; + + using ValueType = T; + using ScalarType = typename T::Scalar; + using TupleOrPointerType = std::tuple; + + using ConstType = SoAConstParametersImpl; + friend ConstType; + + // default constructor + SoAParametersImpl() = default; + + // constructor from individual address and stride + SOA_HOST_DEVICE SOA_INLINE constexpr SoAParametersImpl(ScalarType* addr, byte_size_type stride) + : addr_(addr), stride_(stride) {} + + // constructor from address and stride packed in a tuple + SOA_HOST_DEVICE SOA_INLINE constexpr SoAParametersImpl(TupleOrPointerType const& tuple) + : addr_(std::get<0>(tuple)), stride_(std::get<1>(tuple)) {} + + static constexpr bool checkAlignment(TupleOrPointerType const& tuple, byte_size_type alignment) { + const auto& [addr, stride] = tuple; + return reinterpret_cast(addr) % alignment; + } + + public: + // address and stride + ScalarType* addr_ = nullptr; + byte_size_type stride_ = 0; + }; + + // Matryoshka template to avoid commas inside macros + template + struct SoAParameters_ColumnType { + template + using DataType = SoAParametersImpl; + }; + + // Helper converting a const parameter set to a non-const parameter set, to be used only in the constructor of non-const "element" + namespace { + template + constexpr inline std::remove_const_t* non_const_ptr(T* p) { + return const_cast*>(p); + } + } // namespace + + template + SOA_HOST_DEVICE SOA_INLINE constexpr SoAParametersImpl const_cast_SoAParametersImpl( + SoAConstParametersImpl const& o) { + return SoAParametersImpl{non_const_ptr(o.addr_)}; + } + + template + SOA_HOST_DEVICE SOA_INLINE constexpr SoAParametersImpl const_cast_SoAParametersImpl( + SoAConstParametersImpl const& o) { + return SoAParametersImpl{non_const_ptr(o.addr_), o.stride_}; + } + + // Helper template managing the value at index idx within a column. + // The optional compile time alignment parameter enables informing the + // compiler of alignment (enforced by caller). + template + class SoAValue { + // Eigen is implemented in a specialization + static_assert(COLUMN_TYPE != SoAColumnType::eigen); + + public: + using Restr = add_restrict; + using Val = typename Restr::Value; + using Ptr = typename Restr::Pointer; + using Ref = typename Restr::Reference; + using PtrToConst = typename Restr::PointerToConst; + using RefToConst = typename Restr::ReferenceToConst; + + SOA_HOST_DEVICE SOA_INLINE SoAValue(size_type i, T* col) : idx_(i), col_(col) {} + + SOA_HOST_DEVICE SOA_INLINE SoAValue(size_type i, SoAParametersImpl params) + : idx_(i), col_(params.addr_) {} + + SOA_HOST_DEVICE SOA_INLINE Ref operator()() { + // Ptr type will add the restrict qualifyer if needed + Ptr col = alignedCol(); + return col[idx_]; + } + + SOA_HOST_DEVICE SOA_INLINE RefToConst operator()() const { + // PtrToConst type will add the restrict qualifyer if needed + PtrToConst col = alignedCol(); + return col[idx_]; + } + + SOA_HOST_DEVICE SOA_INLINE Ptr operator&() { return &alignedCol()[idx_]; } + + SOA_HOST_DEVICE SOA_INLINE PtrToConst operator&() const { return &alignedCol()[idx_]; } + + /* This was an attempt to implement the syntax + * + * old_value = view.x + * view.x = new_value + * + * instead of + * + * old_value = view.x() + * view.x() = new_value + * + * but it was found to break in some corner cases. + * We keep them commented out for the time being. + + SOA_HOST_DEVICE SOA_INLINE operator T&() { return col_[idx_]; } + + template + SOA_HOST_DEVICE SOA_INLINE Ref operator=(const T2& v) { + return alignedCol()[idx_] = v; + } + */ + + using valueType = Val; + + static constexpr auto valueSize = sizeof(T); + + private: + SOA_HOST_DEVICE SOA_INLINE Ptr alignedCol() const { + if constexpr (ALIGNMENT) { + return reinterpret_cast(__builtin_assume_aligned(col_, ALIGNMENT)); + } + return reinterpret_cast(col_); + } + + size_type idx_; + T* col_; + }; + + // Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns. +#ifdef EIGEN_WORLD_VERSION + // Helper template managing an Eigen-type value at index idx within a column. + template + class SoAValue { + public: + using Type = C; + using MapType = Eigen::Map>; + using CMapType = const Eigen::Map>; + using Restr = add_restrict; + using Val = typename Restr::Value; + using Ptr = typename Restr::Pointer; + using Ref = typename Restr::Reference; + using PtrToConst = typename Restr::PointerToConst; + using RefToConst = typename Restr::ReferenceToConst; + + SOA_HOST_DEVICE SOA_INLINE SoAValue(size_type i, typename C::Scalar* col, byte_size_type stride) + : val_(col + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), + crCol_(col), + cVal_(crCol_ + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), + stride_(stride) {} + + SOA_HOST_DEVICE SOA_INLINE SoAValue(size_type i, SoAParametersImpl params) + : val_(params.addr_ + i, + C::RowsAtCompileTime, + C::ColsAtCompileTime, + Eigen::InnerStride(params.stride_)), + crCol_(params.addr_), + cVal_(crCol_ + i, + C::RowsAtCompileTime, + C::ColsAtCompileTime, + Eigen::InnerStride(params.stride_)), + stride_(params.stride_) {} + + SOA_HOST_DEVICE SOA_INLINE MapType& operator()() { return val_; } + + SOA_HOST_DEVICE SOA_INLINE const CMapType& operator()() const { return cVal_; } + + SOA_HOST_DEVICE SOA_INLINE operator C() { return val_; } + + SOA_HOST_DEVICE SOA_INLINE operator const C() const { return cVal_; } + + SOA_HOST_DEVICE SOA_INLINE C* operator&() { return &val_; } + + SOA_HOST_DEVICE SOA_INLINE const C* operator&() const { return &cVal_; } + + template + SOA_HOST_DEVICE SOA_INLINE MapType& operator=(const C2& v) { + return val_ = v; + } + + using ValueType = typename C::Scalar; + static constexpr auto valueSize = sizeof(C::Scalar); + SOA_HOST_DEVICE SOA_INLINE byte_size_type stride() const { return stride_; } + + private: + MapType val_; + const Ptr crCol_; + CMapType cVal_; + byte_size_type stride_; + }; +#else + // Raise a compile-time error + template + class SoAValue { + static_assert(!sizeof(C), + "Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns."); + }; +#endif + + // Helper template managing a const value at index idx within a column. + template + class SoAConstValue { + // Eigen is implemented in a specialization + static_assert(COLUMN_TYPE != SoAColumnType::eigen); + + public: + using Restr = add_restrict; + using Val = typename Restr::Value; + using Ptr = typename Restr::Pointer; + using Ref = typename Restr::Reference; + using PtrToConst = typename Restr::PointerToConst; + using RefToConst = typename Restr::ReferenceToConst; + using Params = SoAParametersImpl; + using ConstParams = SoAConstParametersImpl; + + SOA_HOST_DEVICE SOA_INLINE SoAConstValue(size_type i, const T* col) : idx_(i), col_(col) {} + + SOA_HOST_DEVICE SOA_INLINE SoAConstValue(size_type i, SoAParametersImpl params) + : idx_(i), col_(params.addr_) {} + + SOA_HOST_DEVICE SOA_INLINE SoAConstValue(size_type i, SoAConstParametersImpl params) + : idx_(i), col_(params.addr_) {} + + SOA_HOST_DEVICE SOA_INLINE RefToConst operator()() const { + // Ptr type will add the restrict qualifyer if needed + PtrToConst col = alignedCol(); + return col[idx_]; + } + + SOA_HOST_DEVICE SOA_INLINE const T* operator&() const { return &alignedCol()[idx_]; } + + /* This was an attempt to implement the syntax + * + * old_value = view.x + * + * instead of + * + * old_value = view.x() + * + * but it was found to break in some corner cases. + * We keep them commented out for the time being. + + SOA_HOST_DEVICE SOA_INLINE operator T&() { return col_[idx_]; } + */ + + using valueType = T; + static constexpr auto valueSize = sizeof(T); + + private: + SOA_HOST_DEVICE SOA_INLINE PtrToConst alignedCol() const { + if constexpr (ALIGNMENT) { + return reinterpret_cast(__builtin_assume_aligned(col_, ALIGNMENT)); + } + return reinterpret_cast(col_); + } + + size_type idx_; + const T* col_; + }; + + // Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns. +#ifdef EIGEN_WORLD_VERSION + // Helper template managing a const Eigen-type value at index idx within a column. + template + class SoAConstValue { + public: + using Type = C; + using CMapType = Eigen::Map>; + using RefToConst = const CMapType&; + using ConstParams = SoAConstParametersImpl; + + SOA_HOST_DEVICE SOA_INLINE SoAConstValue(size_type i, typename C::Scalar* col, byte_size_type stride) + : crCol_(col), + cVal_(crCol_ + i, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)), + stride_(stride) {} + + SOA_HOST_DEVICE SOA_INLINE SoAConstValue(size_type i, SoAConstParametersImpl params) + : crCol_(params.addr_), + cVal_(crCol_ + i, + C::RowsAtCompileTime, + C::ColsAtCompileTime, + Eigen::InnerStride(params.stride_)), + stride_(params.stride_) {} + + SOA_HOST_DEVICE SOA_INLINE const CMapType& operator()() const { return cVal_; } + + SOA_HOST_DEVICE SOA_INLINE operator const C() const { return cVal_; } + + SOA_HOST_DEVICE SOA_INLINE const C* operator&() const { return &cVal_; } + + using ValueType = typename C::Scalar; + static constexpr auto valueSize = sizeof(C::Scalar); + + SOA_HOST_DEVICE SOA_INLINE byte_size_type stride() const { return stride_; } + + private: + const typename C::Scalar* __restrict__ crCol_; + CMapType cVal_; + byte_size_type stride_; + }; +#else + // Raise a compile-time error + template + class SoAConstValue { + static_assert(!sizeof(C), + "Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns."); + }; +#endif + + // Helper template to avoid commas inside macros +#ifdef EIGEN_WORLD_VERSION + template + struct EigenConstMapMaker { + using Type = Eigen::Map>; + + class DataHolder { + public: + DataHolder(const typename C::Scalar* data) : data_(data) {} + + EigenConstMapMaker::Type withStride(byte_size_type stride) { + return EigenConstMapMaker::Type( + data_, C::RowsAtCompileTime, C::ColsAtCompileTime, Eigen::InnerStride(stride)); + } + + private: + const typename C::Scalar* const data_; + }; + + static DataHolder withData(const typename C::Scalar* data) { return DataHolder(data); } + }; +#else + template + struct EigenConstMapMaker { + // Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns. + static_assert(!sizeof(C), + "Eigen/Core should be pre-included before the SoA headers to enable support for Eigen columns."); + }; +#endif + + // Helper function to compute aligned size + constexpr inline byte_size_type alignSize(byte_size_type size, byte_size_type alignment) { + return ((size + alignment - 1) / alignment) * alignment; + } + +} // namespace cms::soa + +#define SOA_SCALAR(TYPE, NAME) (_VALUE_TYPE_SCALAR, TYPE, NAME) +#define SOA_COLUMN(TYPE, NAME) (_VALUE_TYPE_COLUMN, TYPE, NAME) +#define SOA_EIGEN_COLUMN(TYPE, NAME) (_VALUE_TYPE_EIGEN_COLUMN, TYPE, NAME) + +/* Iterate on the macro MACRO and return the result as a comma separated list */ +#define _ITERATE_ON_ALL_COMMA(MACRO, DATA, ...) \ + BOOST_PP_TUPLE_ENUM(BOOST_PP_SEQ_TO_TUPLE(_ITERATE_ON_ALL(MACRO, DATA, __VA_ARGS__))) + +/* Iterate MACRO on all elements */ +#define _ITERATE_ON_ALL(MACRO, DATA, ...) BOOST_PP_SEQ_FOR_EACH(MACRO, DATA, BOOST_PP_VARIADIC_TO_SEQ(__VA_ARGS__)) + +/* Switch on macros depending on scalar / column type */ +#define _SWITCH_ON_TYPE(VALUE_TYPE, IF_SCALAR, IF_COLUMN, IF_EIGEN_COLUMN) \ + BOOST_PP_IF( \ + BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_SCALAR), \ + IF_SCALAR, \ + BOOST_PP_IF( \ + BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_COLUMN), \ + IF_COLUMN, \ + BOOST_PP_IF(BOOST_PP_EQUAL(VALUE_TYPE, _VALUE_TYPE_EIGEN_COLUMN), IF_EIGEN_COLUMN, BOOST_PP_EMPTY()))) + +namespace cms::soa { + + /* Column accessors: templates implementing the global accesors (soa::x() and soa::x(index) */ + enum class SoAAccessType : bool { mutableAccess, constAccess }; + + template + struct SoAColumnAccessorsImpl {}; + + // TODO from Eric Cano: + // - add alignment support + // - SFINAE-based const/non const variants + + // Column + template + struct SoAColumnAccessorsImpl { + //SOA_HOST_DEVICE SOA_INLINE SoAColumnAccessorsImpl(T* baseAddress) : baseAddress_(baseAddress) {} + SOA_HOST_DEVICE SOA_INLINE SoAColumnAccessorsImpl(const SoAParametersImpl& params) + : params_(params) {} + SOA_HOST_DEVICE SOA_INLINE T* operator()() { return params_.addr_; } + using NoParamReturnType = T*; + SOA_HOST_DEVICE SOA_INLINE T& operator()(size_type index) { return params_.addr_[index]; } + + private: + SoAParametersImpl params_; + }; + + // Const column + template + struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE SOA_INLINE SoAColumnAccessorsImpl(const SoAConstParametersImpl& params) + : params_(params) {} + SOA_HOST_DEVICE SOA_INLINE const T* operator()() const { return params_.addr_; } + using NoParamReturnType = const T*; + SOA_HOST_DEVICE SOA_INLINE T operator()(size_type index) const { return params_.addr_[index]; } + + private: + SoAConstParametersImpl params_; + }; + + // Scalar + template + struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE SOA_INLINE SoAColumnAccessorsImpl(const SoAParametersImpl& params) + : params_(params) {} + SOA_HOST_DEVICE SOA_INLINE T& operator()() { return *params_.addr_; } + using NoParamReturnType = T&; + SOA_HOST_DEVICE SOA_INLINE void operator()(size_type index) const { + assert(false && "Indexed access impossible for SoA scalars."); + } + + private: + SoAParametersImpl params_; + }; + + // Const scalar + template + struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE SOA_INLINE SoAColumnAccessorsImpl(const SoAConstParametersImpl& params) + : params_(params) {} + SOA_HOST_DEVICE SOA_INLINE T operator()() const { return *params_.addr_; } + using NoParamReturnType = T; + SOA_HOST_DEVICE SOA_INLINE void operator()(size_type index) const { + assert(false && "Indexed access impossible for SoA scalars."); + } + + private: + SoAConstParametersImpl params_; + }; + + // Eigen-type + template + struct SoAColumnAccessorsImpl { + //SOA_HOST_DEVICE SOA_INLINE SoAColumnAccessorsImpl(T* baseAddress) : baseAddress_(baseAddress) {} + SOA_HOST_DEVICE SOA_INLINE SoAColumnAccessorsImpl(const SoAParametersImpl& params) + : params_(params) {} + SOA_HOST_DEVICE SOA_INLINE typename T::Scalar* operator()() { return params_.addr_; } + using NoParamReturnType = typename T::Scalar*; + //SOA_HOST_DEVICE SOA_INLINE T& operator()(size_type index) { return params_.addr_[index]; } + + private: + SoAParametersImpl params_; + }; + + // Const Eigen-type + template + struct SoAColumnAccessorsImpl { + SOA_HOST_DEVICE SOA_INLINE SoAColumnAccessorsImpl(const SoAConstParametersImpl& params) + : params_(params) {} + SOA_HOST_DEVICE SOA_INLINE const typename T::Scalar* operator()() const { return params_.addr_; } + using NoParamReturnType = typename T::Scalar*; + //SOA_HOST_DEVICE SOA_INLINE T operator()(size_type index) const { return params_.addr_[index]; } + + private: + SoAConstParametersImpl params_; + }; + + /* A helper template stager to avoid commas inside macros */ + template + struct SoAAccessors { + template + struct ColumnType { + template + struct AccessType : public SoAColumnAccessorsImpl { + using SoAColumnAccessorsImpl::SoAColumnAccessorsImpl; + }; + }; + }; + + /* Enum parameters allowing templated control of layout/view behaviors */ + /* Alignment enforcement verifies every column is aligned, and + * hints the compiler that it can expect column pointers to be aligned */ + struct AlignmentEnforcement { + static constexpr bool relaxed = false; + static constexpr bool enforced = true; + }; + + struct CacheLineSize { + static constexpr byte_size_type NvidiaGPU = 128; + static constexpr byte_size_type IntelCPU = 64; + static constexpr byte_size_type AMDCPU = 64; + static constexpr byte_size_type ARMCPU = 64; + static constexpr byte_size_type defaultSize = NvidiaGPU; + }; + +} // namespace cms::soa + +// Small wrapper for stream insertion of SoA printing +template >> +SOA_HOST_ONLY std::ostream& operator<<(std::ostream& os, const SOA& soa) { + soa.soaToStreamInternal(os); + return os; +} + +#endif // DataFormats_SoATemplate_interface_SoACommon_h diff --git a/DataFormats/SoATemplate/interface/SoALayout.h b/DataFormats/SoATemplate/interface/SoALayout.h new file mode 100644 index 0000000000000..a4c429ad6e977 --- /dev/null +++ b/DataFormats/SoATemplate/interface/SoALayout.h @@ -0,0 +1,559 @@ +#ifndef DataFormats_SoATemplate_interface_SoALayout_h +#define DataFormats_SoATemplate_interface_SoALayout_h + +/* + * Structure-of-Arrays template with "columns" and "scalars", defined through preprocessor macros, + * with compile-time size and alignment, and accessors to the "rows" and "columns". + */ + +#include +#include + +#include "SoACommon.h" +#include "SoAView.h" + +/* dump SoA fields information; these should expand to, for columns: + * Example: + * GENERATE_SOA_LAYOUT(SoA, + * // predefined static scalars + * // size_t size; + * // size_t alignment; + * + * // columns: one value per element + * SOA_COLUMN(double, x), + * SOA_COLUMN(double, y), + * SOA_COLUMN(double, z), + * SOA_EIGEN_COLUMN(Eigen::Vector3d, a), + * SOA_EIGEN_COLUMN(Eigen::Vector3d, b), + * SOA_EIGEN_COLUMN(Eigen::Vector3d, r), + * SOA_COLUMN(uint16_t, colour), + * SOA_COLUMN(int32_t, value), + * SOA_COLUMN(double *, py), + * SOA_COLUMN(uint32_t, count), + * SOA_COLUMN(uint32_t, anotherCount), + * + * // scalars: one value for the whole structure + * SOA_SCALAR(const char *, description), + * SOA_SCALAR(uint32_t, someNumber) + * ); + * + * dumps as: + * SoA(32, 64): + * sizeof(SoA): 152 + * Column x_ at offset 0 has size 256 and padding 0 + * Column y_ at offset 256 has size 256 and padding 0 + * Column z_ at offset 512 has size 256 and padding 0 + * Eigen value a_ at offset 768 has dimension (3 x 1) and per column size 256 and padding 0 + * Eigen value b_ at offset 1536 has dimension (3 x 1) and per column size 256 and padding 0 + * Eigen value r_ at offset 2304 has dimension (3 x 1) and per column size 256 and padding 0 + * Column colour_ at offset 3072 has size 64 and padding 0 + * Column value_ at offset 3136 has size 128 and padding 0 + * Column py_ at offset 3264 has size 256 and padding 0 + * Column count_ at offset 3520 has size 128 and padding 0 + * Column anotherCount_ at offset 3648 has size 128 and padding 0 + * Scalar description_ at offset 3776 has size 8 and padding 56 + * Scalar someNumber_ at offset 3840 has size 4 and padding 60 + * Final offset = 3904 computeDataSize(...): 3904 + * + */ + +// clang-format off +#define _DECLARE_SOA_STREAM_INFO_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE( \ + VALUE_TYPE, \ + /* Dump scalar */ \ + os << " Scalar " BOOST_PP_STRINGIZE(NAME) " at offset " << offset << " has size " << sizeof(CPP_TYPE) \ + << " and padding " << ((sizeof(CPP_TYPE) - 1) / alignment + 1) * alignment - sizeof(CPP_TYPE) \ + << std::endl; \ + offset += ((sizeof(CPP_TYPE) - 1) / alignment + 1) * alignment; \ + , \ + /* Dump column */ \ + os << " Column " BOOST_PP_STRINGIZE(NAME) " at offset " << offset << " has size " \ + << sizeof(CPP_TYPE) * nElements_ << " and padding " \ + << cms::soa::alignSize(nElements_ * sizeof(CPP_TYPE), alignment) - (nElements_ * sizeof(CPP_TYPE)) \ + << std::endl; \ + offset += cms::soa::alignSize(nElements_ * sizeof(CPP_TYPE), alignment); \ + , \ + /* Dump Eigen column */ \ + os << " Eigen value " BOOST_PP_STRINGIZE(NAME) " at offset " << offset << " has dimension " \ + << "(" << CPP_TYPE::RowsAtCompileTime << " x " << CPP_TYPE::ColsAtCompileTime << ")" \ + << " and per column size " \ + << sizeof(CPP_TYPE::Scalar) * nElements_ \ + << " and padding " \ + << cms::soa::alignSize(nElements_ * sizeof(CPP_TYPE::Scalar), alignment) \ + - (nElements_ * sizeof(CPP_TYPE::Scalar)) \ + << std::endl; \ + offset += cms::soa::alignSize(nElements_ * sizeof(CPP_TYPE::Scalar), alignment) \ + * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ + ) +// clang-format on + +#define _DECLARE_SOA_STREAM_INFO(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_SOA_STREAM_INFO_IMPL TYPE_NAME) + +/** + * Metadata member computing column pitch + */ +// clang-format off +#define _DEFINE_METADATA_MEMBERS_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + byte_size_type BOOST_PP_CAT(NAME, Pitch()) const { \ + return cms::soa::alignSize(sizeof(CPP_TYPE), ParentClass::alignment); \ + } \ + using BOOST_PP_CAT(TypeOf_, NAME) = CPP_TYPE; \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::scalar; \ + SOA_HOST_DEVICE SOA_INLINE \ + CPP_TYPE const* BOOST_PP_CAT(addressOf_, NAME)() const { \ + return parent_.metadata().BOOST_PP_CAT(parametersOf_, NAME)().addr_; \ + } \ + using BOOST_PP_CAT(ParametersTypeOf_, NAME) = \ + cms::soa::SoAParameters_ColumnType::DataType; \ + SOA_HOST_DEVICE SOA_INLINE \ + BOOST_PP_CAT(ParametersTypeOf_, NAME) BOOST_PP_CAT(parametersOf_, NAME)() const { \ + return BOOST_PP_CAT(ParametersTypeOf_, NAME) (parent_.BOOST_PP_CAT(NAME, _)); \ + } \ + SOA_HOST_DEVICE SOA_INLINE \ + CPP_TYPE* BOOST_PP_CAT(addressOf_, NAME)() { \ + return parent_.metadata().BOOST_PP_CAT(parametersOf_, NAME)().addr_; \ + }, \ + /* Column */ \ + using BOOST_PP_CAT(ParametersTypeOf_, NAME) = \ + cms::soa::SoAParameters_ColumnType::DataType; \ + SOA_HOST_DEVICE SOA_INLINE \ + BOOST_PP_CAT(ParametersTypeOf_, NAME) BOOST_PP_CAT(parametersOf_, NAME)() const { \ + return BOOST_PP_CAT(ParametersTypeOf_, NAME) (parent_.BOOST_PP_CAT(NAME, _)); \ + } \ + SOA_HOST_DEVICE SOA_INLINE \ + CPP_TYPE const* BOOST_PP_CAT(addressOf_, NAME)() const { \ + return parent_.metadata().BOOST_PP_CAT(parametersOf_, NAME)().addr_; \ + } \ + SOA_HOST_DEVICE SOA_INLINE \ + CPP_TYPE* BOOST_PP_CAT(addressOf_, NAME)() { \ + return parent_.metadata().BOOST_PP_CAT(parametersOf_, NAME)().addr_; \ + } \ + SOA_HOST_DEVICE SOA_INLINE \ + byte_size_type BOOST_PP_CAT(NAME, Pitch()) const { \ + return cms::soa::alignSize(parent_.nElements_ * sizeof(CPP_TYPE), ParentClass::alignment); \ + } \ + using BOOST_PP_CAT(TypeOf_, NAME) = CPP_TYPE; \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::column;, \ + /* Eigen column */ \ + using BOOST_PP_CAT(ParametersTypeOf_, NAME) = \ + cms::soa::SoAParameters_ColumnType::DataType; \ + SOA_HOST_DEVICE SOA_INLINE \ + BOOST_PP_CAT(ParametersTypeOf_, NAME) BOOST_PP_CAT(parametersOf_, NAME)() const { \ + return BOOST_PP_CAT(ParametersTypeOf_, NAME) ( \ + parent_.BOOST_PP_CAT(NAME, _), \ + parent_.BOOST_PP_CAT(NAME, Stride_)); \ + } \ + SOA_HOST_DEVICE SOA_INLINE \ + byte_size_type BOOST_PP_CAT(NAME, Pitch()) const { \ + return cms::soa::alignSize(parent_.nElements_ * sizeof(CPP_TYPE::Scalar), ParentClass::alignment) \ + * CPP_TYPE::RowsAtCompileTime * CPP_TYPE::ColsAtCompileTime; \ + } \ + using BOOST_PP_CAT(TypeOf_, NAME) = CPP_TYPE ; \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, NAME) = cms::soa::SoAColumnType::eigen; \ + SOA_HOST_DEVICE SOA_INLINE \ + CPP_TYPE::Scalar const* BOOST_PP_CAT(addressOf_, NAME)() const { \ + return parent_.metadata().BOOST_PP_CAT(parametersOf_, NAME)().addr_; \ + } \ + SOA_HOST_DEVICE SOA_INLINE \ + CPP_TYPE::Scalar* BOOST_PP_CAT(addressOf_, NAME)() { \ + return parent_.metadata().BOOST_PP_CAT(parametersOf_, NAME)().addr_; \ + } \ +) +// clang-format on +#define _DEFINE_METADATA_MEMBERS(R, DATA, TYPE_NAME) _DEFINE_METADATA_MEMBERS_IMPL TYPE_NAME + +// clang-format off +#define _DECLARE_MEMBER_TRIVIAL_CONSTRUCTION_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + (BOOST_PP_CAT(NAME, _)(nullptr)), \ + /* Column */ \ + (BOOST_PP_CAT(NAME, _)(nullptr)), \ + /* Eigen column */ \ + (BOOST_PP_CAT(NAME, _)(nullptr)) \ + (BOOST_PP_CAT(NAME, Stride_)(0)) \ + ) +// clang-format on + +#define _DECLARE_MEMBER_TRIVIAL_CONSTRUCTION(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_MEMBER_TRIVIAL_CONSTRUCTION_IMPL TYPE_NAME) + +/** + * Declare the value_element data members + */ +// clang-format off +#define _DEFINE_VALUE_ELEMENT_MEMBERS_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar (empty) */ \ + , \ + /* Column */ \ + CPP_TYPE NAME; \ + , \ + /* Eigen column */ \ + CPP_TYPE NAME; \ + ) +// clang-format on + +#define _DEFINE_VALUE_ELEMENT_MEMBERS(R, DATA, TYPE_NAME) _DEFINE_VALUE_ELEMENT_MEMBERS_IMPL TYPE_NAME + +/** + * List of data members in the value_element constructor arguments + */ +// clang-format off +#define _VALUE_ELEMENT_CTOR_ARGS_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar (empty) */ \ + , \ + /* Column */ \ + (CPP_TYPE NAME), \ + /* Eigen column */ \ + (CPP_TYPE NAME) \ + ) +// clang-format on + +#define _VALUE_ELEMENT_CTOR_ARGS(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_VALUE_ELEMENT_CTOR_ARGS_IMPL TYPE_NAME) + +/** + * List-initalise the value_element data members + */ +// clang-format off +#define _VALUE_ELEMENT_INITIALIZERS_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar (empty) */ \ + , \ + /* Column */ \ + (NAME{NAME}), \ + /* Eigen column */ \ + (NAME{NAME}) \ + ) +// clang-format on + +#define _VALUE_ELEMENT_INITIALIZERS(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_VALUE_ELEMENT_INITIALIZERS_IMPL TYPE_NAME) + +/** + * Computation of the column or scalar pointer location in the memory layout (at SoA construction time) + */ +// clang-format off +#define _ASSIGN_SOA_COLUMN_OR_SCALAR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ + curMem += cms::soa::alignSize(sizeof(CPP_TYPE), alignment); \ + , \ + /* Column */ \ + BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ + curMem += cms::soa::alignSize(nElements_ * sizeof(CPP_TYPE), alignment); \ + , \ + /* Eigen column */ \ + BOOST_PP_CAT(NAME, _) = reinterpret_cast(curMem); \ + curMem += cms::soa::alignSize(nElements_ * sizeof(CPP_TYPE::Scalar), alignment) * CPP_TYPE::RowsAtCompileTime \ + * CPP_TYPE::ColsAtCompileTime; \ + BOOST_PP_CAT(NAME, Stride_) = cms::soa::alignSize(nElements_ * sizeof(CPP_TYPE::Scalar), alignment) \ + / sizeof(CPP_TYPE::Scalar); \ + ) \ + if constexpr (alignmentEnforcement == AlignmentEnforcement::enforced) \ + if (reinterpret_cast(BOOST_PP_CAT(NAME, _)) % alignment) \ + throw std::runtime_error("In layout constructor: misaligned column: " #NAME); +// clang-format on + +#define _ASSIGN_SOA_COLUMN_OR_SCALAR(R, DATA, TYPE_NAME) _ASSIGN_SOA_COLUMN_OR_SCALAR_IMPL TYPE_NAME + +/** + * Computation of the column or scalar size for SoA size computation + */ +// clang-format off +#define _ACCUMULATE_SOA_ELEMENT_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + ret += cms::soa::alignSize(sizeof(CPP_TYPE), alignment); \ + , \ + /* Column */ \ + ret += cms::soa::alignSize(nElements * sizeof(CPP_TYPE), alignment); \ + , \ + /* Eigen column */ \ + ret += cms::soa::alignSize(nElements * sizeof(CPP_TYPE::Scalar), alignment) * CPP_TYPE::RowsAtCompileTime \ + * CPP_TYPE::ColsAtCompileTime; \ + ) +// clang-format on + +#define _ACCUMULATE_SOA_ELEMENT(R, DATA, TYPE_NAME) _ACCUMULATE_SOA_ELEMENT_IMPL TYPE_NAME + +/** + * Direct access to column pointer and indexed access + */ +// clang-format off +#define _DECLARE_SOA_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + SOA_HOST_DEVICE SOA_INLINE CPP_TYPE& NAME() { return *BOOST_PP_CAT(NAME, _); } \ + , \ + /* Column */ \ + SOA_HOST_DEVICE SOA_INLINE CPP_TYPE* NAME() { return BOOST_PP_CAT(NAME, _); } \ + SOA_HOST_DEVICE SOA_INLINE CPP_TYPE& NAME(size_type index) { return BOOST_PP_CAT(NAME, _)[index]; } \ + , \ + /* Eigen column */ \ + /* TODO: implement*/ \ + BOOST_PP_EMPTY() \ + ) +// clang-format on + +#define _DECLARE_SOA_ACCESSOR(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_SOA_ACCESSOR_IMPL TYPE_NAME) + +/** + * Direct access to column pointer (const) and indexed access. + */ +// clang-format off +#define _DECLARE_SOA_CONST_ACCESSOR_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + SOA_HOST_DEVICE SOA_INLINE CPP_TYPE NAME() const { return *(BOOST_PP_CAT(NAME, _)); } \ + , \ + /* Column */ \ + SOA_HOST_DEVICE SOA_INLINE CPP_TYPE const* NAME() const { return BOOST_PP_CAT(NAME, _); } \ + SOA_HOST_DEVICE SOA_INLINE CPP_TYPE NAME(size_type index) const { return *(BOOST_PP_CAT(NAME, _) + index); } \ + , \ + /* Eigen column */ \ + SOA_HOST_DEVICE SOA_INLINE CPP_TYPE::Scalar const* NAME() const { return BOOST_PP_CAT(NAME, _); } \ + SOA_HOST_DEVICE SOA_INLINE size_type BOOST_PP_CAT(NAME, Stride)() { return BOOST_PP_CAT(NAME, Stride_); } \ + ) +// clang-format on + +#define _DECLARE_SOA_CONST_ACCESSOR(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_SOA_CONST_ACCESSOR_IMPL TYPE_NAME) + +/** + * SoA member ROOT streamer read (column pointers). + */ +// clang-format off +#define _STREAMER_READ_SOA_DATA_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + /* TODO: implement*/ \ + , \ + /* Column */ \ + memcpy(BOOST_PP_CAT(NAME, _), onfile.BOOST_PP_CAT(NAME, _), sizeof(CPP_TYPE) * onfile.nElements_); \ + , \ + /* Eigen column */ \ + /* TODO: implement*/ \ + ) +// clang-format on + +#define _STREAMER_READ_SOA_DATA_MEMBER(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_STREAMER_READ_SOA_DATA_MEMBER_IMPL TYPE_NAME) + +/** + * SoA class member declaration (column pointers). + */ +// clang-format off +#define _DECLARE_SOA_DATA_MEMBER_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar */ \ + CPP_TYPE* BOOST_PP_CAT(NAME, _) = nullptr; \ + , \ + /* Column */ \ + CPP_TYPE * BOOST_PP_CAT(NAME, _) = nullptr; \ + , \ + /* Eigen column */ \ + CPP_TYPE::Scalar * BOOST_PP_CAT(NAME, _) = nullptr; \ + byte_size_type BOOST_PP_CAT(NAME, Stride_) = 0; \ + ) +// clang-format on + +#define _DECLARE_SOA_DATA_MEMBER(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_DECLARE_SOA_DATA_MEMBER_IMPL TYPE_NAME) + +#ifdef DEBUG +#define _DO_RANGECHECK true +#else +#define _DO_RANGECHECK false +#endif + +/* + * A macro defining a SoA layout (collection of scalars and columns of equal lengths) + */ +// clang-format off +#define GENERATE_SOA_LAYOUT(CLASS, ...) \ + template \ + struct CLASS { \ + /* these could be moved to an external type trait to free up the symbol names */ \ + using self_type = CLASS; \ + using AlignmentEnforcement = cms::soa::AlignmentEnforcement; \ + \ + /* For CUDA applications, we align to the 128 bytes of the cache lines. \ + * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ + * up to compute capability 8.X. \ + */ \ + using size_type = cms::soa::size_type; \ + using byte_size_type = cms::soa::byte_size_type; \ + constexpr static byte_size_type defaultAlignment = 128; \ + constexpr static byte_size_type alignment = ALIGNMENT; \ + constexpr static bool alignmentEnforcement = ALIGNMENT_ENFORCEMENT; \ + constexpr static byte_size_type conditionalAlignment = \ + alignmentEnforcement == cms::soa::AlignmentEnforcement::enforced ? alignment : 0; \ + /* Those typedefs avoid having commas in macros (which is problematic) */ \ + template \ + using SoAValueWithConf = cms::soa::SoAValue; \ + \ + template \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ + \ + template \ + struct ViewTemplateFreeParams; \ + \ + /* dump the SoA internal structure */ \ + SOA_HOST_ONLY \ + void soaToStreamInternal(std::ostream & os) const { \ + os << #CLASS "(" << nElements_ << " elements, byte alignement= " << alignment << ", @"<< mem_ <<"): " \ + << std::endl; \ + os << " sizeof(" #CLASS "): " << sizeof(CLASS) << std::endl; \ + byte_size_type offset = 0; \ + _ITERATE_ON_ALL(_DECLARE_SOA_STREAM_INFO, ~, __VA_ARGS__) \ + os << "Final offset = " << offset << " computeDataSize(...): " << computeDataSize(nElements_) \ + << std::endl; \ + os << std::endl; \ + } \ + \ + /* Helper function used by caller to externally allocate the storage */ \ + static constexpr byte_size_type computeDataSize(size_type nElements) { \ + byte_size_type ret = 0; \ + _ITERATE_ON_ALL(_ACCUMULATE_SOA_ELEMENT, ~, __VA_ARGS__) \ + return ret; \ + } \ + \ + /** \ + * Helper/friend class allowing SoA introspection. \ + */ \ + struct Metadata { \ + friend CLASS; \ + SOA_HOST_DEVICE SOA_INLINE size_type size() const { return parent_.nElements_; } \ + SOA_HOST_DEVICE SOA_INLINE byte_size_type byteSize() const { return parent_.byteSize_; } \ + SOA_HOST_DEVICE SOA_INLINE byte_size_type alignment() const { return CLASS::alignment; } \ + SOA_HOST_DEVICE SOA_INLINE std::byte* data() { return parent_.mem_; } \ + SOA_HOST_DEVICE SOA_INLINE const std::byte* data() const { return parent_.mem_; } \ + SOA_HOST_DEVICE SOA_INLINE std::byte* nextByte() const { return parent_.mem_ + parent_.byteSize_; } \ + SOA_HOST_DEVICE SOA_INLINE CLASS cloneToNewAddress(std::byte* addr) const { \ + return CLASS(addr, parent_.nElements_); \ + } \ + \ + _ITERATE_ON_ALL(_DEFINE_METADATA_MEMBERS, ~, __VA_ARGS__) \ + \ + struct value_element { \ + SOA_HOST_DEVICE SOA_INLINE value_element( \ + _ITERATE_ON_ALL_COMMA(_VALUE_ELEMENT_CTOR_ARGS, ~, __VA_ARGS__) \ + ) : \ + _ITERATE_ON_ALL_COMMA(_VALUE_ELEMENT_INITIALIZERS, ~, __VA_ARGS__) \ + {} \ + \ + _ITERATE_ON_ALL(_DEFINE_VALUE_ELEMENT_MEMBERS, ~, __VA_ARGS__) \ + }; \ + \ + Metadata& operator=(const Metadata&) = delete; \ + Metadata(const Metadata&) = delete; \ + \ + private: \ + SOA_HOST_DEVICE SOA_INLINE Metadata(const CLASS& parent) : parent_(parent) {} \ + const CLASS& parent_; \ + using ParentClass = CLASS; \ + }; \ + friend Metadata; \ + SOA_HOST_DEVICE SOA_INLINE const Metadata metadata() const { return Metadata(*this); } \ + SOA_HOST_DEVICE SOA_INLINE Metadata metadata() { return Metadata(*this); } \ + \ + /* Trivial constuctor */ \ + CLASS() \ + : mem_(nullptr), \ + nElements_(0), \ + byteSize_(0), \ + _ITERATE_ON_ALL_COMMA(_DECLARE_MEMBER_TRIVIAL_CONSTRUCTION, ~, __VA_ARGS__) {} \ + \ + /* Constructor relying on user provided storage (implementation shared with ROOT streamer) */ \ + SOA_HOST_ONLY CLASS(std::byte* mem, size_type nElements) : mem_(mem), nElements_(nElements), byteSize_(0) { \ + organizeColumnsFromBuffer(); \ + } \ + \ + private: \ + void organizeColumnsFromBuffer() { \ + if constexpr (alignmentEnforcement == cms::soa::AlignmentEnforcement::enforced) \ + if (reinterpret_cast(mem_) % alignment) \ + throw std::runtime_error("In " #CLASS "::" #CLASS ": misaligned buffer"); \ + auto curMem = mem_; \ + _ITERATE_ON_ALL(_ASSIGN_SOA_COLUMN_OR_SCALAR, ~, __VA_ARGS__) \ + /* Sanity check: we should have reached the computed size, only on host code */ \ + byteSize_ = computeDataSize(nElements_); \ + if (mem_ + byteSize_ != curMem) \ + throw std::runtime_error("In " #CLASS "::" #CLASS ": unexpected end pointer."); \ + } \ + \ + public: \ + /* Constructor relying on user provided storage */ \ + SOA_DEVICE_ONLY CLASS(bool devConstructor, std::byte* mem, size_type nElements) : \ + mem_(mem), \ + nElements_(nElements) \ + { \ + auto curMem = mem_; \ + _ITERATE_ON_ALL(_ASSIGN_SOA_COLUMN_OR_SCALAR, ~, __VA_ARGS__) \ + } \ + \ + /* ROOT read streamer */ \ + template \ + void ROOTReadStreamer(T & onfile) { \ + auto size = onfile.metadata().size(); \ + _ITERATE_ON_ALL(_STREAMER_READ_SOA_DATA_MEMBER, ~, __VA_ARGS__) \ + } \ + \ + /* dump the SoA internal structure */ \ + template \ + SOA_HOST_ONLY friend void dump(); \ + \ + private: \ + /* Range checker conditional to the macro _DO_RANGECHECK */ \ + SOA_HOST_DEVICE SOA_INLINE \ + void rangeCheck(size_type index) const { \ + if constexpr (_DO_RANGECHECK) { \ + if (index >= nElements_) { \ + printf("In " #CLASS "::rangeCheck(): index out of range: %zu with nElements: %zu\n", index, nElements_); \ + assert(false); \ + } \ + } \ + } \ + \ + /* data members */ \ + std::byte* mem_; \ + size_type nElements_; \ + byte_size_type byteSize_; \ + _ITERATE_ON_ALL(_DECLARE_SOA_DATA_MEMBER, ~, __VA_ARGS__) \ + /* Making the code conditional is problematic in macros as the commas will interfere with parameter lisings */ \ + /* So instead we make the code unconditional with paceholder names which are protected by a private protection. */ \ + /* This will be handled later as we handle the integration of the view as a subclass of the layout. */ \ + public: \ + _GENERATE_SOA_TRIVIAL_CONST_VIEW(CLASS, \ + SOA_VIEW_LAYOUT_LIST( \ + SOA_VIEW_LAYOUT(BOOST_PP_CAT(CLASS, _parametrized) , BOOST_PP_CAT(instance_, CLASS))), \ + SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA( \ + _VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, CLASS), __VA_ARGS__))) \ + \ + template \ + using ConstViewTemplate = ConstViewTemplateFreeParams; \ + \ + _GENERATE_SOA_TRIVIAL_VIEW(CLASS, \ + SOA_VIEW_LAYOUT_LIST( \ + SOA_VIEW_LAYOUT(BOOST_PP_CAT(CLASS, _parametrized), BOOST_PP_CAT(instance_, CLASS))), \ + SOA_VIEW_VALUE_LIST(_ITERATE_ON_ALL_COMMA( \ + _VIEW_FIELD_FROM_LAYOUT, BOOST_PP_CAT(instance_, CLASS), __VA_ARGS__)), \ + __VA_ARGS__) \ + \ + template \ + using ViewTemplate = ViewTemplateFreeParams; \ + \ + using ConstView = ConstViewTemplate; \ + using View = ViewTemplate; \ + }; +// clang-format on + +#endif // DataFormats_SoATemplate_interface_SoALayout_h diff --git a/DataFormats/SoATemplate/interface/SoAView.h b/DataFormats/SoATemplate/interface/SoAView.h new file mode 100644 index 0000000000000..3e6423d5e83b8 --- /dev/null +++ b/DataFormats/SoATemplate/interface/SoAView.h @@ -0,0 +1,785 @@ +#ifndef DataFormats_SoATemplate_interface_SoAView_h +#define DataFormats_SoATemplate_interface_SoAView_h + +/* + * Structure-of-Arrays templates allowing access to a selection of scalars and columns from one + * or multiple SoA layouts or views. + * This template generator will allow handling subsets of columns from one or multiple SoA views or layouts. + */ + +#include "SoACommon.h" + +#define SOA_VIEW_LAYOUT(TYPE, NAME) (TYPE, NAME) + +#define SOA_VIEW_LAYOUT_LIST(...) __VA_ARGS__ + +#define SOA_VIEW_VALUE(LAYOUT_NAME, LAYOUT_MEMBER) (LAYOUT_NAME, LAYOUT_MEMBER, LAYOUT_MEMBER) + +#define SOA_VIEW_VALUE_RENAME(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) (LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) + +#define SOA_VIEW_VALUE_LIST(...) __VA_ARGS__ + +/* + * A macro defining a SoA view (collection of columns from multiple layouts or views.) + * + * Usage: + * GENERATE_SOA_VIEW(PixelXYConstView, PixelXYView, + * SOA_VIEW_LAYOUT_LIST( + * SOA_VIEW_LAYOUT(PixelDigis, pixelDigis), + * SOA_VIEW_LAYOUT(PixelRecHitsLayout, pixelsRecHit) + * ), + * SOA_VIEW_VALUE_LIST( + * SOA_VIEW_VALUE_RENAME(pixelDigis, x, digisX), + * SOA_VIEW_VALUE_RENAME(pixelDigis, y, digisY), + * SOA_VIEW_VALUE_RENAME(pixelsRecHit, x, recHitsX), + * SOA_VIEW_VALUE_RENAME(pixelsRecHit, y, recHitsY) + * ) + * ); + * + */ + +namespace cms::soa { + + /* Traits for the different column type scenarios */ + /* Value traits passes the class as is in the case of column type and return + * an empty class with functions returning non-scalar as accessors. */ + template + struct ConstValueTraits : public C { + using C::C; + }; + + template + struct ConstValueTraits { + // Just take to SoAValue type to generate the right constructor. + SOA_HOST_DEVICE SOA_INLINE ConstValueTraits(size_type, const typename C::valueType*) {} + SOA_HOST_DEVICE SOA_INLINE ConstValueTraits(size_type, const typename C::Params&) {} + SOA_HOST_DEVICE SOA_INLINE ConstValueTraits(size_type, const typename C::ConstParams&) {} + // Any attempt to do anything with the "scalar" value a const element will fail. + }; + +} // namespace cms::soa + +/* + * Members definitions macros for views + */ + +/** + * Layout templates parametrization + */ +#define _DECLARE_VIEW_LAYOUT_PARAMETRIZED_TEMPLATE_IMPL(TYPE, NAME) \ + (using BOOST_PP_CAT(TYPE, _default) = BOOST_PP_CAT(TYPE, _StagedTemplates) < VIEW_ALIGNMENT, \ + VIEW_ALIGNMENT_ENFORCEMENT > ;) + +#define _DECLARE_VIEW_LAYOUT_PARAMETRIZED_TEMPLATE(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_LAYOUT_PARAMETRIZED_TEMPLATE_IMPL TYPE_NAME) + +/** + * Layout types aliasing for referencing by name + */ +#define _DECLARE_VIEW_LAYOUT_TYPE_ALIAS_IMPL(TYPE, NAME) using BOOST_PP_CAT(TypeOf_, NAME) = TYPE; + +#define _DECLARE_VIEW_LAYOUT_TYPE_ALIAS(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_LAYOUT_TYPE_ALIAS_IMPL TYPE_NAME) + +/** + * Member types aliasing for referencing by name + */ +// clang-format off +#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, CAST) \ + using BOOST_PP_CAT(TypeOf_, LOCAL_NAME) = \ + typename BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::Metadata::BOOST_PP_CAT(TypeOf_, LAYOUT_MEMBER); \ + using BOOST_PP_CAT(ParametersTypeOf_, LOCAL_NAME) = \ + typename BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::Metadata::BOOST_PP_CAT(ParametersTypeOf_, LAYOUT_MEMBER); \ + constexpr static cms::soa::SoAColumnType BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) = \ + BOOST_PP_CAT(TypeOf_, LAYOUT_NAME)::Metadata::BOOST_PP_CAT(ColumnTypeOf_, LAYOUT_MEMBER); \ + SOA_HOST_DEVICE SOA_INLINE \ + const BOOST_PP_CAT(ParametersTypeOf_, LOCAL_NAME) BOOST_PP_CAT(parametersOf_, LOCAL_NAME)() const { \ + return CAST(parent_.BOOST_PP_CAT(LOCAL_NAME, Parameters_)); \ + }; +// clang-format on + +// DATA should be a function used to convert parent_.LOCAL_NAME ## Parameters_ to ParametersTypeOf_ ## LOCAL_NAME, or empty +#define _DECLARE_VIEW_MEMBER_TYPE_ALIAS(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_TYPE_ALIAS_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) + +/** + * Member type pointers for referencing by name + */ +// clang-format off +#define _DECLARE_VIEW_MEMBER_POINTERS_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + SOA_HOST_DEVICE SOA_INLINE auto* BOOST_PP_CAT(addressOf_, LOCAL_NAME)() { \ + return BOOST_PP_CAT(parametersOf_, LOCAL_NAME)().addr_; \ + }; +// clang-format on + +#define _DECLARE_VIEW_MEMBER_POINTERS(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_POINTERS_IMPL LAYOUT_MEMBER_NAME) + +/** + * Member type const pointers for referencing by name + */ +// clang-format off +#define _DECLARE_VIEW_MEMBER_CONST_POINTERS_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + SOA_HOST_DEVICE SOA_INLINE auto const* BOOST_PP_CAT(addressOf_, LOCAL_NAME)() const { \ + return BOOST_PP_CAT(parametersOf_, LOCAL_NAME)().addr_; \ + }; +// clang-format on + +#define _DECLARE_VIEW_MEMBER_CONST_POINTERS(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_CONST_POINTERS_IMPL LAYOUT_MEMBER_NAME) + +/** + * Generator of parameters (layouts/views) for constructor by layouts/views. + */ +#define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS_IMPL(LAYOUT_TYPE, LAYOUT_NAME, DATA) (DATA LAYOUT_TYPE & LAYOUT_NAME) + +#define _DECLARE_VIEW_CONSTRUCTION_PARAMETERS(R, DATA, TYPE_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS_IMPL BOOST_PP_TUPLE_PUSH_BACK(TYPE_NAME, DATA)) + +/** + * Generator of parameters for constructor by column. + */ +#define _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ + (DATA typename BOOST_PP_CAT(Metadata::ParametersTypeOf_, LOCAL_NAME)::TupleOrPointerType LOCAL_NAME) + +#define _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND( \ + _DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) + +/** + * Generator of member initialization from constructor. + * We use a lambda with auto return type to handle multiple possible return types. + */ +// clang-format off +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL(LAYOUT, MEMBER, NAME) \ + (BOOST_PP_CAT(NAME, Parameters_)([&]() -> auto { \ + auto params = LAYOUT.metadata().BOOST_PP_CAT(parametersOf_, MEMBER)(); \ + if constexpr (alignmentEnforcement == AlignmentEnforcement::enforced) \ + if (reinterpret_cast(params.addr_) % alignment) \ + throw std::runtime_error("In constructor by layout: misaligned column: " #NAME); \ + return params; \ + }())) +// clang-format on + +#define _DECLARE_VIEW_MEMBER_INITIALIZERS(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_IMPL LAYOUT_MEMBER_NAME) + +/** + * Generator of size computation for constructor. + * This is the per-layout part of the lambda checking they all have the same size. + */ +// clang-format off +#define _UPDATE_SIZE_OF_VIEW_IMPL(LAYOUT_TYPE, LAYOUT_NAME) \ + if (set) { \ + if (ret != LAYOUT_NAME.metadata().size()) \ + throw std::runtime_error("In constructor by layout: different sizes from layouts."); \ + } else { \ + ret = LAYOUT_NAME.metadata().size(); \ + set = true; \ + } +// clang-format on + +#define _UPDATE_SIZE_OF_VIEW(R, DATA, TYPE_NAME) BOOST_PP_EXPAND(_UPDATE_SIZE_OF_VIEW_IMPL TYPE_NAME) + +/** + * Generator of member initialization from constructor. + * We use a lambda with auto return type to handle multiple possible return types. + */ +// clang-format off +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL(LAYOUT, MEMBER, NAME) \ + ( \ + BOOST_PP_CAT(NAME, Parameters_)([&]() -> auto { \ + if constexpr (alignmentEnforcement == AlignmentEnforcement::enforced) \ + if (Metadata:: BOOST_PP_CAT(ParametersTypeOf_, NAME)::checkAlignment(NAME, alignment)) \ + throw std::runtime_error("In constructor by column: misaligned column: " #NAME); \ + return NAME; \ + }()) \ + ) +// clang-format on + +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN_IMPL LAYOUT_MEMBER_NAME) + +/** + * Generator of layout list. + */ +#define _DECLARE_LAYOUT_LIST_IMPL(LAYOUT, NAME) (NAME) + +#define _DECLARE_LAYOUT_LIST(R, DATA, LAYOUT_MEMBER_NAME) BOOST_PP_EXPAND(_DECLARE_LAYOUT_LIST_IMPL LAYOUT_MEMBER_NAME) + +/** + * Generator of view member list. + */ +#define _DECLARE_VIEW_MEMBER_LIST_IMPL(LAYOUT, MEMBER, NAME) (NAME) + +#define _DECLARE_VIEW_MEMBER_LIST(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_LIST_IMPL LAYOUT_MEMBER_NAME) + +/** + * Generator of member initializer for copy constructor. + */ +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_FROM_OTHER_IMPL(LAYOUT, MEMBER, LOCAL_NAME, DATA) \ + (BOOST_PP_CAT(MEMBER, Parameters_){DATA.BOOST_PP_CAT(MEMBER, Parameters_)}) + +#define _DECLARE_VIEW_MEMBER_INITIALIZERS_FROM_OTHER(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_INITIALIZERS_FROM_OTHER_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) + +/** + * Generator of member assignment for assignment operator. + */ +#define _DECLARE_VIEW_MEMBER_ASSIGNMENT_FROM_OTHER_IMPL(LAYOUT, MEMBER, LOCAL_NAME, DATA) \ + BOOST_PP_CAT(MEMBER, Parameters_) = DATA.BOOST_PP_CAT(MEMBER, Parameters_); + +#define _DECLARE_VIEW_MEMBER_ASSIGNMENT_FROM_OTHER(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_MEMBER_ASSIGNMENT_FROM_OTHER_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) + +/** + * Generator of element members initializer. + */ +#define _DECLARE_VIEW_ELEM_MEMBER_INIT_IMPL(LAYOUT, MEMBER, LOCAL_NAME, DATA) (LOCAL_NAME(DATA, LOCAL_NAME)) + +#define _DECLARE_VIEW_ELEM_MEMBER_INIT(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_ELEM_MEMBER_INIT_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) + +/** + * Helper macro extracting the data type from metadata of a layout or view + */ +#define _COLUMN_TYPE(LAYOUT_NAME, LAYOUT_MEMBER) \ + typename std::remove_pointer::type + +/** + * Generator of parameters for (non-const) element subclass (expanded comma separated). + */ +#define _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ + (DATA typename BOOST_PP_CAT(Metadata::ParametersTypeOf_, LOCAL_NAME) LOCAL_NAME) + +#define _DECLARE_VIEW_ELEMENT_VALUE_ARG(R, DATA, LAYOUT_MEMBER_NAME) \ + _DECLARE_VIEW_ELEMENT_VALUE_ARG_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA) + +/** + * Generator of parameters for (const) element subclass (expanded comma separated). + */ +#define _DECLARE_CONST_VIEW_ELEMENT_VALUE_ARG_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ + (DATA typename BOOST_PP_CAT(Metadata::ParametersTypeOf_, LOCAL_NAME)::ConstType LOCAL_NAME) + +#define _DECLARE_CONST_VIEW_ELEMENT_VALUE_ARG(R, DATA, LAYOUT_MEMBER_NAME) \ + _DECLARE_CONST_VIEW_ELEMENT_VALUE_ARG_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA) + +/** + * Generator of member initialization for constructor of element subclass + */ +#define _DECLARE_VIEW_CONST_ELEM_MEMBER_INIT_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ + (BOOST_PP_CAT(LOCAL_NAME, _)(DATA, LOCAL_NAME)) + +/* declare AoS-like element value args for contructor; these should expand,for columns only */ +#define _DECLARE_VIEW_CONST_ELEM_MEMBER_INIT(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) + +/** + * Declaration of the members accessors of the const element subclass + */ +// clang-format off +#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + SOA_HOST_DEVICE SOA_INLINE \ + const typename SoAConstValueWithConf::RefToConst \ + LOCAL_NAME() const { \ + return BOOST_PP_CAT(LOCAL_NAME, _)(); \ + } +// clang-format on + +#define _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ + _DECLARE_VIEW_CONST_ELEMENT_ACCESSOR_IMPL LAYOUT_MEMBER_NAME + +/** + * Declaration of the private members of the const element subclass + */ +// clang-format off +#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + const cms::soa::ConstValueTraits, \ + BOOST_PP_CAT(Metadata::ColumnTypeOf_, LOCAL_NAME)> \ + BOOST_PP_CAT(LOCAL_NAME, _); +// clang-format on + +#define _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ + _DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER_IMPL LAYOUT_MEMBER_NAME + +/** + * Generator of the member-by-member copy operator of the element subclass. + */ +#define _DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + if constexpr (Metadata::BOOST_PP_CAT(ColumnTypeOf_, LOCAL_NAME) != cms::soa::SoAColumnType::scalar) \ + LOCAL_NAME() = other.LOCAL_NAME(); + +#define _DECLARE_VIEW_ELEMENT_VALUE_COPY(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_ELEMENT_VALUE_COPY_IMPL LAYOUT_MEMBER_NAME) + +/** + * Declaration of the private members of the const element subclass + */ +// clang-format off +#define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + SoAValueWithConf \ + LOCAL_NAME; +// clang-format on + +#define _DECLARE_VIEW_ELEMENT_VALUE_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ + _DECLARE_VIEW_ELEMENT_VALUE_MEMBER_IMPL LAYOUT_MEMBER_NAME + +/** + * Parameters passed to const element subclass constructor in operator[] + */ +#define _DECLARE_VIEW_CONST_ELEMENT_CONSTR_CALL_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + (BOOST_PP_CAT(LOCAL_NAME, Parameters_)) + +#define _DECLARE_VIEW_CONST_ELEMENT_CONSTR_CALL(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_CONST_ELEMENT_CONSTR_CALL_IMPL LAYOUT_MEMBER_NAME) + +/** + * Parameters passed to element subclass constructor in operator[] + * + * The use of const_cast (inside const_cast_SoAParametersImpl) is safe because the constructor of a View binds only to + * non-const arguments. + */ +#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + (const_cast_SoAParametersImpl(base_type::BOOST_PP_CAT(LOCAL_NAME, Parameters_))) + +#define _DECLARE_VIEW_ELEMENT_CONSTR_CALL(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_ELEMENT_CONSTR_CALL_IMPL LAYOUT_MEMBER_NAME) + +/** + * Direct access to column pointer and indexed access + */ +// clang-format off +#define _DECLARE_VIEW_SOA_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + /* Column or scalar */ \ + SOA_HOST_DEVICE SOA_INLINE \ + typename cms::soa::SoAAccessors:: \ + template ColumnType::template AccessType< \ + cms::soa::SoAAccessType::mutableAccess>::NoParamReturnType \ + LOCAL_NAME() { \ + return typename cms::soa::SoAAccessors:: \ + template ColumnType::template AccessType< \ + cms::soa::SoAAccessType::mutableAccess>(const_cast_SoAParametersImpl( \ + base_type:: BOOST_PP_CAT(LOCAL_NAME, Parameters_)))(); \ + } \ + SOA_HOST_DEVICE SOA_INLINE auto& LOCAL_NAME(size_type index) { \ + return typename cms::soa::SoAAccessors:: \ + template ColumnType::template AccessType< \ + cms::soa::SoAAccessType::mutableAccess>(const_cast_SoAParametersImpl( \ + base_type:: BOOST_PP_CAT(LOCAL_NAME, Parameters_)))(index); \ + } +// clang-format on + +#define _DECLARE_VIEW_SOA_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_ACCESSOR_IMPL LAYOUT_MEMBER_NAME) + +/** + * Direct access to column pointer (const) and indexed access. + */ +// clang-format off +#define _DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME) \ + /* Column or scalar */ \ + SOA_HOST_DEVICE SOA_INLINE auto LOCAL_NAME() const { \ + return typename cms::soa::SoAAccessors:: \ + template ColumnType::template AccessType< \ + cms::soa::SoAAccessType::constAccess>(BOOST_PP_CAT(LOCAL_NAME, Parameters_))(); \ + } \ + SOA_HOST_DEVICE SOA_INLINE auto LOCAL_NAME(size_type index) const { \ + return typename cms::soa::SoAAccessors:: \ + template ColumnType::template AccessType< \ + cms::soa::SoAAccessType::constAccess>(BOOST_PP_CAT(LOCAL_NAME, Parameters_))(index); \ + } +// clang-format on + +#define _DECLARE_VIEW_SOA_CONST_ACCESSOR(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_CONST_ACCESSOR_IMPL LAYOUT_MEMBER_NAME) + +/** + * SoA class member declaration (column pointers and parameters). + */ +#define _DECLARE_VIEW_SOA_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ + typename BOOST_PP_CAT(Metadata::ParametersTypeOf_, LOCAL_NAME) BOOST_PP_CAT(LOCAL_NAME, Parameters_); + +#define _DECLARE_VIEW_SOA_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_VIEW_SOA_MEMBER_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) + +/** + * Const SoA class member declaration (column pointers and parameters). + */ +#define _DECLARE_CONST_VIEW_SOA_MEMBER_IMPL(LAYOUT_NAME, LAYOUT_MEMBER, LOCAL_NAME, DATA) \ + typename BOOST_PP_CAT(Metadata::ParametersTypeOf_, LOCAL_NAME)::ConstType BOOST_PP_CAT(LOCAL_NAME, Parameters_); + +#define _DECLARE_CONST_VIEW_SOA_MEMBER(R, DATA, LAYOUT_MEMBER_NAME) \ + BOOST_PP_EXPAND(_DECLARE_CONST_VIEW_SOA_MEMBER_IMPL BOOST_PP_TUPLE_PUSH_BACK(LAYOUT_MEMBER_NAME, DATA)) + +/** + * Assign the value of the view from the values in the value_element. + */ + +// clang-format off +#define _TRIVIAL_VIEW_ASSIGN_VALUE_ELEMENT_IMPL(VALUE_TYPE, CPP_TYPE, NAME) \ + _SWITCH_ON_TYPE(VALUE_TYPE, \ + /* Scalar (empty) */ \ + , \ + /* Column */ \ + NAME() = value.NAME; \ + , \ + /* Eigen column */ \ + NAME() = value.NAME; \ +) +// clang-format on + +#define _TRIVIAL_VIEW_ASSIGN_VALUE_ELEMENT(R, DATA, TYPE_NAME) _TRIVIAL_VIEW_ASSIGN_VALUE_ELEMENT_IMPL TYPE_NAME + +/* ---- MUTABLE VIEW ------------------------------------------------------------------------------------------------ */ +// clang-format off +#define _GENERATE_SOA_VIEW_PART_0(CONST_VIEW, VIEW, LAYOUTS_LIST, VALUE_LIST) \ + template \ + struct VIEW : public CONST_VIEW { \ + /* Declare the parametrized layouts as the default */ \ + /*BOOST_PP_SEQ_CAT(_ITERATE_ON_ALL(_DECLARE_VIEW_LAYOUT_PARAMETRIZED_TEMPLATE, ~, LAYOUTS_LIST)) */ \ + /* these could be moved to an external type trait to free up the symbol names */ \ + using self_type = VIEW; \ + using base_type = CONST_VIEW; +// clang-format on + +// clang-format off +#define _GENERATE_SOA_VIEW_PART_0_NO_DEFAULTS(CONST_VIEW, VIEW, LAYOUTS_LIST, VALUE_LIST) \ + template \ + struct VIEW : public CONST_VIEW { \ + /* Declare the parametrized layouts as the default */ \ + /*BOOST_PP_SEQ_CAT(_ITERATE_ON_ALL(_DECLARE_VIEW_LAYOUT_PARAMETRIZED_TEMPLATE, ~, LAYOUTS_LIST)) */ \ + /* these could be moved to an external type trait to free up the symbol names */ \ + using self_type = VIEW; \ + using base_type = CONST_VIEW; +// clang-format on + +/** + * Split of the const view definition where the parametrized template alias for the layout is defined for layout trivial view. + */ + +// clang-format off +#define _GENERATE_SOA_VIEW_PART_1(CONST_VIEW, VIEW, LAYOUTS_LIST, VALUE_LIST) \ + using size_type = cms::soa::size_type; \ + using byte_size_type = cms::soa::byte_size_type; \ + using AlignmentEnforcement = cms::soa::AlignmentEnforcement; \ + \ + /* For CUDA applications, we align to the 128 bytes of the cache lines. \ + * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ + * up to compute capability 8.X. \ + */ \ + constexpr static byte_size_type defaultAlignment = cms::soa::CacheLineSize::defaultSize; \ + constexpr static byte_size_type alignment = VIEW_ALIGNMENT; \ + constexpr static bool alignmentEnforcement = VIEW_ALIGNMENT_ENFORCEMENT; \ + constexpr static byte_size_type conditionalAlignment = \ + alignmentEnforcement == AlignmentEnforcement::enforced ? alignment : 0; \ + constexpr static bool restrictQualify = RESTRICT_QUALIFY; \ + constexpr static bool rangeChecking = RANGE_CHECKING; \ + /* Those typedefs avoid having commas in macros (which is problematic) */ \ + template \ + using SoAValueWithConf = cms::soa::SoAValue; \ + \ + template \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ + \ + /** \ + * Helper/friend class allowing SoA introspection. \ + */ \ + struct Metadata { \ + friend VIEW; \ + SOA_HOST_DEVICE SOA_INLINE size_type size() const { return parent_.nElements_; } \ + /* Alias layout or view types to name-derived identifyer to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_LAYOUT_TYPE_ALIAS, ~, LAYOUTS_LIST) \ + \ + /* Alias member types to name-derived identifyer to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, const_cast_SoAParametersImpl, VALUE_LIST) \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_POINTERS, ~, VALUE_LIST) \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_CONST_POINTERS, ~, VALUE_LIST) \ + \ + /* Forbid copying to avoid const correctness evasion */ \ + Metadata& operator=(const Metadata&) = delete; \ + Metadata(const Metadata&) = delete; \ + \ + private: \ + SOA_HOST_DEVICE SOA_INLINE Metadata(const VIEW& parent) : parent_(parent) {} \ + const VIEW& parent_; \ + }; \ + \ + friend Metadata; \ + SOA_HOST_DEVICE SOA_INLINE const Metadata metadata() const { return Metadata(*this); } \ + SOA_HOST_DEVICE SOA_INLINE Metadata metadata() { return Metadata(*this); } \ + \ + /* Trivial constuctor */ \ + VIEW() = default; \ + \ + /* Constructor relying on user provided layouts or views */ \ + SOA_HOST_ONLY VIEW(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, BOOST_PP_EMPTY(), LAYOUTS_LIST)) \ + : base_type{_ITERATE_ON_ALL_COMMA(_DECLARE_LAYOUT_LIST, BOOST_PP_EMPTY(), LAYOUTS_LIST)} {} \ + \ + /* Constructor relying on individually provided column addresses */ \ + SOA_HOST_ONLY VIEW(size_type nElements, \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS, \ + BOOST_PP_EMPTY(), \ + VALUE_LIST)) \ + : base_type{nElements, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_LIST, BOOST_PP_EMPTY(), VALUE_LIST)} {} \ + \ + /* Copiable */ \ + VIEW(VIEW const&) = default; \ + VIEW& operator=(VIEW const&) = default; \ + \ + /* Movable */ \ + VIEW(VIEW &&) = default; \ + VIEW& operator=(VIEW &&) = default; \ + \ + /* Trivial destuctor */ \ + ~VIEW() = default; \ + \ + /* AoS-like accessor (const) */ \ + using const_element = typename base_type::const_element; \ + \ + using base_type::operator[]; \ + \ + /* AoS-like accessor (mutable) */ \ + struct element { \ + SOA_HOST_DEVICE SOA_INLINE \ + element(size_type index, /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_VALUE_ARG, BOOST_PP_EMPTY(), VALUE_LIST)) \ + : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEM_MEMBER_INIT, index, VALUE_LIST) {} \ + SOA_HOST_DEVICE SOA_INLINE \ + element& operator=(const element& other) { \ + _ITERATE_ON_ALL(_DECLARE_VIEW_ELEMENT_VALUE_COPY, ~, VALUE_LIST) \ + return *this; \ + } +// clang-format on + +// clang-format off +#define _GENERATE_SOA_VIEW_PART_2(CONST_VIEW, VIEW, LAYOUTS_LIST, VALUE_LIST) \ + _ITERATE_ON_ALL(_DECLARE_VIEW_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ + }; \ + \ + SOA_HOST_DEVICE SOA_INLINE \ + element operator[](size_type index) { \ + if constexpr (rangeChecking == cms::soa::RangeChecking::enabled) { \ + if (index >= base_type::nElements_) \ + SOA_THROW_OUT_OF_RANGE("Out of range index in " #VIEW "::operator[]") \ + } \ + return element{index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)}; \ + } \ + \ + /* inherit const accessors from ConstView */ \ + \ + /* non-const accessors */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_ACCESSOR, ~, VALUE_LIST) \ + \ + /* dump the SoA internal structure */ \ + template \ + SOA_HOST_ONLY friend void dump(); \ + }; +// clang-format on + +/* ---- CONST VIEW -------------------------------------------------------------------------------------------------- */ +// clang-format off +#define _GENERATE_SOA_CONST_VIEW_PART_0(CONST_VIEW, VIEW, LAYOUTS_LIST, VALUE_LIST) \ + template \ + struct CONST_VIEW { \ + /* these could be moved to an external type trait to free up the symbol names */ \ + using self_type = CONST_VIEW; +// clang-format on + +// clang-format off +#define _GENERATE_SOA_CONST_VIEW_PART_0_NO_DEFAULTS(CONST_VIEW, VIEW, LAYOUTS_LIST, VALUE_LIST) \ + template \ + struct CONST_VIEW { \ + /* these could be moved to an external type trait to free up the symbol names */ \ + using self_type = CONST_VIEW; +// clang-format on + +/** + * Split of the const view definition where the parametrized template alias for the layout is defined for layout trivial view. + */ + +// clang-format off +#define _GENERATE_SOA_CONST_VIEW_PART_1(CONST_VIEW, VIEW, LAYOUTS_LIST, VALUE_LIST) \ + using size_type = cms::soa::size_type; \ + using byte_size_type = cms::soa::byte_size_type; \ + using AlignmentEnforcement = cms::soa::AlignmentEnforcement; \ + \ + template \ + friend struct VIEW; \ + \ + /* For CUDA applications, we align to the 128 bytes of the cache lines. \ + * See https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-memory-3-0 this is still valid \ + * up to compute capability 8.X. \ + */ \ + constexpr static byte_size_type defaultAlignment = cms::soa::CacheLineSize::defaultSize; \ + constexpr static byte_size_type alignment = VIEW_ALIGNMENT; \ + constexpr static bool alignmentEnforcement = VIEW_ALIGNMENT_ENFORCEMENT; \ + constexpr static byte_size_type conditionalAlignment = \ + alignmentEnforcement == AlignmentEnforcement::enforced ? alignment : 0; \ + constexpr static bool restrictQualify = RESTRICT_QUALIFY; \ + constexpr static bool rangeChecking = RANGE_CHECKING; \ + /* Those typedefs avoid having commas in macros (which is problematic) */ \ + template \ + using SoAValueWithConf = cms::soa::SoAValue; \ + \ + template \ + using SoAConstValueWithConf = cms::soa::SoAConstValue; \ + \ + /** \ + * Helper/friend class allowing SoA introspection. \ + */ \ + struct Metadata { \ + friend CONST_VIEW; \ + SOA_HOST_DEVICE SOA_INLINE size_type size() const { return parent_.nElements_; } \ + /* Alias layout or view types to name-derived identifyer to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_LAYOUT_TYPE_ALIAS, ~, LAYOUTS_LIST) \ + \ + /* Alias member types to name-derived identifyer to allow simpler definitions */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_TYPE_ALIAS, BOOST_PP_EMPTY(), VALUE_LIST) \ + _ITERATE_ON_ALL(_DECLARE_VIEW_MEMBER_CONST_POINTERS, ~, VALUE_LIST) \ + \ + /* Forbid copying to avoid const correctness evasion */ \ + Metadata& operator=(const Metadata&) = delete; \ + Metadata(const Metadata&) = delete; \ + \ + private: \ + SOA_HOST_DEVICE SOA_INLINE Metadata(const CONST_VIEW& parent) : parent_(parent) {} \ + const CONST_VIEW& parent_; \ + }; \ + \ + friend Metadata; \ + SOA_HOST_DEVICE SOA_INLINE const Metadata metadata() const { return Metadata(*this); } \ + \ + /* Trivial constuctor */ \ + CONST_VIEW() = default; \ + \ + /* Constructor relying on user provided layouts or views */ \ + SOA_HOST_ONLY CONST_VIEW(_ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_PARAMETERS, const, LAYOUTS_LIST)) \ + : nElements_([&]() -> size_type { \ + bool set = false; \ + size_type ret = 0; \ + _ITERATE_ON_ALL(_UPDATE_SIZE_OF_VIEW, BOOST_PP_EMPTY(), LAYOUTS_LIST) \ + return ret; \ + }()), \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS, ~, VALUE_LIST) {} \ + \ + /* Constructor relying on individually provided column addresses */ \ + SOA_HOST_ONLY CONST_VIEW(size_type nElements, \ + _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONSTRUCTION_BYCOLUMN_PARAMETERS, const, VALUE_LIST)) \ + : nElements_(nElements), _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_MEMBER_INITIALIZERS_BYCOLUMN, ~, VALUE_LIST) {} \ + \ + /* Copiable */ \ + CONST_VIEW(CONST_VIEW const&) = default; \ + CONST_VIEW& operator=(CONST_VIEW const&) = default; \ + \ + /* Movable */ \ + CONST_VIEW(CONST_VIEW &&) = default; \ + CONST_VIEW& operator=(CONST_VIEW &&) = default; \ + \ + /* Trivial destuctor */ \ + ~CONST_VIEW() = default; \ + \ + /* AoS-like accessor (const) */ \ + struct const_element { \ + SOA_HOST_DEVICE SOA_INLINE \ + const_element(size_type index, /* Declare parameters */ \ + _ITERATE_ON_ALL_COMMA(_DECLARE_CONST_VIEW_ELEMENT_VALUE_ARG, const, VALUE_LIST)) \ + : _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONST_ELEM_MEMBER_INIT, index, VALUE_LIST) {} \ + _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_ACCESSOR, ~, VALUE_LIST) \ + \ + private: \ + _ITERATE_ON_ALL(_DECLARE_VIEW_CONST_ELEMENT_VALUE_MEMBER, ~, VALUE_LIST) \ + }; \ + \ + SOA_HOST_DEVICE SOA_INLINE \ + const_element operator[](size_type index) const { \ + if constexpr (rangeChecking == cms::soa::RangeChecking::enabled) { \ + if (index >= nElements_) \ + SOA_THROW_OUT_OF_RANGE("Out of range index in " #CONST_VIEW "::operator[]") \ + } \ + return const_element{index, _ITERATE_ON_ALL_COMMA(_DECLARE_VIEW_CONST_ELEMENT_CONSTR_CALL, ~, VALUE_LIST)}; \ + } \ + \ + /* const accessors */ \ + _ITERATE_ON_ALL(_DECLARE_VIEW_SOA_CONST_ACCESSOR, ~, VALUE_LIST) \ + \ + /* dump the SoA internal structure */ \ + template \ + SOA_HOST_ONLY friend void dump(); \ + \ + private: \ + size_type nElements_ = 0; \ + _ITERATE_ON_ALL(_DECLARE_CONST_VIEW_SOA_MEMBER, const, VALUE_LIST) \ +}; +// clang-format on + +// clang-format off +// MAJOR caveat: in order to propagate the LAYOUTS_LIST and VALUE_LIST +#define _GENERATE_SOA_CONST_VIEW(CONST_VIEW, VIEW, LAYOUTS_LIST, VALUE_LIST) \ + _GENERATE_SOA_CONST_VIEW_PART_0(CONST_VIEW, VIEW, \ + SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) \ + _GENERATE_SOA_CONST_VIEW_PART_1(CONST_VIEW, VIEW, \ + SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) + +#define GENERATE_SOA_CONST_VIEW(CONST_VIEW, VIEW, LAYOUTS_LIST, VALUE_LIST) \ + _GENERATE_SOA_CONST_VIEW(CONST_VIEW, BOOST_PP_CAT(CONST_VIEW, Unused_), \ + SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) + +#define _GENERATE_SOA_TRIVIAL_CONST_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST) \ + _GENERATE_SOA_CONST_VIEW_PART_0_NO_DEFAULTS(ConstViewTemplateFreeParams, ViewTemplateFreeParams, \ + SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) \ + using BOOST_PP_CAT(CLASS, _parametrized) = CLASS; \ + _GENERATE_SOA_CONST_VIEW_PART_1(ConstViewTemplateFreeParams, ViewTemplateFreeParams, \ + SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) + +#define _GENERATE_SOA_VIEW(CONST_VIEW, VIEW, LAYOUTS_LIST, VALUE_LIST) \ + _GENERATE_SOA_VIEW_PART_0(CONST_VIEW, VIEW, SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) \ + _GENERATE_SOA_VIEW_PART_1(CONST_VIEW, VIEW, SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) \ + _GENERATE_SOA_VIEW_PART_2(CONST_VIEW, VIEW, SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) + +#define GENERATE_SOA_VIEW(CONST_VIEW, VIEW, LAYOUTS_LIST, VALUE_LIST) \ + _GENERATE_SOA_CONST_VIEW(CONST_VIEW, VIEW, SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) \ + _GENERATE_SOA_VIEW(CONST_VIEW, VIEW, SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) + +#define _GENERATE_SOA_TRIVIAL_VIEW(CLASS, LAYOUTS_LIST, VALUE_LIST, ...) \ + _GENERATE_SOA_VIEW_PART_0_NO_DEFAULTS(ConstViewTemplateFreeParams, ViewTemplateFreeParams, \ + SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) \ + using BOOST_PP_CAT(CLASS, _parametrized) = CLASS; \ + _GENERATE_SOA_VIEW_PART_1(ConstViewTemplateFreeParams, ViewTemplateFreeParams, \ + SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) \ + \ + /* Extra operator=() for mutable element to emulate the aggregate initialisation syntax */ \ + SOA_HOST_DEVICE SOA_INLINE constexpr element & operator=(const typename \ + BOOST_PP_CAT(CLASS, _parametrized)::Metadata::value_element value) { \ + _ITERATE_ON_ALL(_TRIVIAL_VIEW_ASSIGN_VALUE_ELEMENT, ~, __VA_ARGS__) \ + return *this; \ + } \ + \ + _GENERATE_SOA_VIEW_PART_2(ConstViewTemplateFreeParams, ViewTemplateFreeParams, \ + SOA_VIEW_LAYOUT_LIST(LAYOUTS_LIST), SOA_VIEW_VALUE_LIST(VALUE_LIST)) +// clang-format on + +/** + * Helper macro turning layout field declaration into view field declaration. + */ +#define _VIEW_FIELD_FROM_LAYOUT_IMPL(VALUE_TYPE, CPP_TYPE, NAME, DATA) (DATA, NAME, NAME) + +#define _VIEW_FIELD_FROM_LAYOUT(R, DATA, VALUE_TYPE_NAME) \ + BOOST_PP_EXPAND((_VIEW_FIELD_FROM_LAYOUT_IMPL BOOST_PP_TUPLE_PUSH_BACK(VALUE_TYPE_NAME, DATA))) + +#endif // DataFormats_SoATemplate_interface_SoAView_h diff --git a/DataFormats/SoATemplate/test/BuildFile.xml b/DataFormats/SoATemplate/test/BuildFile.xml new file mode 100644 index 0000000000000..7c62e15bde402 --- /dev/null +++ b/DataFormats/SoATemplate/test/BuildFile.xml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + diff --git a/DataFormats/SoATemplate/test/FakeSoA.h b/DataFormats/SoATemplate/test/FakeSoA.h new file mode 100644 index 0000000000000..b0319d5a0bdca --- /dev/null +++ b/DataFormats/SoATemplate/test/FakeSoA.h @@ -0,0 +1,91 @@ +#ifndef DataFormats_SoATemplate_test_FakeSoA_h +#define DataFormats_SoATemplate_test_FakeSoA_h + +// A SoA-like class (with fake alignment (padding)) +#include +#include +#include + +#define myassert(A) \ + if (not(A)) { \ + std::cerr << "Failed assertion: " #A " at " __FILE__ "(" << __LINE__ << ")" << std::endl; \ + abort(); \ + } + +class FakeSoA { +public: + static constexpr size_t padding_ = 128; + + // A fake SoA with 2 columns of uint16_t and uin32_t, plus fake padding. + static size_t computeBufferSize(size_t nElements) { + return nElements * (sizeof(uint16_t) + sizeof(uint32_t)) + padding_ + 0x400; + } + + FakeSoA(std::byte *buffer, size_t nElements) { constFromBufferImpl(buffer, nElements); } + + FakeSoA() : size_(0) { std::cout << "At end of FakeSoA::FakeSoA()" << std::endl; } + + template + void allocateAndIoRead(T &onfile) { + std::cout << "allocateAndIoRead begin" << std::endl; + auto buffSize = FakeSoA::computeBufferSize(onfile.size_); + auto buffer = new std::byte[buffSize]; + std::cout << "Buffer first byte after (alloc) =" << buffer + buffSize << std::endl; + constFromBufferImpl(buffer, onfile.size_); + memcpy(a16_, onfile.a16_, sizeof(uint16_t) * onfile.size_); + memcpy(b32_, onfile.b32_, sizeof(uint32_t) * onfile.size_); + std::cout << "allocateAndIoRead end" << std::endl; + } + + void dump() { + std::cout << "size=" << size_ << " buffer=" << buffer_.get() << " a16=" << a16_ << " b32=" << b32_ + << " (b32 - a16)=" + << reinterpret_cast(reinterpret_cast(b32_) - reinterpret_cast(a16_)) + << " buffer size=" << computeBufferSize(size_) << "(" << std::hex << computeBufferSize(size_) << ")" + << std::endl; + } + + void dumpData() { std::cout << "a16_[0]=" << a16_[0] << " b32_[0]=" << b32_[0] << std::endl; } + + void fill() { + for (int i = 0; i < size_; i++) { + a16_[i] = 42 + i; + b32_[i] = 24 + i; + } + } + + bool check() { + bool result = true; + for (int i = 0; i < size_; i++) { + if (a16_[i] != 42 + i) { + std::cout << "a16 mismatch at i=" << i << "(" << a16_[i] << "/" << 42 + i << ")" << std::endl; + result = false; + } + if (b32_[i] != 24 + (uint32_t)i) { + std::cout << "b32 mismatch at i=" << i << "(" << b32_[i] << "/" << 24 + i << ")" << std::endl; + result = false; + } + } + return result; + } + +private: + void constFromBufferImpl(std::byte *buffer, size_t nElements) { + buffer_.reset(buffer); + size_ = nElements; + a16_ = reinterpret_cast(buffer); + buffer += nElements * sizeof(uint16_t) + padding_; + b32_ = reinterpret_cast(buffer); + buffer += nElements * sizeof(uint32_t); + std::cout << "Buffer first byte after (const) =" << buffer << std::endl; + std::cout << "At end of FakeSoA::constFromBufferImpl(std::byte * buffer, size_t nElements): "; + dump(); + } + + int size_; + uint16_t *a16_ = nullptr; //[size_] + uint32_t *b32_ = nullptr; //[size_] + std::unique_ptr buffer_ = nullptr; //! +}; + +#endif // DataFormats_SoATemplate_test_FakeSoA_h diff --git a/DataFormats/SoATemplate/test/SoALayoutAndView_t.cu b/DataFormats/SoATemplate/test/SoALayoutAndView_t.cu new file mode 100644 index 0000000000000..a4f9b0bc7d78f --- /dev/null +++ b/DataFormats/SoATemplate/test/SoALayoutAndView_t.cu @@ -0,0 +1,275 @@ +#include +#include + +#include +#include + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoAView.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" + +// Test SoA stores and view. +// Use cases +// Multiple stores in a buffer +// Scalars, Columns of scalars and of Eigen vectors +// View to each of them, from one and multiple stores. + +GENERATE_SOA_LAYOUT(SoAHostDeviceLayoutTemplate, + /*SoAHostDeviceViewTemplate,*/ + // predefined static scalars + // size_t size; + // size_t alignment; + + // columns: one value per element + SOA_COLUMN(double, x), + SOA_COLUMN(double, y), + SOA_COLUMN(double, z), + SOA_EIGEN_COLUMN(Eigen::Vector3d, a), + SOA_EIGEN_COLUMN(Eigen::Vector3d, b), + SOA_EIGEN_COLUMN(Eigen::Vector3d, r), + // scalars: one value for the whole structure + SOA_SCALAR(const char*, description), + SOA_SCALAR(uint32_t, someNumber)) + +using SoAHostDeviceLayout = SoAHostDeviceLayoutTemplate<>; +using SoAHostDeviceView = SoAHostDeviceLayout::View; +using SoAHostDeviceConstView = SoAHostDeviceLayout::ConstView; + +GENERATE_SOA_LAYOUT(SoADeviceOnlyLayoutTemplate, + /*SoADeviceOnlyViewTemplate,*/ + SOA_COLUMN(uint16_t, color), + SOA_COLUMN(double, value), + SOA_COLUMN(double*, py), + SOA_COLUMN(uint32_t, count), + SOA_COLUMN(uint32_t, anotherCount)) + +using SoADeviceOnlyLayout = SoADeviceOnlyLayoutTemplate<>; +using SoADeviceOnlyView = SoADeviceOnlyLayout::View; + +// A 1 to 1 view of the store (except for unsupported types). +GENERATE_SOA_VIEW(SoAFullDeviceConstViewTemplate, + SoAFullDeviceViewTemplate, + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SoAHostDeviceLayout, soaHD), + SOA_VIEW_LAYOUT(SoADeviceOnlyLayout, soaDO)), + SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(soaHD, x), + SOA_VIEW_VALUE(soaHD, y), + SOA_VIEW_VALUE(soaHD, z), + SOA_VIEW_VALUE(soaDO, color), + SOA_VIEW_VALUE(soaDO, value), + SOA_VIEW_VALUE(soaDO, py), + SOA_VIEW_VALUE(soaDO, count), + SOA_VIEW_VALUE(soaDO, anotherCount), + SOA_VIEW_VALUE(soaHD, description), + SOA_VIEW_VALUE(soaHD, someNumber))) + +using SoAFullDeviceView = + SoAFullDeviceViewTemplate; + +// Eigen cross product kernel (on store) +__global__ void crossProduct(SoAHostDeviceView soa, const unsigned int numElements) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= numElements) + return; + auto si = soa[i]; + si.r() = si.a().cross(si.b()); +} + +// Device-only producer kernel +__global__ void producerKernel(SoAFullDeviceView soa, const unsigned int numElements) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= numElements) + return; + auto si = soa[i]; + si.color() &= 0x55 << i % (sizeof(si.color()) - sizeof(char)); + si.value() = sqrt(si.x() * si.x() + si.y() * si.y() + si.z() * si.z()); +} + +// Device-only consumer with result in host-device area +__global__ void consumerKernel(SoAFullDeviceView soa, const unsigned int numElements) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= numElements) + return; + auto si = soa[i]; + si.x() = si.color() * si.value(); +} + +// Get a view like the default, except for range checking +using RangeCheckingHostDeviceView = + SoAHostDeviceLayout::ViewTemplate; + +// We expect to just run one thread. +__global__ void rangeCheckKernel(RangeCheckingHostDeviceView soa) { + printf("About to fail range-check in CUDA thread: %d\n", threadIdx.x); + [[maybe_unused]] auto si = soa[soa.metadata().size()]; + printf("Fail: range-check failure should have stopped the kernel.\n"); +} + +int main(void) { + cms::cudatest::requireDevices(); + + cudaStream_t stream; + cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); + + // Non-aligned number of elements to check alignment features. + constexpr unsigned int numElements = 65537; + + // Allocate buffer and store on host + size_t hostDeviceSize = SoAHostDeviceLayout::computeDataSize(numElements); + std::byte* h_buf = nullptr; + cudaCheck(cudaMallocHost(&h_buf, hostDeviceSize)); + SoAHostDeviceLayout h_soahdLayout(h_buf, numElements); + SoAHostDeviceView h_soahd(h_soahdLayout); + SoAHostDeviceConstView h_soahd_c(h_soahdLayout); + + // Alocate buffer, stores and views on the device (single, shared buffer). + size_t deviceOnlySize = SoADeviceOnlyLayout::computeDataSize(numElements); + std::byte* d_buf = nullptr; + cudaCheck(cudaMallocHost(&d_buf, hostDeviceSize + deviceOnlySize)); + SoAHostDeviceLayout d_soahdLayout(d_buf, numElements); + SoADeviceOnlyLayout d_soadoLayout(d_soahdLayout.metadata().nextByte(), numElements); + SoAHostDeviceView d_soahdView(d_soahdLayout); + SoAFullDeviceView d_soaFullView(d_soahdLayout, d_soadoLayout); + + // Assert column alignments + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_x()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_y()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_z()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_a()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_b()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_r()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_description()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_someNumber()) % decltype(h_soahd)::alignment); + + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_x()) % decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_y()) % decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_z()) % decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_a()) % decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_b()) % decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_r()) % decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_description()) % + decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_someNumber()) % + decltype(d_soahdLayout)::alignment); + + assert(0 == + reinterpret_cast(d_soadoLayout.metadata().addressOf_color()) % decltype(d_soadoLayout)::alignment); + assert(0 == + reinterpret_cast(d_soadoLayout.metadata().addressOf_value()) % decltype(d_soadoLayout)::alignment); + assert(0 == + reinterpret_cast(d_soadoLayout.metadata().addressOf_py()) % decltype(d_soadoLayout)::alignment); + assert(0 == + reinterpret_cast(d_soadoLayout.metadata().addressOf_count()) % decltype(d_soadoLayout)::alignment); + assert(0 == reinterpret_cast(d_soadoLayout.metadata().addressOf_anotherCount()) % + decltype(d_soadoLayout)::alignment); + + // Views should get the same alignment as the stores they refer to + assert(0 == reinterpret_cast(d_soaFullView.metadata().addressOf_x()) % decltype(d_soaFullView)::alignment); + assert(0 == reinterpret_cast(d_soaFullView.metadata().addressOf_y()) % decltype(d_soaFullView)::alignment); + assert(0 == reinterpret_cast(d_soaFullView.metadata().addressOf_z()) % decltype(d_soaFullView)::alignment); + // Limitation of views: we have to get scalar member addresses via metadata. + assert(0 == reinterpret_cast(d_soaFullView.metadata().addressOf_description()) % + decltype(d_soaFullView)::alignment); + assert(0 == reinterpret_cast(d_soaFullView.metadata().addressOf_someNumber()) % + decltype(d_soaFullView)::alignment); + assert(0 == + reinterpret_cast(d_soaFullView.metadata().addressOf_color()) % decltype(d_soaFullView)::alignment); + assert(0 == + reinterpret_cast(d_soaFullView.metadata().addressOf_value()) % decltype(d_soaFullView)::alignment); + assert(0 == + reinterpret_cast(d_soaFullView.metadata().addressOf_py()) % decltype(d_soaFullView)::alignment); + assert(0 == + reinterpret_cast(d_soaFullView.metadata().addressOf_count()) % decltype(d_soaFullView)::alignment); + assert(0 == reinterpret_cast(d_soaFullView.metadata().addressOf_anotherCount()) % + decltype(d_soaFullView)::alignment); + + // Initialize and fill the host buffer + std::memset(h_soahdLayout.metadata().data(), 0, hostDeviceSize); + for (size_t i = 0; i < numElements; ++i) { + auto si = h_soahd[i]; + // Tuple assignment... + // elements are: x, y, z, a, b, r + auto v1 = 1.0 * i + 1.0; + auto v2 = 2.0 * i; + auto v3 = 3.0 * i - 1.0; + if (i % 2) { + si = {v1, v2, v3, {v1, v2, v3}, {v3, v2, v1}, {0, 0, 0}}; + } else { + si.x() = si.a()(0) = si.b()(2) = v1; + si.y() = si.a()(1) = si.b()(1) = v2; + si.z() = si.a()(2) = si.b()(0) = v3; + } + } + auto& sn = h_soahd.someNumber(); + sn = numElements + 2; + + // Push to device + cudaCheck(cudaMemcpyAsync(d_buf, h_buf, hostDeviceSize, cudaMemcpyDefault, stream)); + + // Process on device + crossProduct<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soahdView, numElements); + + // Paint the device only with 0xFF initially + cudaCheck(cudaMemsetAsync(d_soadoLayout.metadata().data(), 0xFF, d_soadoLayout.metadata().byteSize(), stream)); + + // Produce to the device only area + producerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements); + + // Consume the device only area and generate a result on the host-device area + consumerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements); + + // Get result back + cudaCheck(cudaMemcpyAsync(h_buf, d_buf, hostDeviceSize, cudaMemcpyDefault, stream)); + + // Wait and validate. + cudaCheck(cudaStreamSynchronize(stream)); + for (size_t i = 0; i < numElements; ++i) { + auto si = h_soahd_c[i]; + assert(si.r() == si.a().cross(si.b())); + double initialX = 1.0 * i + 1.0; + double initialY = 2.0 * i; + double initialZ = 3.0 * i - 1.0; + uint16_t expectedColor = 0x55 << i % (sizeof(uint16_t) - sizeof(char)); + double expectedX = expectedColor * sqrt(initialX * initialX + initialY * initialY + initialZ * initialZ); + if (abs(si.x() - expectedX) / expectedX >= 2 * std::numeric_limits::epsilon()) { + std::cout << "X failed: for i=" << i << std::endl + << "initialX=" << initialX << " initialY=" << initialY << " initialZ=" << initialZ << std::endl + << "expectedX=" << expectedX << std::endl + << "resultX=" << si.x() << " resultY=" << si.y() << " resultZ=" << si.z() << std::endl + << "relativeDiff=" << abs(si.x() - expectedX) / expectedX + << " epsilon=" << std::numeric_limits::epsilon() << std::endl; + assert(false); + } + } + + // Validation of range checking + try { + // Get a view like the default, except for range checking + SoAHostDeviceLayout::ViewTemplate + soa1viewRangeChecking(h_soahdLayout); + // This should throw an exception + [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size()]; + std::cout << "Fail: expected range-check exception not caught on the host." << std::endl; + assert(false); + } catch (const std::out_of_range&) { + std::cout << "Pass: expected range-check exception successfully caught on the host." << std::endl; + } + + // Validation of range checking in a kernel + // Get a view like the default one, except for range checking + RangeCheckingHostDeviceView soa1viewRangeChecking(d_soahdLayout); + + // This should throw an exception in the kernel + rangeCheckKernel<<<1, 1, 0, stream>>>(soa1viewRangeChecking); + + // Wait and confirm that the CUDA kernel failed + try { + cudaCheck(cudaStreamSynchronize(stream)); + std::cout << "Fail: expected range-check exception not caught while executing the kernel." << std::endl; + assert(false); + } catch (const std::runtime_error&) { + std::cout << "Pass: expected range-check exception caught while executing the kernel." << std::endl; + } + + std::cout << "OK" << std::endl; +} diff --git a/DataFormats/SoATemplate/test/SoAStreamer_t.cpp b/DataFormats/SoATemplate/test/SoAStreamer_t.cpp new file mode 100644 index 0000000000000..08b67506051a3 --- /dev/null +++ b/DataFormats/SoATemplate/test/SoAStreamer_t.cpp @@ -0,0 +1,69 @@ +/* + * SoAStreamer_t.cpp + * + * A test validating and the serialization of SoA Layouts to a ROOT file + */ + +#include +#include + +#include +#include + +#include "FakeSoA.h" + +void writeSoA() { + std::cout << "write begin" << std::endl; + constexpr size_t nElements = 128; + + auto buffer = std::make_unique(FakeSoA::computeBufferSize(nElements)); + FakeSoA fsoa(buffer.get(), nElements); + fsoa.dump(); + fsoa.fill(); + if (not fsoa.check()) { + exit(EXIT_FAILURE); + } + + std::unique_ptr myFile(TFile::Open("serializerNoTObj.root", "RECREATE")); + TTree tt("serializerNoTObjTree", "A SoA TTree"); + // In CMSSW, we will get a branch of objects (each row from the branched corresponding to an event) + // So we have a branch with one element for the moment. + [[maybe_unused]] auto Branch = tt.Branch("FakeSoA", &fsoa); + std::cout << "In writeFile(), about to Fill()" << std::endl; + fsoa.dump(); + auto prevGDebug = gDebug; + gDebug = 5; + tt.Fill(); + gDebug = prevGDebug; + tt.Write(); + myFile->Close(); + std::cout << "write end" << std::endl; +} + +void readSoA() { + std::cout << "read begin" << std::endl; + std::unique_ptr myFile(TFile::Open("serializerNoTObj.root", "READ")); + myFile->ls(); + std::unique_ptr fakeSoATree((TTree *)myFile->Get("serializerNoTObjTree")); + fakeSoATree->ls(); + auto prevGDebug = gDebug; + //gDebug = 3; + FakeSoA *fakeSoA = nullptr; + fakeSoATree->SetBranchAddress("FakeSoA", &fakeSoA); + fakeSoATree->GetEntry(0); + gDebug = prevGDebug; + std::cout << "fakeSoAAddress=" << fakeSoA << std::endl; + fakeSoA->dump(); + fakeSoA->dumpData(); + std::cout << "Checking SoA readback..."; + if (not fakeSoA->check()) { + exit(EXIT_FAILURE); + } + std::cout << " OK" << std::endl; +} + +int main() { + writeSoA(); + readSoA(); + return EXIT_SUCCESS; +} diff --git a/DataFormats/SoATemplate/test/classes.h b/DataFormats/SoATemplate/test/classes.h new file mode 100644 index 0000000000000..bba9150237d88 --- /dev/null +++ b/DataFormats/SoATemplate/test/classes.h @@ -0,0 +1 @@ +#include "DataFormats/SoATemplate/test/FakeSoA.h" \ No newline at end of file diff --git a/DataFormats/SoATemplate/test/classes_def.xml b/DataFormats/SoATemplate/test/classes_def.xml new file mode 100644 index 0000000000000..23695971e87e1 --- /dev/null +++ b/DataFormats/SoATemplate/test/classes_def.xml @@ -0,0 +1,14 @@ + + + + allocateAndIoRead(onfile); + ]]> + + diff --git a/HeterogeneousCore/AlpakaCore/BuildFile.xml b/HeterogeneousCore/AlpakaCore/BuildFile.xml new file mode 100644 index 0000000000000..786446de87dce --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/BuildFile.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/HeterogeneousCore/AlpakaCore/interface/MakerMacros.h b/HeterogeneousCore/AlpakaCore/interface/MakerMacros.h new file mode 100644 index 0000000000000..71fece04e7e38 --- /dev/null +++ b/HeterogeneousCore/AlpakaCore/interface/MakerMacros.h @@ -0,0 +1,11 @@ +#ifndef HeterogeneousCore_AlpakaCore_interface_MakerMacros_h +#define HeterogeneousCore_AlpakaCore_interface_MakerMacros_h + +#include "FWCore/Framework/interface/MakerMacros.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +// force expanding ALPAKA_ACCELERATOR_NAMESPACE before stringification inside DEFINE_FWK_MODULE +#define DEFINE_FWK_ALPAKA_MODULE2(name) DEFINE_FWK_MODULE(name) +#define DEFINE_FWK_ALPAKA_MODULE(name) DEFINE_FWK_ALPAKA_MODULE2(ALPAKA_ACCELERATOR_NAMESPACE::name) + +#endif // HeterogeneousCore_AlpakaCore_interface_MakerMacros_h diff --git a/HeterogeneousCore/AlpakaInterface/BuildFile.xml b/HeterogeneousCore/AlpakaInterface/BuildFile.xml new file mode 100644 index 0000000000000..6287a9a38a31f --- /dev/null +++ b/HeterogeneousCore/AlpakaInterface/BuildFile.xml @@ -0,0 +1,2 @@ + + diff --git a/HeterogeneousCore/AlpakaInterface/README.md b/HeterogeneousCore/AlpakaInterface/README.md new file mode 100644 index 0000000000000..709098b772665 --- /dev/null +++ b/HeterogeneousCore/AlpakaInterface/README.md @@ -0,0 +1,6 @@ +## HeterogeneousCore/AlpakaInterface + +This package only depends on the `alpaka` header-only external library, and +provides the interface used by other packages in CMSSW. + +It is safe to be used inside DataFormats packages. diff --git a/HeterogeneousCore/AlpakaInterface/interface/config.h b/HeterogeneousCore/AlpakaInterface/interface/config.h new file mode 100644 index 0000000000000..2ad630440bda4 --- /dev/null +++ b/HeterogeneousCore/AlpakaInterface/interface/config.h @@ -0,0 +1,164 @@ +#ifndef HeterogeneousCore_AlpakaInterface_interface_config_h +#define HeterogeneousCore_AlpakaInterface_interface_config_h + +#include + +#include + +#include "FWCore/Utilities/interface/stringize.h" + +namespace alpaka_common { + + // common types and dimensions + using Idx = uint32_t; + using Extent = uint32_t; + using Offsets = Extent; + + using Dim0D = alpaka::DimInt<0u>; + using Dim1D = alpaka::DimInt<1u>; + using Dim2D = alpaka::DimInt<2u>; + using Dim3D = alpaka::DimInt<3u>; + + template + using Vec = alpaka::Vec; + using Vec1D = Vec; + using Vec2D = Vec; + using Vec3D = Vec; + using Scalar = Vec; + + template + using WorkDiv = alpaka::WorkDivMembers; + using WorkDiv1D = WorkDiv; + using WorkDiv2D = WorkDiv; + using WorkDiv3D = WorkDiv; + + // host types + using DevHost = alpaka::DevCpu; + using PltfHost = alpaka::Pltf; + +} // namespace alpaka_common + +#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED +namespace alpaka_cuda_async { + using namespace alpaka_common; + + using Platform = alpaka::PltfCudaRt; + using Device = alpaka::DevCudaRt; + using Queue = alpaka::QueueCudaRtNonBlocking; + using Event = alpaka::EventCudaRt; + + template + using Acc = alpaka::AccGpuCudaRt; + using Acc1D = Acc; + using Acc2D = Acc; + using Acc3D = Acc; + +} // namespace alpaka_cuda_async + +#ifdef ALPAKA_ACCELERATOR_NAMESPACE +#define ALPAKA_DUPLICATE_NAMESPACE +#else +#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_cuda_async +#define ALPAKA_TYPE_SUFFIX CudaAsync +#endif + +#endif // ALPAKA_ACC_GPU_CUDA_ENABLED + +#ifdef ALPAKA_ACC_GPU_HIP_ENABLED +namespace alpaka_hip_async { + using namespace alpaka_common; + + using Platform = alpaka::PltfHipRt; + using Device = alpaka::DevHipRt; + using Queue = alpaka::QueueHipRtNonBlocking; + using Event = alpaka::EventHipRt; + + template + using Acc = alpaka::AccGpuHipRt; + using Acc1D = Acc; + using Acc2D = Acc; + using Acc3D = Acc; + +} // namespace alpaka_hip_async + +#ifdef ALPAKA_ACCELERATOR_NAMESPACE +#define ALPAKA_DUPLICATE_NAMESPACE +#else +#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_hip_async +#define ALPAKA_TYPE_SUFFIX HipAsync +#endif + +#endif // ALPAKA_ACC_GPU_HIP_ENABLED + +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED +namespace alpaka_serial_sync { + using namespace alpaka_common; + + using Platform = alpaka::PltfCpu; + using Device = alpaka::DevCpu; + using Queue = alpaka::QueueCpuBlocking; + using Event = alpaka::EventCpu; + + template + using Acc = alpaka::AccCpuSerial; + using Acc1D = Acc; + using Acc2D = Acc; + using Acc3D = Acc; + +} // namespace alpaka_serial_sync + +#ifdef ALPAKA_ACCELERATOR_NAMESPACE +#define ALPAKA_DUPLICATE_NAMESPACE +#else +#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_serial_sync +#define ALPAKA_TYPE_SUFFIX SerialSync +#endif + +#endif // ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + +#ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED +namespace alpaka_tbb_async { + using namespace alpaka_common; + + using Platform = alpaka::PltfCpu; + using Device = alpaka::DevCpu; + using Queue = alpaka::QueueCpuNonBlocking; + using Event = alpaka::EventCpu; + + template + using Acc = alpaka::AccCpuTbbBlocks; + using Acc1D = Acc; + using Acc2D = Acc; + using Acc3D = Acc; + +} // namespace alpaka_tbb_async + +#ifdef ALPAKA_ACCELERATOR_NAMESPACE +#define ALPAKA_DUPLICATE_NAMESPACE +#else +#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_tbb_async +#define ALPAKA_TYPE_SUFFIX TbbAsync +#endif + +#endif // ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED + +#if defined ALPAKA_DUPLICATE_NAMESPACE +#error Only one alpaka backend symbol can be defined at the same time: ALPAKA_ACC_GPU_CUDA_ENABLED, ALPAKA_ACC_GPU_HIP_ENABLED, ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED, ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED. +#endif + +#if defined ALPAKA_ACCELERATOR_NAMESPACE + +// create a new backend-specific identifier based on the original type name and a backend-specific suffix +#define ALPAKA_TYPE_ALIAS__(TYPE, SUFFIX) TYPE##SUFFIX +#define ALPAKA_TYPE_ALIAS_(TYPE, SUFFIX) ALPAKA_TYPE_ALIAS__(TYPE, SUFFIX) +#define ALPAKA_TYPE_ALIAS(TYPE) ALPAKA_TYPE_ALIAS_(TYPE, ALPAKA_TYPE_SUFFIX) + +// declare the backend-specific identifier as an alias for the namespace-based type name +#define DECLARE_ALPAKA_TYPE_ALIAS(TYPE) using ALPAKA_TYPE_ALIAS(TYPE) = ALPAKA_ACCELERATOR_NAMESPACE::TYPE + +// define a null-terminated string containing the backend-specific identifier +#define ALPAKA_TYPE_ALIAS_NAME(TYPE) EDM_STRINGIZE(ALPAKA_TYPE_ALIAS(TYPE)) + +#endif // ALPAKA_ACCELERATOR_NAMESPACE + +#endif // HeterogeneousCore_AlpakaInterface_interface_config_h diff --git a/HeterogeneousCore/AlpakaInterface/interface/host.h b/HeterogeneousCore/AlpakaInterface/interface/host.h new file mode 100644 index 0000000000000..147efd9a819a8 --- /dev/null +++ b/HeterogeneousCore/AlpakaInterface/interface/host.h @@ -0,0 +1,16 @@ +#ifndef HeterogeneousCore_AlpakaInterface_interface_host_h +#define HeterogeneousCore_AlpakaInterface_interface_host_h + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace alpaka_common { + + // alpaka host device + static inline DevHost const& host() { + static const auto host = alpaka::getDevByIdx(0u); + return host; + } + +} // namespace alpaka_common + +#endif // HeterogeneousCore_AlpakaInterface_interface_host_h diff --git a/HeterogeneousCore/AlpakaServices/BuildFile.xml b/HeterogeneousCore/AlpakaServices/BuildFile.xml new file mode 100644 index 0000000000000..e165ce9200bbc --- /dev/null +++ b/HeterogeneousCore/AlpakaServices/BuildFile.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/HeterogeneousCore/AlpakaServices/interface/alpaka/AlpakaService.h b/HeterogeneousCore/AlpakaServices/interface/alpaka/AlpakaService.h new file mode 100644 index 0000000000000..df9e1bcd38f93 --- /dev/null +++ b/HeterogeneousCore/AlpakaServices/interface/alpaka/AlpakaService.h @@ -0,0 +1,41 @@ +#ifndef HeterogeneousCore_AlpakaServices_interface_AlpakaService_h +#define HeterogeneousCore_AlpakaServices_interface_AlpakaService_h + +#include + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace edm { + class ActivityRegistry; + class ConfigurationDescriptions; + class ParameterSet; +} // namespace edm + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class AlpakaService { + public: + AlpakaService(edm::ParameterSet const& config, edm::ActivityRegistry&); + ~AlpakaService() = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + bool enabled() const { return enabled_; } + + std::vector const& devices() const { return devices_; } + + Device const& device(uint32_t index) const { return devices_.at(index); } + + private: + bool enabled_ = false; + bool verbose_ = false; + std::vector devices_; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DECLARE_ALPAKA_TYPE_ALIAS(AlpakaService); + +#endif // HeterogeneousCore_AlpakaServices_interface_AlpakaService_h diff --git a/HeterogeneousCore/AlpakaServices/plugins/BuildFile.xml b/HeterogeneousCore/AlpakaServices/plugins/BuildFile.xml new file mode 100644 index 0000000000000..0da58dae89970 --- /dev/null +++ b/HeterogeneousCore/AlpakaServices/plugins/BuildFile.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + diff --git a/HeterogeneousCore/AlpakaServices/plugins/alpaka/plugins.cc b/HeterogeneousCore/AlpakaServices/plugins/alpaka/plugins.cc new file mode 100644 index 0000000000000..4c3e3da3ff3a2 --- /dev/null +++ b/HeterogeneousCore/AlpakaServices/plugins/alpaka/plugins.cc @@ -0,0 +1,5 @@ +#include "FWCore/ServiceRegistry/interface/ServiceMaker.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaServices/interface/alpaka/AlpakaService.h" + +DEFINE_FWK_SERVICE(ALPAKA_TYPE_ALIAS(AlpakaService)); diff --git a/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc b/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc new file mode 100644 index 0000000000000..bc59c755137e8 --- /dev/null +++ b/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc @@ -0,0 +1,78 @@ +#include + +#include + +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "HeterogeneousCore/AlpakaServices/interface/alpaka/AlpakaService.h" + +#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#endif // ALPAKA_ACC_GPU_CUDA_ENABLED + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + AlpakaService::AlpakaService(edm::ParameterSet const& config, edm::ActivityRegistry&) + : enabled_(config.getUntrackedParameter("enabled")), + verbose_(config.getUntrackedParameter("verbose")) { +#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED + // rely on the CUDAService to initialise the CUDA devices + edm::Service cudaService; +#endif // ALPAKA_ACC_GPU_CUDA_ENABLED + + // TODO from Andrea Bocci: + // - handle alpaka caching allocators ? + // - extract and print more information about the platform and devices + + if (not enabled_) { + edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by configuration"; + return; + } + +#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED + if (not cudaService->enabled()) { + enabled_ = false; + edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by CUDAService"; + return; + } +#endif // ALPAKA_ACC_GPU_CUDA_ENABLED + + // enumerate all devices on this platform + uint32_t n = alpaka::getDevCount(); + if (n == 0) { + const std::string platform = boost::core::demangle(typeid(Platform).name()); + edm::LogWarning("AlpakaService") << "Could not find any devices on platform " << platform << ".\n" + << "Disabling " << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << "."; + enabled_ = false; + return; + } + + devices_.reserve(n); + for (uint32_t i = 0; i < n; ++i) { + devices_.push_back(alpaka::getDevByIdx(i)); + //assert(getDeviceIndex(devices_.back()) == static_cast(i)); + } + + { + const char* suffix[] = {"s.", ":", "s:"}; + edm::LogInfo out("AlpakaService"); + out << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " succesfully initialised.\n"; + out << "Found " << n << " device" << suffix[n < 2 ? n : 2]; + for (auto const& device : devices_) { + out << "\n - " << alpaka::getName(device); + } + } + } + + void AlpakaService::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.addUntracked("enabled", true); + desc.addUntracked("verbose", false); + + descriptions.add(ALPAKA_TYPE_ALIAS_NAME(AlpakaService), desc); + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/HeterogeneousCore/AlpakaTest/plugins/BuildFile.xml b/HeterogeneousCore/AlpakaTest/plugins/BuildFile.xml new file mode 100644 index 0000000000000..82a5b499f9d85 --- /dev/null +++ b/HeterogeneousCore/AlpakaTest/plugins/BuildFile.xml @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc b/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc new file mode 100644 index 0000000000000..8b1779e9e4e6e --- /dev/null +++ b/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc @@ -0,0 +1,50 @@ +#include +#include + +#include "DataFormats/PortableTestObjects/interface/TestHostCollection.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/stream/EDAnalyzer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" + +class TestAlpakaAnalyzer : public edm::stream::EDAnalyzer<> { +public: + TestAlpakaAnalyzer(edm::ParameterSet const& config) + : source_{config.getParameter("source")}, token_{consumes(source_)} {} + + void analyze(edm::Event const& event, edm::EventSetup const&) override { + portabletest::TestHostCollection const& product = event.get(token_); + + auto const& view = product.const_view(); + for (int32_t i = 0; i < view.metadata().size(); ++i) { + assert(view[i].id() == i); + } + + edm::LogInfo msg("TestAlpakaAnalyzer"); + msg << source_.encode() << ".size() = " << view.metadata().size() << '\n'; + msg << "data = " << product.buffer().data() << " x = " << view.x() << " y = " << view.y() << " z = " << view.z() + << " id = " << view.id() << '\n'; + msg << std::hex << "[y - x] = 0x" << reinterpret_cast(view.y()) - reinterpret_cast(view.x()) + << " [z - y] = 0x" << reinterpret_cast(view.z()) - reinterpret_cast(view.y()) + << " [id - z] = 0x" << reinterpret_cast(view.id()) - reinterpret_cast(view.z()); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("source"); + descriptions.addWithDefaultLabel(desc); + } + +private: + const edm::InputTag source_; + const edm::EDGetTokenT token_; +}; + +#include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(TestAlpakaAnalyzer); diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc new file mode 100644 index 0000000000000..797125d5f330f --- /dev/null +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc @@ -0,0 +1,40 @@ +// Check that ALPAKA_HOST_ONLY is not defined during device compilation: +#ifdef ALPAKA_HOST_ONLY +#error ALPAKA_HOST_ONLY defined in device compilation +#endif + +#include + +#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#include "TestAlgo.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class TestAlgoKernel { + public: + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::View view, int32_t size) const { + // this example accepts an arbitrary number of blocks and threads, and always uses 1 element per thread + const int32_t thread = alpaka::getIdx(acc)[0u]; + const int32_t stride = alpaka::getWorkDiv(acc)[0u]; + for (auto i = thread; i < size; i += stride) { + view[i] = {0., 0., 0., i}; + } + } + }; + + void TestAlgo::fill(Queue& queue, portabletest::TestDeviceCollection& collection) const { + auto const& deviceProperties = alpaka::getAccDevProps(alpaka::getDev(queue)); + uint32_t maxThreadsPerBlock = deviceProperties.m_blockThreadExtentMax[0]; + + uint32_t threadsPerBlock = maxThreadsPerBlock; + uint32_t blocksPerGrid = (collection->metadata().size() + threadsPerBlock - 1) / threadsPerBlock; + uint32_t elementsPerThread = 1; + auto workDiv = WorkDiv1D{blocksPerGrid, threadsPerBlock, elementsPerThread}; + + alpaka::exec(queue, workDiv, TestAlgoKernel{}, collection.view(), collection->metadata().size()); + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h new file mode 100644 index 0000000000000..64ed9421121b4 --- /dev/null +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h @@ -0,0 +1,16 @@ +#ifndef HeterogeneousCore_AlpakaTest_plugins_alpaka_TestAlgo_h +#define HeterogeneousCore_AlpakaTest_plugins_alpaka_TestAlgo_h + +#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class TestAlgo { + public: + void fill(Queue& queue, portabletest::TestDeviceCollection& collection) const; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // HeterogeneousCore_AlpakaTest_plugins_alpaka_TestAlgo_h diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc new file mode 100644 index 0000000000000..00770e3cc974e --- /dev/null +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc @@ -0,0 +1,75 @@ +#include +#include + +#include + +#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/Utilities/interface/StreamID.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaServices/interface/alpaka/AlpakaService.h" + +#include "TestAlgo.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class TestAlpakaProducer : public edm::stream::EDProducer<> { + public: + TestAlpakaProducer(edm::ParameterSet const& config) + : deviceToken_{produces()}, size_{config.getParameter("size")} {} + + void beginStream(edm::StreamID sid) override { + // choose a device based on the EDM stream number + edm::Service service; + if (not service->enabled()) { + throw cms::Exception("Configuration") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " is disabled."; + } + auto& devices = service->devices(); + unsigned int index = sid.value() % devices.size(); + device_ = devices[index]; + } + + void produce(edm::Event& event, edm::EventSetup const&) override { + // create a queue to submit async work + Queue queue{*device_}; + portabletest::TestDeviceCollection deviceProduct{size_, *device_}; + + // run the algorithm, potentially asynchronously + algo_.fill(queue, deviceProduct); + + // wait for any asynchronous work to complete + alpaka::wait(queue); + + event.emplace(deviceToken_, std::move(deviceProduct)); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("size"); + descriptions.addWithDefaultLabel(desc); + } + + private: + const edm::EDPutTokenT deviceToken_; + const int32_t size_; + + // device associated to the EDM stream + std::optional device_; + + // implementation of the algorithm + TestAlgo algo_; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(TestAlpakaProducer); diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaTranscriber.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaTranscriber.cc new file mode 100644 index 0000000000000..d9cea435bbcd5 --- /dev/null +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaTranscriber.cc @@ -0,0 +1,77 @@ +// The "Transcriber" makes sense only across different memory spaces +#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) or defined(ALPAKA_ACC_GPU_HIP_ENABLED) + +#include +#include + +#include + +#include "DataFormats/PortableTestObjects/interface/TestHostCollection.h" +#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/Utilities/interface/StreamID.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaServices/interface/alpaka/AlpakaService.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class TestAlpakaTranscriber : public edm::stream::EDProducer<> { + public: + TestAlpakaTranscriber(edm::ParameterSet const& config) + : deviceToken_{consumes(config.getParameter("source"))}, hostToken_{produces()} {} + + void beginStream(edm::StreamID sid) override { + // choose a device based on the EDM stream number + edm::Service service; + if (not service->enabled()) { + throw cms::Exception("Configuration") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " is disabled."; + } + auto& devices = service->devices(); + unsigned int index = sid.value() % devices.size(); + device_ = devices[index]; + } + + void produce(edm::Event& event, edm::EventSetup const&) override { + // create a queue to submit async work + Queue queue{*device_}; + portabletest::TestDeviceCollection const& deviceProduct = event.get(deviceToken_); + + portabletest::TestHostCollection hostProduct{deviceProduct->metadata().size(), alpaka_common::host(), *device_}; + alpaka::memcpy(queue, hostProduct.buffer(), deviceProduct.const_buffer()); + + // wait for any async work to complete + alpaka::wait(queue); + + event.emplace(hostToken_, std::move(hostProduct)); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("source"); + descriptions.addWithDefaultLabel(desc); + } + + private: + const edm::EDGetTokenT deviceToken_; + const edm::EDPutTokenT hostToken_; + + // device associated to the EDM stream + std::optional device_; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(TestAlpakaTranscriber); + +#endif // defined(ALPAKA_ACC_GPU_CUDA_ENABLED) or defined(ALPAKA_ACC_GPU_HIP_ENABLED) diff --git a/HeterogeneousCore/AlpakaTest/test/BuildFile.xml b/HeterogeneousCore/AlpakaTest/test/BuildFile.xml new file mode 100644 index 0000000000000..c53ee8acdb820 --- /dev/null +++ b/HeterogeneousCore/AlpakaTest/test/BuildFile.xml @@ -0,0 +1,4 @@ + diff --git a/HeterogeneousCore/AlpakaTest/test/reader.py b/HeterogeneousCore/AlpakaTest/test/reader.py new file mode 100644 index 0000000000000..8ad854082d38f --- /dev/null +++ b/HeterogeneousCore/AlpakaTest/test/reader.py @@ -0,0 +1,27 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process('Reader') + +# read the products from a 'test.root' file +process.source = cms.Source('PoolSource', + fileNames = cms.untracked.vstring('file:test.root') +) + +# enable logging for the TestAlpakaAnalyzer +process.MessageLogger.TestAlpakaAnalyzer = cms.untracked.PSet() + +# analyse the first product +process.testAnalyzer = cms.EDAnalyzer('TestAlpakaAnalyzer', + source = cms.InputTag('testProducer') +) + +# analyse the second product +process.testAnalyzerSerial = cms.EDAnalyzer('TestAlpakaAnalyzer', + source = cms.InputTag('testProducerSerial') +) + +process.cuda_path = cms.Path(process.testAnalyzer) + +process.serial_path = cms.Path(process.testAnalyzerSerial) + +process.maxEvents.input = 10 diff --git a/HeterogeneousCore/AlpakaTest/test/writer.py b/HeterogeneousCore/AlpakaTest/test/writer.py new file mode 100644 index 0000000000000..57fcb51a376f2 --- /dev/null +++ b/HeterogeneousCore/AlpakaTest/test/writer.py @@ -0,0 +1,81 @@ +import FWCore.ParameterSet.Config as cms +from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA + +process = cms.Process('Writer') + +process.source = cms.Source('EmptySource') + +process.load('Configuration.StandardSequences.Accelerators_cff') + +# enable logging for the AlpakaService and TestAlpakaAnalyzer +process.MessageLogger.TestAlpakaAnalyzer = cms.untracked.PSet() +process.MessageLogger.AlpakaService = cms.untracked.PSet() + +# enable alpaka-based heterogeneous modules +process.AlpakaServiceCudaAsync = cms.Service('AlpakaServiceCudaAsync') +process.AlpakaServiceSerialSync = cms.Service('AlpakaServiceSerialSync') + +# run the producer on a CUDA gpu (if available) +process.testProducerCuda = cms.EDProducer('alpaka_cuda_async::TestAlpakaProducer', + size = cms.int32(42) +) + +# copy the product from the gpu (if available) to the host +process.testTranscriberFromCuda = cms.EDProducer('alpaka_cuda_async::TestAlpakaTranscriber', + source = cms.InputTag('testProducerCuda') +) + +# run the producer on the cpu +process.testProducerCpu = cms.EDProducer('alpaka_serial_sync::TestAlpakaProducer', + size = cms.int32(42) +) + +# either run the producer on a CUDA gpu (if available) and copy the product to the cpu, or run the producer directly on the cpu +process.testProducer = SwitchProducerCUDA( + cpu = cms.EDAlias( + testProducerCpu = cms.VPSet(cms.PSet(type = cms.string('*'))) + ), + cuda = cms.EDAlias( + testTranscriberFromCuda = cms.VPSet(cms.PSet(type = cms.string('*'))) + ) +) + +# analyse the product +process.testAnalyzer = cms.EDAnalyzer('TestAlpakaAnalyzer', + source = cms.InputTag('testProducer') +) + +# run a second producer explicitly on the cpu +process.testProducerSerial = cms.EDProducer('alpaka_serial_sync::TestAlpakaProducer', + size = cms.int32(99) +) + +# analyse the second product +process.testAnalyzerSerial = cms.EDAnalyzer('TestAlpakaAnalyzer', + source = cms.InputTag('testProducerSerial') +) + +# write the two products to a 'test.root' file +process.output = cms.OutputModule('PoolOutputModule', + fileName = cms.untracked.string('test.root'), + outputCommands = cms.untracked.vstring( + 'drop *', + 'keep *_testProducer_*_*', + 'keep *_testProducerSerial_*_*', + ) +) + +process.producer_task = cms.Task(process.testProducerCuda, process.testTranscriberFromCuda, process.testProducerCpu) + +process.process_path = cms.Path( + process.testProducer + + process.testAnalyzer, + process.producer_task) + +process.serial_path = cms.Path( + process.testProducerSerial + + process.testAnalyzerSerial) + +process.output_path = cms.EndPath(process.output) + +process.maxEvents.input = 10