diff --git a/DataFormats/TrackSoA/BuildFile.xml b/DataFormats/TrackSoA/BuildFile.xml
new file mode 100644
index 0000000000000..ac764cf5b95ff
--- /dev/null
+++ b/DataFormats/TrackSoA/BuildFile.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/DataFormats/TrackSoA/README.md b/DataFormats/TrackSoA/README.md
new file mode 100644
index 0000000000000..433dfb0d656c7
--- /dev/null
+++ b/DataFormats/TrackSoA/README.md
@@ -0,0 +1,60 @@
+# TrackSoA Data Formats
+
+`DataFormat`s meant to be used on Host (CPU) or Device (GPU) for
+storing information about `TrackSoA`s created during the Pixel-local Reconstruction
+chain. It stores data in an SoA manner.
+
+The host format is inheriting from `DataFormats/Portable/interface/PortableHostCollection.h`,
+while the device format is inheriting from `DataFormats/Portable/interface/PortableDeviceCollection.h`
+
+Both formats use the same SoA Layout (`TrackSoA::Layout`) which is generated
+via the `GENERATE_SOA_LAYOUT` macro in the `TrackDefinitions.h` file.
+
+## Notes
+
+-`hitIndices` and `detIndices`, instances of `HitContainer`, have been added into the
+layout as `SOA_SCALAR`s, meaning that they manage their own data independently from the SoA
+`Layout`. This could be improved in the future, if `HitContainer` (aka a `OneToManyAssoc` of fixed size)
+is replaced, but there don't seem to be any conflicts in including it in the `Layout` like this.
+- Host and Device classes should **not** be created via inheritance, as they're done here,
+but via composition. See [this discussion](https://github.com/cms-sw/cmssw/pull/40465#discussion_r1066039309).
+
+## TracksHost
+
+The version of the data format to be used for storing `TrackSoA` data on the CPU.
+Instances of this class are to be used for:
+
+- Having a place to copy data to host from device, via `Memcpy`, or
+- Running host-side algorithms using data stored in an SoA manner.
+
+## TracksDevice
+
+The version of the data format to be used for storing `TrackSoA` data on the GPU.
+
+Instances of `TracksDevice` are to be created on host and be
+used on device only. To do so, the instance's `view()` method is to be called
+to pass a `View` to any kernel launched. Accessing data from the `view()` is not
+possible on the host side.
+
+## TracksSoACollection
+
+Depending on the Alpaka accelerator back-end enabled, `TrackSoACollection` is an alias to either the Host or Device SoA:
+
+```cpp
+template
+ using TrackSoACollection = std::conditional_t,
+ TrackSoAHost,
+ TrackSoADevice>;
+```
+
+## Utilities
+
+`alpaka/TrackUtilities.h` contains a collection of methods which were originally
+defined as class methods inside either `TrackSoAHeterogeneousT` and `TrajectoryStateSoAT`
+which have been adapted to operate on `View` instances, so that they are callable
+from within `__global__` kernels, on both CPU and CPU.
+
+## Use case
+
+See `test/TrackSoAHeterogeneous_test.cpp` for a simple example of instantiation,
+processing and copying from device to host.
diff --git a/DataFormats/TrackSoA/interface/TrackDefinitions.h b/DataFormats/TrackSoA/interface/TrackDefinitions.h
new file mode 100644
index 0000000000000..6bd36b5bd3cd1
--- /dev/null
+++ b/DataFormats/TrackSoA/interface/TrackDefinitions.h
@@ -0,0 +1,32 @@
+#ifndef DataFormats_Track_interface_TrackDefinitions_h
+#define DataFormats_Track_interface_TrackDefinitions_h
+#include
+#include
+#include
+
+namespace pixelTrack {
+
+ enum class Quality : uint8_t { bad = 0, edup, dup, loose, strict, tight, highPurity, notQuality };
+ constexpr uint32_t qualitySize{uint8_t(Quality::notQuality)};
+ constexpr std::string_view qualityName[qualitySize]{"bad", "edup", "dup", "loose", "strict", "tight", "highPurity"};
+ inline Quality qualityByName(std::string_view name) {
+ auto qp = std::find(qualityName, qualityName + qualitySize, name) - qualityName;
+ auto ret = static_cast(qp);
+
+ if (ret == pixelTrack::Quality::notQuality)
+ throw std::invalid_argument(std::string(name) + " is not a pixelTrack::Quality!");
+
+ return ret;
+ }
+
+#ifdef GPU_SMALL_EVENTS
+ // kept for testing and debugging
+ constexpr uint32_t maxNumber() { return 2 * 1024; }
+#else
+ // tested on MC events with 55-75 pileup events
+ constexpr uint32_t maxNumber() { return 32 * 1024; }
+#endif
+
+} // namespace pixelTrack
+
+#endif
diff --git a/DataFormats/TrackSoA/interface/TracksDevice.h b/DataFormats/TrackSoA/interface/TracksDevice.h
new file mode 100644
index 0000000000000..6ef28014bab63
--- /dev/null
+++ b/DataFormats/TrackSoA/interface/TracksDevice.h
@@ -0,0 +1,38 @@
+#ifndef DataFormats_Track_interface_TracksDevice_h
+#define DataFormats_Track_interface_TracksDevice_h
+
+#include
+#include
+#include "DataFormats/TrackSoA/interface/TracksSoA.h"
+#include "DataFormats/TrackSoA/interface/TrackDefinitions.h"
+#include "DataFormats/Portable/interface/PortableDeviceCollection.h"
+
+// TODO: The class is created via inheritance of the PortableCollection.
+// This is generally discouraged, and should be done via composition.
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306
+template
+class TracksDevice : public PortableDeviceCollection, TDev> {
+public:
+ static constexpr int32_t S = TrackerTraits::maxNumberOfTuples; //TODO: this could be made configurable at runtime
+ TracksDevice() = default; // necessary for ROOT dictionaries
+
+ using PortableDeviceCollection, TDev>::view;
+ using PortableDeviceCollection, TDev>::const_view;
+ using PortableDeviceCollection, TDev>::buffer;
+
+ // Constructor which specifies the SoA size
+ template
+ explicit TracksDevice(TQueue& queue)
+ : PortableDeviceCollection, TDev>(S, queue) {}
+};
+
+namespace pixelTrack {
+
+ template
+ using TracksDevicePhase1 = TracksDevice;
+ template
+ using TracksDevicePhase2 = TracksDevice;
+
+} // namespace pixelTrack
+
+#endif // DataFormats_Track_TracksDevice_H
diff --git a/DataFormats/TrackSoA/interface/TracksHost.h b/DataFormats/TrackSoA/interface/TracksHost.h
new file mode 100644
index 0000000000000..a8f459eac066c
--- /dev/null
+++ b/DataFormats/TrackSoA/interface/TracksHost.h
@@ -0,0 +1,42 @@
+#ifndef DataFormats_Track_TracksHost_H
+#define DataFormats_Track_TracksHost_H
+
+#include
+#include
+#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+#include "DataFormats/TrackSoA/interface/TracksSoA.h"
+#include "DataFormats/TrackSoA/interface/TrackDefinitions.h"
+#include "DataFormats/Portable/interface/PortableHostCollection.h"
+
+// TODO: The class is created via inheritance of the PortableHostCollection.
+// This is generally discouraged, and should be done via composition.
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306
+template
+class TracksHost : public PortableHostCollection> {
+public:
+ static constexpr int32_t S = TrackerTraits::maxNumberOfTuples; //TODO: this could be made configurable at runtime
+ TracksHost() = default; // Needed for the dictionary; not sure if line above is needed anymore
+
+ using PortableHostCollection>::view;
+ using PortableHostCollection>::const_view;
+ using PortableHostCollection>::buffer;
+
+ // Constructor which specifies the SoA size
+ template
+ explicit TracksHost(TQueue& queue)
+ : PortableHostCollection>(S, queue) {}
+
+ // Constructor which specifies the DevHost
+ explicit TracksHost(alpaka_common::DevHost const& host)
+ : PortableHostCollection>(S, host) {}
+};
+
+namespace pixelTrack {
+
+ using TracksHostPhase1 = TracksHost;
+ using TracksHostPhase2 = TracksHost;
+ using TracksHostHIonPhase1 = TracksHost;
+
+} // namespace pixelTrack
+
+#endif // DataFormats_Track_TracksHost_H
diff --git a/DataFormats/TrackSoA/interface/TracksSoA.h b/DataFormats/TrackSoA/interface/TracksSoA.h
new file mode 100644
index 0000000000000..bc3a8c4be9cb5
--- /dev/null
+++ b/DataFormats/TrackSoA/interface/TracksSoA.h
@@ -0,0 +1,56 @@
+#ifndef DataFormats_Track_interface_TrackLayout_h
+#define DataFormats_Track_interface_TrackLayout_h
+
+#include
+#include "HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h"
+#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+#include "DataFormats/SoATemplate/interface/SoALayout.h"
+#include "DataFormats/TrackSoA/interface/TrackDefinitions.h"
+
+namespace reco {
+
+ template
+ struct TrackSoA {
+ static constexpr int32_t S = TrackerTraits::maxNumberOfTuples;
+ static constexpr int32_t H = TrackerTraits::avgHitsPerTrack;
+ // Aliases in order to not confuse the GENERATE_SOA_LAYOUT
+ // macro with weird colons and angled brackets.
+ using Vector5f = Eigen::Matrix;
+ using Vector15f = Eigen::Matrix;
+ using Quality = pixelTrack::Quality;
+
+ using hindex_type = uint32_t;
+
+ using HitContainer = cms::alpakatools::OneToManyAssocSequential;
+
+ GENERATE_SOA_LAYOUT(Layout,
+ SOA_COLUMN(Quality, quality),
+ SOA_COLUMN(float, chi2),
+ SOA_COLUMN(int8_t, nLayers),
+ SOA_COLUMN(float, eta),
+ SOA_COLUMN(float, pt),
+ SOA_EIGEN_COLUMN(Vector5f, state),
+ SOA_EIGEN_COLUMN(Vector15f, covariance),
+ SOA_SCALAR(int, nTracks),
+ SOA_SCALAR(HitContainer, hitIndices),
+ SOA_SCALAR(HitContainer, detIndices))
+ };
+
+ template
+ using TrackLayout = typename reco::TrackSoA::template Layout<>;
+ template
+ using TrackSoAView = typename reco::TrackSoA::template Layout<>::View;
+ template
+ using TrackSoAConstView = typename reco::TrackSoA::template Layout<>::ConstView;
+
+ template
+ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float charge(const TrackSoAConstView &tracks,
+ int32_t i) {
+ //was: std::copysign(1.f, tracks[i].state()(2)). Will be constexpr with C++23
+ float v = tracks[i].state()(2);
+ return float((0.0f < v) - (v < 0.0f));
+ }
+
+} // namespace reco
+
+#endif
diff --git a/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h b/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h
new file mode 100644
index 0000000000000..8affb29845779
--- /dev/null
+++ b/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h
@@ -0,0 +1,197 @@
+#ifndef DataFormats_Track_interface_alpaka_TrackUtilities_h
+#define DataFormats_Track_interface_alpaka_TrackUtilities_h
+
+#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+#include "DataFormats/TrackSoA/interface/TrackDefinitions.h"
+#include "DataFormats/TrackSoA/interface/TracksSoA.h"
+
+// Methods that operate on View and ConstView of the TrackSoA, and cannot be class methods.
+template
+struct TracksUtilities {
+ using TrackSoAView = typename reco::TrackSoA::template Layout<>::View;
+ using TrackSoAConstView = typename reco::TrackSoA::template Layout<>::ConstView;
+ using hindex_type = typename reco::TrackSoA::hindex_type;
+
+ // State at the Beam spot
+ // phi,tip,1/pt,cotan(theta),zip
+ /* ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float charge(const TrackSoAConstView &tracks, int32_t i) {
+ //was: std::copysign(1.f, tracks[i].state()(2)). Will be constexpr with C++23
+ float v = tracks[i].state()(2);
+ return float((0.0f < v) - (v < 0.0f));
+ }
+*/
+ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float phi(const TrackSoAConstView &tracks, int32_t i) {
+ return tracks[i].state()(0);
+ }
+
+ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float tip(const TrackSoAConstView &tracks, int32_t i) {
+ return tracks[i].state()(1);
+ }
+
+ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr float zip(const TrackSoAConstView &tracks, int32_t i) {
+ return tracks[i].state()(4);
+ }
+
+ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr bool isTriplet(const TrackSoAConstView &tracks, int i) {
+ return tracks[i].nLayers() == 3;
+ }
+
+ template
+ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr void copyFromCircle(
+ TrackSoAView &tracks, V3 const &cp, M3 const &ccov, V2 const &lp, M2 const &lcov, float b, int32_t i) {
+ tracks[i].state() << cp.template cast(), lp.template cast();
+
+ tracks[i].state()(2) = tracks[i].state()(2) * b;
+ auto cov = tracks[i].covariance();
+ cov(0) = ccov(0, 0);
+ cov(1) = ccov(0, 1);
+ cov(2) = b * float(ccov(0, 2));
+ cov(4) = cov(3) = 0;
+ cov(5) = ccov(1, 1);
+ cov(6) = b * float(ccov(1, 2));
+ cov(8) = cov(7) = 0;
+ cov(9) = b * b * float(ccov(2, 2));
+ cov(11) = cov(10) = 0;
+ cov(12) = lcov(0, 0);
+ cov(13) = lcov(0, 1);
+ cov(14) = lcov(1, 1);
+ }
+
+ template
+ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr void copyFromDense(TrackSoAView &tracks,
+ V5 const &v,
+ M5 const &cov,
+ int32_t i) {
+ tracks[i].state() = v.template cast();
+ for (int j = 0, ind = 0; j < 5; ++j)
+ for (auto k = j; k < 5; ++k)
+ tracks[i].covariance()(ind++) = cov(j, k);
+ }
+
+ template
+ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr void copyToDense(const TrackSoAConstView &tracks,
+ V5 &v,
+ M5 &cov,
+ int32_t i) {
+ v = tracks[i].state().template cast();
+ for (int j = 0, ind = 0; j < 5; ++j) {
+ cov(j, j) = tracks[i].covariance()(ind++);
+ for (auto k = j + 1; k < 5; ++k)
+ cov(k, j) = cov(j, k) = tracks[i].covariance()(ind++);
+ }
+ }
+
+ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr int computeNumberOfLayers(const TrackSoAConstView &tracks,
+ int32_t i) {
+ auto pdet = tracks.detIndices().begin(i);
+ int nl = 1;
+ auto ol = pixelTopology::getLayer(*pdet);
+ for (; pdet < tracks.detIndices().end(i); ++pdet) {
+ auto il = pixelTopology::getLayer(*pdet);
+ if (il != ol)
+ ++nl;
+ ol = il;
+ }
+ return nl;
+ }
+
+ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr int nHits(const TrackSoAConstView &tracks, int i) {
+ return tracks.detIndices().size(i);
+ }
+};
+
+namespace pixelTrack {
+
+ template
+ struct QualityCutsT {};
+
+ template
+ struct QualityCutsT> {
+ using TrackSoAView = typename reco::TrackSoA::template Layout<>::View;
+ using TrackSoAConstView = typename reco::TrackSoA::template Layout<>::ConstView;
+ using tracksHelper = TracksUtilities;
+ float chi2Coeff[4];
+ float chi2MaxPt; // GeV
+ float chi2Scale;
+
+ struct Region {
+ float maxTip; // cm
+ float minPt; // GeV
+ float maxZip; // cm
+ };
+
+ Region triplet;
+ Region quadruplet;
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const {
+ // impose "region cuts" based on the fit results (phi, Tip, pt, cotan(theta)), Zip)
+ // default cuts:
+ // - for triplets: |Tip| < 0.3 cm, pT > 0.5 GeV, |Zip| < 12.0 cm
+ // - for quadruplets: |Tip| < 0.5 cm, pT > 0.3 GeV, |Zip| < 12.0 cm
+ // (see CAHitNtupletGeneratorGPU.cc)
+ auto const ®ion = (nHits > 3) ? quadruplet : triplet;
+ return (std::abs(tracksHelper::tip(tracks, it)) < region.maxTip) and (tracks.pt(it) > region.minPt) and
+ (std::abs(tracksHelper::zip(tracks, it)) < region.maxZip);
+ }
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE bool strictCut(const TrackSoAConstView &tracks, int it) const {
+ auto roughLog = [](float x) {
+ // max diff [0.5,12] at 1.25 0.16143
+ // average diff 0.0662998
+ union IF {
+ uint32_t i;
+ float f;
+ };
+ IF z;
+ z.f = x;
+ uint32_t lsb = 1 < 21;
+ z.i += lsb;
+ z.i >>= 21;
+ auto f = z.i & 3;
+ int ex = int(z.i >> 2) - 127;
+
+ // log2(1+0.25*f)
+ // averaged over bins
+ const float frac[4] = {0.160497f, 0.452172f, 0.694562f, 0.901964f};
+ return float(ex) + frac[f];
+ };
+
+ float pt = std::min(tracks.pt(it), chi2MaxPt);
+ float chi2Cut = chi2Scale * (chi2Coeff[0] + roughLog(pt) * chi2Coeff[1]);
+ if (tracks.chi2(it) >= chi2Cut) {
+#ifdef NTUPLE_FIT_DEBUG
+ printf("Bad chi2 %d pt %f eta %f chi2 %f\n", it, tracks.pt(it), tracks.eta(it), tracks.chi2(it));
+#endif
+ return true;
+ }
+ return false;
+ }
+ };
+
+ template
+ struct QualityCutsT> {
+ using TrackSoAView = typename reco::TrackSoA::template Layout<>::View;
+ using TrackSoAConstView = typename reco::TrackSoA::template Layout<>::ConstView;
+ using tracksHelper = TracksUtilities;
+
+ float maxChi2;
+ float minPt;
+ float maxTip;
+ float maxZip;
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const {
+ return (std::abs(tracksHelper::tip(tracks, it)) < maxTip) and (tracks.pt(it) > minPt) and
+ (std::abs(tracksHelper::zip(tracks, it)) < maxZip);
+ }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE bool strictCut(const TrackSoAConstView &tracks, int it) const {
+ return tracks.chi2(it) >= maxChi2;
+ }
+ };
+
+} // namespace pixelTrack
+
+// TODO: Should those be placed in the ALPAKA_ACCELERATOR_NAMESPACE
+template struct TracksUtilities;
+template struct TracksUtilities;
+
+#endif
diff --git a/DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h b/DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h
new file mode 100644
index 0000000000000..62e9f69e34636
--- /dev/null
+++ b/DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h
@@ -0,0 +1,52 @@
+#ifndef DataFormats_Track_interface_alpaka_TracksSoACollection_h
+#define DataFormats_Track_interface_alpaka_TracksSoACollection_h
+
+#include
+#include
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+#include "DataFormats/Portable/interface/alpaka/PortableCollection.h"
+#include "DataFormats/TrackSoA/interface/TracksSoA.h"
+#include "DataFormats/TrackSoA/interface/TrackDefinitions.h"
+#include "DataFormats/TrackSoA/interface/TracksHost.h"
+#include "DataFormats/TrackSoA/interface/TracksDevice.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h"
+
+// TODO: The class is created via inheritance of the PortableCollection.
+// This is generally discouraged, and should be done via composition.
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+
+ template
+ using TracksSoACollection = std::conditional_t,
+ TracksHost,
+ TracksDevice>;
+
+ //Classes definition for Phase1/Phase2/HIonPhase1, to make the classes_def lighter. Not actually used in the code.
+ namespace pixelTrack {
+ using TracksSoACollectionPhase1 = TracksSoACollection;
+ using TracksSoACollectionPhase2 = TracksSoACollection;
+ using TracksSoACollectionHIonPhase1 = TracksSoACollection;
+ } // namespace pixelTrack
+} // namespace ALPAKA_ACCELERATOR_NAMESPACE
+
+namespace cms::alpakatools {
+ template
+ struct CopyToHost> {
+ template
+ static auto copyAsync(TQueue& queue, TracksDevice const& deviceData) {
+ ::TracksHost hostData(queue);
+ alpaka::memcpy(queue, hostData.buffer(), deviceData.buffer());
+#ifdef GPU_DEBUG
+ printf("TracksSoACollection: I'm copying to host.\n");
+#endif
+ return hostData;
+ }
+ };
+} // namespace cms::alpakatools
+
+ASSERT_DEVICE_MATCHES_HOST_COLLECTION(pixelTrack::TracksSoACollectionPhase1, pixelTrack::TracksHostPhase1);
+ASSERT_DEVICE_MATCHES_HOST_COLLECTION(pixelTrack::TracksSoACollectionPhase2, pixelTrack::TracksHostPhase2);
+ASSERT_DEVICE_MATCHES_HOST_COLLECTION(pixelTrack::TracksSoACollectionHIonPhase1, pixelTrack::TracksHostHIonPhase1);
+
+#endif // DataFormats_Track_interface_alpaka_TracksSoACollection_h
diff --git a/DataFormats/TrackSoA/src/alpaka/classes_cuda.h b/DataFormats/TrackSoA/src/alpaka/classes_cuda.h
new file mode 100644
index 0000000000000..4783184611401
--- /dev/null
+++ b/DataFormats/TrackSoA/src/alpaka/classes_cuda.h
@@ -0,0 +1,14 @@
+
+#ifndef DataFormats_TrackSoA_src_alpaka_classes_cuda_h
+#define DataFormats_TrackSoA_src_alpaka_classes_cuda_h
+
+#include "DataFormats/Common/interface/DeviceProduct.h"
+#include "DataFormats/Common/interface/Wrapper.h"
+#include "DataFormats/TrackSoA/interface/TracksSoA.h"
+#include "DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h"
+#include "DataFormats/TrackSoA/interface/TracksDevice.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+
+using namespace reco;
+
+#endif // DataFormats_TrackSoA_src_alpaka_classes_cuda_h
diff --git a/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml
new file mode 100644
index 0000000000000..c04ca173c49f9
--- /dev/null
+++ b/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/DataFormats/TrackSoA/src/alpaka/classes_rocm.h b/DataFormats/TrackSoA/src/alpaka/classes_rocm.h
new file mode 100644
index 0000000000000..38143a6058c36
--- /dev/null
+++ b/DataFormats/TrackSoA/src/alpaka/classes_rocm.h
@@ -0,0 +1,14 @@
+
+#ifndef DataFormats_TrackSoA_src_alpaka_classes_rocm_h
+#define DataFormats_TrackSoA_src_alpaka_classes_rocm_h
+
+#include "DataFormats/Common/interface/DeviceProduct.h"
+#include "DataFormats/Common/interface/Wrapper.h"
+#include "DataFormats/TrackSoA/interface/TracksSoA.h"
+#include "DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h"
+#include "DataFormats/TrackSoA/interface/TracksDevice.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+
+using namespace reco;
+
+#endif // DataFormats_TrackSoA_src_alpaka_classes_rocm_h
diff --git a/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml
new file mode 100644
index 0000000000000..b7e40aedead42
--- /dev/null
+++ b/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/DataFormats/TrackSoA/src/classes.cc b/DataFormats/TrackSoA/src/classes.cc
new file mode 100644
index 0000000000000..97e00cc5b5638
--- /dev/null
+++ b/DataFormats/TrackSoA/src/classes.cc
@@ -0,0 +1,9 @@
+#include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h"
+#include "DataFormats/TrackSoA/interface/TracksSoA.h"
+#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+
+using namespace reco;
+
+SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>);
+SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>);
+// SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); //TODO: For the moment we live without HIons
diff --git a/DataFormats/TrackSoA/src/classes.h b/DataFormats/TrackSoA/src/classes.h
new file mode 100644
index 0000000000000..43d40e5f8f3ac
--- /dev/null
+++ b/DataFormats/TrackSoA/src/classes.h
@@ -0,0 +1,11 @@
+#ifndef DataFormats_TrackSoA_src_classes_h
+#define DataFormats_TrackSoA_src_classes_h
+
+#include "DataFormats/Common/interface/Wrapper.h"
+#include "DataFormats/TrackSoA/interface/TracksSoA.h"
+#include "DataFormats/TrackSoA/interface/TracksHost.h"
+
+using namespace pixelTopology;
+using namespace reco;
+
+#endif // DataFormats_TrackSoA_src_classes_h
diff --git a/DataFormats/TrackSoA/src/classes_def.xml b/DataFormats/TrackSoA/src/classes_def.xml
new file mode 100644
index 0000000000000..fd8fc0781ee25
--- /dev/null
+++ b/DataFormats/TrackSoA/src/classes_def.xml
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/DataFormats/TrackSoA/test/BuildFile.xml b/DataFormats/TrackSoA/test/BuildFile.xml
new file mode 100644
index 0000000000000..ce2b273d90577
--- /dev/null
+++ b/DataFormats/TrackSoA/test/BuildFile.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc
new file mode 100644
index 0000000000000..f4af0688ca1bf
--- /dev/null
+++ b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc
@@ -0,0 +1,82 @@
+/**
+ Simple test for the pixelTrack::TrackSoA data structure
+ which inherits from PortableDeviceCollection.
+
+ Creates an instance of the class (automatically allocates
+ memory on device), passes the view of the SoA data to
+ the CUDA kernels which:
+ - Fill the SoA with data.
+ - Verify that the data written is correct.
+
+ Then, the SoA data are copied back to Host, where
+ a temporary host-side view (tmp_view) is created using
+ the same Layout to access the data on host and print it.
+ */
+
+#include
+#include
+#include "DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h"
+#include "DataFormats/TrackSoA/interface/TracksDevice.h"
+#include "DataFormats/TrackSoA/interface/TracksHost.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/host.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
+#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+
+using namespace std;
+using namespace reco;
+using namespace ALPAKA_ACCELERATOR_NAMESPACE;
+using namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelTrack;
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+ namespace testTrackSoA {
+
+ template
+ void runKernels(TrackSoAView tracks_view, Queue& queue);
+ }
+} // namespace ALPAKA_ACCELERATOR_NAMESPACE
+
+int main() {
+ const auto host = cms::alpakatools::host();
+ const auto device = cms::alpakatools::devices()[0];
+ Queue queue(device);
+
+ // Inner scope to deallocate memory before destroying the stream
+ {
+ // Instantiate tracks on device. PortableDeviceCollection allocates
+ // SoA on device automatically.
+ TracksSoACollection tracks_d(queue);
+ testTrackSoA::runKernels(tracks_d.view(), queue);
+
+ // Instantate tracks on host. This is where the data will be
+ // copied to from device.
+ TracksHost tracks_h(queue);
+
+ std::cout << tracks_h.view().metadata().size() << std::endl;
+ alpaka::memcpy(queue, tracks_h.buffer(), tracks_d.const_buffer());
+ alpaka::wait(queue);
+
+ // Print results
+ std::cout << "pt"
+ << "\t"
+ << "eta"
+ << "\t"
+ << "chi2"
+ << "\t"
+ << "quality"
+ << "\t"
+ << "nLayers"
+ << "\t"
+ << "hitIndices off" << std::endl;
+
+ for (int i = 0; i < 10; ++i) {
+ std::cout << tracks_h.view()[i].pt() << "\t" << tracks_h.view()[i].eta() << "\t" << tracks_h.view()[i].chi2()
+ << "\t" << (int)tracks_h.view()[i].quality() << "\t" << (int)tracks_h.view()[i].nLayers() << "\t"
+ << tracks_h.view().hitIndices().off[i] << std::endl;
+ }
+ }
+
+ return 0;
+}
diff --git a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc
new file mode 100644
index 0000000000000..2c2d0961eb106
--- /dev/null
+++ b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc
@@ -0,0 +1,74 @@
+#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+#include "DataFormats/TrackSoA/interface/TrackDefinitions.h"
+#include "DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h"
+#include "DataFormats/TrackSoA/interface/TracksDevice.h"
+#include "DataFormats/TrackSoA/interface/TracksHost.h"
+
+using namespace reco;
+
+using Quality = pixelTrack::Quality;
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+ using namespace cms::alpakatools;
+ namespace testTrackSoA {
+
+ // Kernel which fills the TrackSoAView with data
+ // to test writing to it
+ template
+ class TestFillKernel {
+ public:
+ template >>
+ ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackSoAView tracks_view) const {
+ if (cms::alpakatools::once_per_grid(acc)) {
+ tracks_view.nTracks() = 420;
+ }
+
+ for (int32_t j : elements_with_stride(acc, tracks_view.metadata().size())) {
+ tracks_view[j].pt() = (float)j;
+ tracks_view[j].eta() = (float)j;
+ tracks_view[j].chi2() = (float)j;
+ tracks_view[j].quality() = (Quality)(j % 256);
+ tracks_view[j].nLayers() = j % 128;
+ tracks_view.hitIndices().off[j] = j;
+ }
+ }
+ };
+
+ // Kernel which reads from the TrackSoAView to verify
+ // that it was written correctly from the fill kernel
+ template
+ class TestVerifyKernel {
+ public:
+ template >>
+ ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackSoAConstView tracks_view) const {
+ if (cms::alpakatools::once_per_grid(acc)) {
+ ALPAKA_ASSERT_OFFLOAD(tracks_view.nTracks() == 420);
+ }
+ for (int32_t j : elements_with_stride(acc, tracks_view.nTracks())) {
+ assert(abs(tracks_view[j].pt() - (float)j) < .0001);
+ assert(abs(tracks_view[j].eta() - (float)j) < .0001);
+ assert(abs(tracks_view[j].chi2() - (float)j) < .0001);
+ assert(tracks_view[j].quality() == (Quality)(j % 256));
+ assert(tracks_view[j].nLayers() == j % 128);
+ assert(tracks_view.hitIndices().off[j] == uint32_t(j));
+ }
+ }
+ };
+
+ // Host function which invokes the two kernels above
+ template
+ void runKernels(TrackSoAView tracks_view, Queue& queue) {
+ uint32_t items = 64;
+ uint32_t groups = divide_up_by(tracks_view.metadata().size(), items);
+ auto workDiv = make_workdiv(groups, items);
+ alpaka::exec(queue, workDiv, TestFillKernel{}, tracks_view);
+ alpaka::exec(queue,
+ workDiv,
+ TestVerifyKernel{},
+ tracks_view); //TODO: wait for some PR that solves this and then check it!!!
+ }
+
+ template void runKernels(TrackSoAView tracks_view, Queue& queue);
+ template void runKernels(TrackSoAView tracks_view, Queue& queue);
+
+ } // namespace testTrackSoA
+} // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/DataFormats/VertexSoA/BuildFile.xml b/DataFormats/VertexSoA/BuildFile.xml
new file mode 100644
index 0000000000000..af53fc68f5a45
--- /dev/null
+++ b/DataFormats/VertexSoA/BuildFile.xml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
diff --git a/DataFormats/VertexSoA/README.md b/DataFormats/VertexSoA/README.md
new file mode 100644
index 0000000000000..54172eda14281
--- /dev/null
+++ b/DataFormats/VertexSoA/README.md
@@ -0,0 +1,45 @@
+# Vertex Portable Data Formats
+
+`DataFormat`s meant to be used on Host (CPU) or Device (GPU) for
+storing information about vertices created during the Pixel-local Reconstruction
+chain. It stores data in an SoA manner. It contains the data that was previously
+contained in the deprecated `ZVertexSoA` class.
+
+The host format is inheriting from `DataFormats/Common/interface/PortableHostCollection.h`,
+while the device format is inheriting from `DataFormats/Common/interface/PortableDeviceCollection.h`
+
+Both formats use the same SoA Layout (`ZVertexLayout`) which is generated
+via the `GENERATE_SOA_LAYOUT` macro in the `ZVertexUtilities.h` file.
+
+## Notes
+
+- Initially, `ZVertexSoA` had distinct array sizes for each attribute (e.g. `zv` was `MAXVTX` elements
+long, `ndof` was `MAXTRACKS` elements long). All columns are now of uniform `MAXTRACKS` size,
+meaning that there will be some wasted space (appx. 190kB).
+- Host and Device classes should **not** be created via inheritance, as they're done here,
+but via composition. See [this discussion](https://github.com/cms-sw/cmssw/pull/40465#discussion_r1066039309).
+
+## ZVertexHeterogeneousHost
+
+The version of the data format to be used for storing vertex data on the CPU.
+Instances of this class are to be used for:
+
+- Having a place to copy data to host from device, via `cudaMemcpy`, or
+- Running host-side algorithms using data stored in an SoA manner.
+
+## ZVertexHeterogeneousDevice
+
+The version of the data format to be used for storing vertex data on the GPU.
+
+Instances of `ZVertexHeterogeneousDevice` are to be created on host and be
+used on device only. To do so, the instance's `view()` method is to be called
+to pass a `View` to any kernel launched. Accessing data from the `view()` is not
+possible on the host side.
+
+## Utilities
+
+Apart from `ZVertexLayout`, `ZVertexUtilities.h` also contains
+a collection of methods which were originally
+defined as class methods inside the `ZVertexSoA` class
+which have been adapted to operate on `View` instances, so that they are callable
+from within `__global__` kernels, on both CPU and CPU.
diff --git a/DataFormats/VertexSoA/interface/ZVertexDefinitions.h b/DataFormats/VertexSoA/interface/ZVertexDefinitions.h
new file mode 100644
index 0000000000000..028668d1ff52a
--- /dev/null
+++ b/DataFormats/VertexSoA/interface/ZVertexDefinitions.h
@@ -0,0 +1,13 @@
+#ifndef DataFormats_VertexSoA_ZVertexDefinitions_h
+#define DataFormats_VertexSoA_ZVertexDefinitions_h
+
+#include
+
+namespace zVertex {
+
+ constexpr uint32_t MAXTRACKS = 32 * 1024;
+ constexpr uint32_t MAXVTX = 1024;
+
+} // namespace zVertex
+
+#endif
diff --git a/DataFormats/VertexSoA/interface/ZVertexDevice.h b/DataFormats/VertexSoA/interface/ZVertexDevice.h
new file mode 100644
index 0000000000000..8d120ae190f3c
--- /dev/null
+++ b/DataFormats/VertexSoA/interface/ZVertexDevice.h
@@ -0,0 +1,26 @@
+#ifndef DataFormats_VertexSoA_interface_ZVertexDevice_h
+#define DataFormats_VertexSoA_interface_ZVertexDevice_h
+
+#include
+
+#include
+#include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
+#include "DataFormats/VertexSoA/interface/ZVertexDefinitions.h"
+#include "DataFormats/VertexSoA/interface/ZVertexHost.h"
+#include "DataFormats/Portable/interface/PortableDeviceCollection.h"
+
+template
+class ZVertexDeviceSoA : public PortableDeviceCollection, TDev> {
+public:
+ ZVertexDeviceSoA() = default; // necessary for ROOT dictionaries
+
+ // Constructor which specifies the SoA size
+ template
+ explicit ZVertexDeviceSoA(TQueue queue) : PortableDeviceCollection, TDev>(S, queue) {}
+};
+
+using namespace ::zVertex;
+template
+using ZVertexDevice = ZVertexDeviceSoA;
+
+#endif // DataFormats_VertexSoA_interface_ZVertexDevice_h
diff --git a/DataFormats/VertexSoA/interface/ZVertexHost.h b/DataFormats/VertexSoA/interface/ZVertexHost.h
new file mode 100644
index 0000000000000..2d72b83bfe385
--- /dev/null
+++ b/DataFormats/VertexSoA/interface/ZVertexHost.h
@@ -0,0 +1,29 @@
+#ifndef DataFormats_VertexSoA_ZVertexHost_H
+#define DataFormats_VertexSoA_ZVertexHost_H
+
+#include
+
+#include
+
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+#include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
+#include "DataFormats/VertexSoA/interface/ZVertexDefinitions.h"
+#include "DataFormats/Portable/interface/PortableHostCollection.h"
+
+template
+class ZVertexHostSoA : public PortableHostCollection {
+public:
+ ZVertexHostSoA() = default;
+
+ // Constructor which specifies the queue
+ template
+ explicit ZVertexHostSoA(TQueue queue) : PortableHostCollection(S, queue) {}
+
+ // Constructor which specifies the DevHost
+ explicit ZVertexHostSoA(alpaka_common::DevHost const& host) : PortableHostCollection(S, host) {}
+};
+
+//using namespace ::zVertex;
+using ZVertexHost = ZVertexHostSoA;
+
+#endif // DataFormats_VertexSoA_ZVertexHost_H
diff --git a/DataFormats/VertexSoA/interface/ZVertexSoA.h b/DataFormats/VertexSoA/interface/ZVertexSoA.h
new file mode 100644
index 0000000000000..045603618acd7
--- /dev/null
+++ b/DataFormats/VertexSoA/interface/ZVertexSoA.h
@@ -0,0 +1,31 @@
+#ifndef DataFormats_VertexSoA_interface_ZVertexSoA_h
+#define DataFormats_VertexSoA_interface_ZVertexSoA_h
+
+#include
+
+#include
+
+#include "DataFormats/SoATemplate/interface/SoALayout.h"
+
+namespace reco {
+
+ GENERATE_SOA_LAYOUT(ZVertexLayout,
+ SOA_COLUMN(int16_t, idv),
+ SOA_COLUMN(float, zv),
+ SOA_COLUMN(float, wv),
+ SOA_COLUMN(float, chi2),
+ SOA_COLUMN(float, ptv2),
+ SOA_COLUMN(int32_t, ndof),
+ SOA_COLUMN(uint16_t, sortInd),
+ SOA_SCALAR(uint32_t, nvFinal))
+
+ // Common types for both Host and Device code
+ using ZVertexSoA = ZVertexLayout<>;
+ using ZVertexSoAView = ZVertexSoA::View;
+ using ZVertexSoAConstView = ZVertexSoA::ConstView;
+
+ ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void init(ZVertexSoAView &vertices) { vertices.nvFinal() = 0; }
+
+} // namespace reco
+
+#endif // DataFormats_VertexSoA_interface_ZVertexSoA_h
diff --git a/DataFormats/VertexSoA/interface/alpaka/ZVertexSoACollection.h b/DataFormats/VertexSoA/interface/alpaka/ZVertexSoACollection.h
new file mode 100644
index 0000000000000..636a07e2bd978
--- /dev/null
+++ b/DataFormats/VertexSoA/interface/alpaka/ZVertexSoACollection.h
@@ -0,0 +1,39 @@
+#ifndef DataFormats_VertexSoA_interface_ZVertexSoACollection_h
+#define DataFormats_VertexSoA_interface_ZVertexSoACollection_h
+
+#include
+
+#include
+#include "DataFormats/Portable/interface/alpaka/PortableCollection.h"
+#include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
+#include "DataFormats/VertexSoA/interface/ZVertexDefinitions.h"
+#include "DataFormats/VertexSoA/interface/ZVertexHost.h"
+#include "DataFormats/VertexSoA/interface/ZVertexDevice.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h"
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+
+ using ZVertexSoACollection =
+ std::conditional_t, ZVertexHost, ZVertexDevice>;
+
+} // namespace ALPAKA_ACCELERATOR_NAMESPACE
+
+namespace cms::alpakatools {
+ template
+ struct CopyToHost> {
+ template
+ static auto copyAsync(TQueue& queue, ZVertexDevice const& deviceData) {
+ ZVertexHost hostData(queue);
+ alpaka::memcpy(queue, hostData.buffer(), deviceData.buffer());
+#ifdef GPU_DEBUG
+ printf("ZVertexSoACollection: I'm copying to host.\n");
+#endif
+ return hostData;
+ }
+ };
+} // namespace cms::alpakatools
+
+ASSERT_DEVICE_MATCHES_HOST_COLLECTION(ZVertexSoACollection, ZVertexHost);
+
+#endif // DataFormats_VertexSoA_interface_ZVertexSoACollection_h
diff --git a/DataFormats/VertexSoA/src/alpaka/classes_cuda.h b/DataFormats/VertexSoA/src/alpaka/classes_cuda.h
new file mode 100644
index 0000000000000..e76f6ca1365c1
--- /dev/null
+++ b/DataFormats/VertexSoA/src/alpaka/classes_cuda.h
@@ -0,0 +1,10 @@
+#ifndef DataFormats_VertexSoA_src_alpaka_classes_cuda_h
+#define DataFormats_VertexSoA_src_alpaka_classes_cuda_h
+
+#include "DataFormats/Common/interface/DeviceProduct.h"
+#include "DataFormats/Common/interface/Wrapper.h"
+#include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
+#include "DataFormats/VertexSoA/interface//ZVertexDevice.h"
+#include "DataFormats/VertexSoA/interface/alpaka/ZVertexSoACollection.h"
+
+#endif // DataFormats_VertexSoA_src_alpaka_classes_cuda_h
diff --git a/DataFormats/VertexSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/VertexSoA/src/alpaka/classes_cuda_def.xml
new file mode 100644
index 0000000000000..606937a5bd3e5
--- /dev/null
+++ b/DataFormats/VertexSoA/src/alpaka/classes_cuda_def.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/DataFormats/VertexSoA/src/alpaka/classes_rocm.h b/DataFormats/VertexSoA/src/alpaka/classes_rocm.h
new file mode 100644
index 0000000000000..f5ea845c028b1
--- /dev/null
+++ b/DataFormats/VertexSoA/src/alpaka/classes_rocm.h
@@ -0,0 +1,9 @@
+#ifndef DataFormats_VertexSoA_src_alpaka_classes_rocm_h
+#define DataFormats_VertexSoA_src_alpaka_classes_rocm_h
+
+#include "DataFormats/Common/interface/DeviceProduct.h"
+#include "DataFormats/Common/interface/Wrapper.h"
+#include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
+#include "DataFormats/VertexSoA/interface//ZVertexDevice.h"
+#include "DataFormats/VertexSoA/interface/alpaka/ZVertexSoACollection.h"
+#endif // DataFormats_VertexSoA_src_alpaka_classes_rocm_h
diff --git a/DataFormats/VertexSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/VertexSoA/src/alpaka/classes_rocm_def.xml
new file mode 100644
index 0000000000000..94deb6fff7d61
--- /dev/null
+++ b/DataFormats/VertexSoA/src/alpaka/classes_rocm_def.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/DataFormats/VertexSoA/src/classes.cc b/DataFormats/VertexSoA/src/classes.cc
new file mode 100644
index 0000000000000..edffb6e08a9e5
--- /dev/null
+++ b/DataFormats/VertexSoA/src/classes.cc
@@ -0,0 +1,4 @@
+#include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h"
+#include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
+
+SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection);
diff --git a/DataFormats/VertexSoA/src/classes.h b/DataFormats/VertexSoA/src/classes.h
new file mode 100644
index 0000000000000..883182c01dcf9
--- /dev/null
+++ b/DataFormats/VertexSoA/src/classes.h
@@ -0,0 +1,8 @@
+#ifndef DataFormats_VertexSoA_src_classes_h
+#define DataFormats_VertexSoA_src_classes_h
+
+#include "DataFormats/Common/interface/Wrapper.h"
+#include "DataFormats/VertexSoA/interface/ZVertexSoA.h"
+#include "DataFormats/VertexSoA/interface/ZVertexHost.h"
+
+#endif // DataFormats_VertexSoA_src_classes_h
diff --git a/DataFormats/VertexSoA/src/classes_def.xml b/DataFormats/VertexSoA/src/classes_def.xml
new file mode 100644
index 0000000000000..820d28ecc3493
--- /dev/null
+++ b/DataFormats/VertexSoA/src/classes_def.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
diff --git a/DataFormats/VertexSoA/test/BuildFile.xml b/DataFormats/VertexSoA/test/BuildFile.xml
new file mode 100644
index 0000000000000..49dee4babd8a1
--- /dev/null
+++ b/DataFormats/VertexSoA/test/BuildFile.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.cc b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.cc
new file mode 100644
index 0000000000000..0c0c8e8591df9
--- /dev/null
+++ b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.cc
@@ -0,0 +1,82 @@
+/**
+ Simple test for the reco::ZVertexSoA data structure
+ which inherits from Portable{Host}Collection.
+
+ Creates an instance of the class (automatically allocates
+ memory on device), passes the view of the SoA data to
+ the kernels which:
+ - Fill the SoA with data.
+ - Verify that the data written is correct.
+
+ Then, the SoA data are copied back to Host, where
+ a temporary host-side view (tmp_view) is created using
+ the same Layout to access the data on host and print it.
+ */
+
+#include
+#include
+#include "DataFormats/VertexSoA/interface/alpaka/ZVertexSoACollection.h"
+#include "DataFormats/VertexSoA/interface/ZVertexDevice.h"
+#include "DataFormats/VertexSoA/interface/ZVertexHost.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/devices.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/host.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
+
+using namespace std;
+using namespace ALPAKA_ACCELERATOR_NAMESPACE;
+using namespace reco;
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+ namespace testZVertexSoAT {
+ void runKernels(ZVertexSoAView zvertex_view, Queue& queue);
+ }
+} // namespace ALPAKA_ACCELERATOR_NAMESPACE
+
+int main() {
+ const auto host = cms::alpakatools::host();
+ const auto device = cms::alpakatools::devices()[0];
+ Queue queue(device);
+
+ // Inner scope to deallocate memory before destroying the stream
+ {
+ // Instantiate vertices on device. PortableCollection allocates
+ // SoA on device automatically.
+ ZVertexSoACollection zvertex_d(queue);
+ testZVertexSoAT::runKernels(zvertex_d.view(), queue);
+
+ // Instantate vertices on host. This is where the data will be
+ // copied to from device.
+ ZVertexHost zvertex_h(queue);
+ std::cout << zvertex_h.view().metadata().size() << std::endl;
+ alpaka::memcpy(queue, zvertex_h.buffer(), zvertex_d.const_buffer());
+ alpaka::wait(queue);
+
+ // Print results
+ std::cout << "idv"
+ << "\t"
+ << "zv"
+ << "\t"
+ << "wv"
+ << "\t"
+ << "chi2"
+ << "\t"
+ << "ptv2"
+ << "\t"
+ << "ndof"
+ << "\t"
+ << "sortInd"
+ << "\t"
+ << "nvFinal" << std::endl;
+
+ for (int i = 0; i < 10; ++i) {
+ std::cout << (int)zvertex_h.view()[i].idv() << "\t" << zvertex_h.view()[i].zv() << "\t"
+ << zvertex_h.view()[i].wv() << "\t" << zvertex_h.view()[i].chi2() << "\t" << zvertex_h.view()[i].ptv2()
+ << "\t" << (int)zvertex_h.view()[i].ndof() << "\t" << (int)zvertex_h.view()[i].sortInd() << "\t"
+ << (int)zvertex_h.view().nvFinal() << std::endl;
+ }
+ }
+
+ return 0;
+}
diff --git a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc
new file mode 100644
index 0000000000000..1b22159a53b88
--- /dev/null
+++ b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc
@@ -0,0 +1,62 @@
+#include "DataFormats/VertexSoA/interface/alpaka/ZVertexSoACollection.h"
+#include "DataFormats/VertexSoA/interface/ZVertexDevice.h"
+#include "DataFormats/VertexSoA/interface/ZVertexHost.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" // Check if this is really needed; code doesn't compile without it
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+ using namespace alpaka;
+ using namespace cms::alpakatools;
+
+ namespace testZVertexSoAT {
+
+ class TestFillKernel {
+ public:
+ template >>
+ ALPAKA_FN_ACC void operator()(TAcc const& acc, reco::ZVertexSoAView zvertex_view) const {
+ if (cms::alpakatools::once_per_grid(acc)) {
+ zvertex_view.nvFinal() = 420;
+ }
+
+ for (int32_t j : elements_with_stride(acc, zvertex_view.metadata().size())) {
+ zvertex_view[j].idv() = (int16_t)j;
+ zvertex_view[j].zv() = (float)j;
+ zvertex_view[j].wv() = (float)j;
+ zvertex_view[j].chi2() = (float)j;
+ zvertex_view[j].ptv2() = (float)j;
+ zvertex_view[j].ndof() = (int32_t)j;
+ zvertex_view[j].sortInd() = (uint16_t)j;
+ }
+ }
+ };
+
+ class TestVerifyKernel {
+ public:
+ template >>
+ ALPAKA_FN_ACC void operator()(TAcc const& acc, reco::ZVertexSoAView zvertex_view) const {
+ if (cms::alpakatools::once_per_grid(acc)) {
+ ALPAKA_ASSERT_OFFLOAD(zvertex_view.nvFinal() == 420);
+ }
+
+ for (int32_t j : elements_with_stride(acc, zvertex_view.nvFinal())) {
+ assert(zvertex_view[j].idv() == j);
+ assert(zvertex_view[j].zv() - (float)j < 0.0001);
+ assert(zvertex_view[j].wv() - (float)j < 0.0001);
+ assert(zvertex_view[j].chi2() - (float)j < 0.0001);
+ assert(zvertex_view[j].ptv2() - (float)j < 0.0001);
+ assert(zvertex_view[j].ndof() == j);
+ assert(zvertex_view[j].sortInd() == uint32_t(j));
+ }
+ }
+ };
+
+ void runKernels(reco::ZVertexSoAView zvertex_view, Queue& queue) {
+ uint32_t items = 64;
+ uint32_t groups = divide_up_by(zvertex_view.metadata().size(), items);
+ auto workDiv = make_workdiv(groups, items);
+ alpaka::exec(queue, workDiv, TestFillKernel{}, zvertex_view);
+ alpaka::exec(queue, workDiv, TestVerifyKernel{}, zvertex_view);
+ }
+
+ } // namespace testZVertexSoAT
+
+} // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc
index 9881aeab46bab..a76ff6af49ac9 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc
@@ -9,7 +9,6 @@
#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/EventSetup.h"
-#include "FWCore/Framework/interface/MakerMacros.h"
#include "FWCore/Framework/interface/global/EDProducer.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
@@ -181,7 +180,10 @@ void SiPixelRecHitFromSoAAlpaka::produce(edm::StreamID streamID,
}
using SiPixelRecHitFromSoAAlpakaPhase1 = SiPixelRecHitFromSoAAlpaka;
-DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaPhase1);
-
using SiPixelRecHitFromSoAAlpakaPhase2 = SiPixelRecHitFromSoAAlpaka;
+using SiPixelRecHitFromSoAAlpakaHIonPhase1 = SiPixelRecHitFromSoAAlpaka;
+
+#include "FWCore/Framework/interface/MakerMacros.h"
+DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaPhase1);
DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaPhase2);
+DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaHIonPhase1);
diff --git a/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py b/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py
index c08a0987d3f59..895ba32eca71a 100644
--- a/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py
+++ b/RecoTracker/Configuration/python/RecoPixelVertexing_cff.py
@@ -98,6 +98,33 @@
pixelVerticesTask.copy()
))
+## pixel vertex reconstruction with Alpaka
+
+# pixel vertex SoA producer with alpaka on the device
+from RecoTracker.PixelVertexFinding.pixelVertexProducerAlpakaPhase1_cfi import pixelVertexProducerAlpakaPhase1 as _pixelVerticesAlpakaPhase1
+from RecoTracker.PixelVertexFinding.pixelVertexProducerAlpakaPhase2_cfi import pixelVertexProducerAlpakaPhase2 as _pixelVerticesAlpakaPhase2
+pixelVerticesAlpaka = _pixelVerticesAlpakaPhase1.clone()
+phase2_tracker.toReplaceWith(pixelVerticesAlpaka,_pixelVerticesAlpakaPhase2.clone())
+
+from RecoTracker.PixelVertexFinding.pixelVertexFromSoAAlpaka_cfi import pixelVertexFromSoAAlpaka as _pixelVertexFromSoAAlpaka
+alpaka.toReplaceWith(pixelVertices, _pixelVertexFromSoAAlpaka.clone())
+
+# pixel vertex SoA producer with alpaka on the cpu, for validation
+pixelVerticesAlpakaSerial = pixelVerticesAlpaka.clone(
+ pixelTrackSrc = 'pixelTracksAlpakaSerial',
+ alpaka = None
+)
+pixelVerticesAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + pixelVerticesAlpaka._TypedParameterizable__type.removesuffix('@alpaka')
+
+alpaka.toReplaceWith(pixelVerticesTask, cms.Task(
+ # Build the pixel vertices in SoA format with alpaka on the device
+ pixelVerticesAlpaka,
+ # Build the pixel vertices in SoA format with alpaka on the cpu (if requested by the validation)
+ pixelVerticesAlpakaSerial,
+ # Convert the pixel vertices from SoA format (on the host) to the legacy format
+ pixelVertices
+))
+
# Tasks and Sequences
recopixelvertexingTask = cms.Task(
pixelTracksTask,
diff --git a/RecoTracker/PixelSeeding/plugins/BuildFile.xml b/RecoTracker/PixelSeeding/plugins/BuildFile.xml
index 82b80e1c55b66..f9863a6a8c292 100644
--- a/RecoTracker/PixelSeeding/plugins/BuildFile.xml
+++ b/RecoTracker/PixelSeeding/plugins/BuildFile.xml
@@ -1,21 +1,36 @@
-
-
-
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc
new file mode 100644
index 0000000000000..a21fed668b54c
--- /dev/null
+++ b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc
@@ -0,0 +1,412 @@
+//
+// Author: Felice Pantaleo, CERN
+//
+
+//#define BROKENLINE_DEBUG
+//#define BL_DUMP_HITS
+#include
+#include
+
+#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/traits.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h"
+#include "RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h"
+
+#include "HelixFit.h"
+
+template
+using Tuples = typename reco::TrackSoA::HitContainer;
+template
+using OutputSoAView = reco::TrackSoAView;
+template
+using TupleMultiplicity = caStructures::TupleMultiplicityT;
+
+// #define BL_DUMP_HITS
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+ template
+ class Kernel_BLFastFit {
+ public:
+ template >>
+ ALPAKA_FN_ACC void operator()(TAcc const &acc,
+ Tuples const *__restrict__ foundNtuplets,
+ TupleMultiplicity const *__restrict__ tupleMultiplicity,
+ TrackingRecHitSoAConstView hh,
+ pixelCPEforDevice::ParamsOnDeviceT const *__restrict__ cpeParams,
+ typename TrackerTraits::tindex_type *__restrict__ ptkids,
+ double *__restrict__ phits,
+ float *__restrict__ phits_ge,
+ double *__restrict__ pfast_fit,
+ uint32_t nHitsL,
+ uint32_t nHitsH,
+ int32_t offset) const {
+ constexpr uint32_t hitsInFit = N;
+ constexpr auto invalidTkId = std::numeric_limits::max();
+
+ ALPAKA_ASSERT_OFFLOAD(hitsInFit <= nHitsL);
+ ALPAKA_ASSERT_OFFLOAD(nHitsL <= nHitsH);
+ ALPAKA_ASSERT_OFFLOAD(phits);
+ ALPAKA_ASSERT_OFFLOAD(pfast_fit);
+ ALPAKA_ASSERT_OFFLOAD(foundNtuplets);
+ ALPAKA_ASSERT_OFFLOAD(tupleMultiplicity);
+
+ // look in bin for this hit multiplicity
+ int totTK = tupleMultiplicity->end(nHitsH) - tupleMultiplicity->begin(nHitsL);
+ ALPAKA_ASSERT_OFFLOAD(totTK <= int(tupleMultiplicity->size()));
+ ALPAKA_ASSERT_OFFLOAD(totTK >= 0);
+
+#ifdef BROKENLINE_DEBUG
+ const uint32_t threadIdx(alpaka::getIdx(acc)[0u]);
+ if (cms::alpakatools::once_per_grid(acc)) {
+ printf("%d total Ntuple\n", tupleMultiplicity->size());
+ printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit);
+ }
+#endif
+ const auto nt = riemannFit::maxNumberOfConcurrentFits;
+ for (auto local_idx : cms::alpakatools::elements_with_stride(acc, nt)) {
+ auto tuple_idx = local_idx + offset;
+ if ((int)tuple_idx >= totTK) {
+ ptkids[local_idx] = invalidTkId;
+ break;
+ }
+ // get it from the ntuple container (one to one to helix)
+ auto tkid = *(tupleMultiplicity->begin(nHitsL) + tuple_idx);
+ ALPAKA_ASSERT_OFFLOAD(static_cast(tkid) < foundNtuplets->nOnes());
+
+ ptkids[local_idx] = tkid;
+
+ auto nHits = foundNtuplets->size(tkid);
+
+ ALPAKA_ASSERT_OFFLOAD(nHits >= nHitsL);
+ ALPAKA_ASSERT_OFFLOAD(nHits <= nHitsH);
+
+ riemannFit::Map3xNd hits(phits + local_idx);
+ riemannFit::Map4d fast_fit(pfast_fit + local_idx);
+ riemannFit::Map6xNf hits_ge(phits_ge + local_idx);
+
+#ifdef BL_DUMP_HITS
+ auto &&done = alpaka::declareSharedVar(acc);
+ done = 0;
+ alpaka::syncBlockThreads(acc);
+ bool dump =
+ (foundNtuplets->size(tkid) == 5 && 0 == alpaka::atomicAdd(acc, &done, 1, alpaka::hierarchy::Blocks{}));
+#endif
+
+ // Prepare data structure
+ auto const *hitId = foundNtuplets->begin(tkid);
+
+ // #define YERR_FROM_DC
+#ifdef YERR_FROM_DC
+ // try to compute more precise error in y
+ auto dx = hh[hitId[hitsInFit - 1]].xGlobal() - hh[hitId[0]].xGlobal();
+ auto dy = hh[hitId[hitsInFit - 1]].yGlobal() - hh[hitId[0]].yGlobal();
+ auto dz = hh[hitId[hitsInFit - 1]].zGlobal() - hh[hitId[0]].zGlobal();
+ float ux, uy, uz;
+#endif
+
+ float incr = std::max(1.f, float(nHits) / float(hitsInFit));
+ float n = 0;
+ for (uint32_t i = 0; i < hitsInFit; ++i) {
+ int j = int(n + 0.5f); // round
+ if (hitsInFit - 1 == i)
+ j = nHits - 1; // force last hit to ensure max lever arm.
+ ALPAKA_ASSERT_OFFLOAD(j < int(nHits));
+ n += incr;
+ auto hit = hitId[j];
+ float ge[6];
+
+#ifdef YERR_FROM_DC
+ auto const &dp = cpeParams->detParams(hh.detectorIndex(hit));
+ auto status = hh[hit].chargeAndStatus().status;
+ int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin;
+ ALPAKA_ASSERT_OFFLOAD(qbin >= 0 && qbin < 5);
+ bool nok = (status.isBigY | status.isOneY);
+ // compute cotanbeta and use it to recompute error
+ dp.frame.rotation().multiply(dx, dy, dz, ux, uy, uz);
+ auto cb = std::abs(uy / uz);
+ int bin =
+ int(cb * (float(phase1PixelTopology::pixelThickess) / float(phase1PixelTopology::pixelPitchY)) * 8.f) - 4;
+ int low_value = 0;
+ int high_value = CPEFastParametrisation::kNumErrorBins - 1;
+ // return estimated bin value truncated to [0, 15]
+ bin = std::clamp(bin, low_value, high_value);
+ float yerr = dp.sigmay[bin] * 1.e-4f; // toCM
+ yerr *= dp.yfact[qbin]; // inflate
+ yerr *= yerr;
+ yerr += dp.apeYY;
+ yerr = nok ? hh[hit].yerrLocal() : yerr;
+ dp.frame.toGlobal(hh[hit].xerrLocal(), 0, yerr, ge);
+#else
+ cpeParams->detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge);
+#endif
+
+#ifdef BL_DUMP_HITS
+ bool dump = foundNtuplets->size(tkid) == 5;
+ if (dump) {
+ printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n",
+ local_idx,
+ tkid,
+ hit,
+ hh[hit].detectorIndex(),
+ i,
+ hh[hit].xGlobal(),
+ hh[hit].yGlobal(),
+ hh[hit].zGlobal());
+ printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]);
+ }
+#endif
+
+ hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal();
+ hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5];
+ }
+ brokenline::fastFit(acc, hits, fast_fit);
+
+ // no NaN here....
+ ALPAKA_ASSERT_OFFLOAD(fast_fit(0) == fast_fit(0));
+ ALPAKA_ASSERT_OFFLOAD(fast_fit(1) == fast_fit(1));
+ ALPAKA_ASSERT_OFFLOAD(fast_fit(2) == fast_fit(2));
+ ALPAKA_ASSERT_OFFLOAD(fast_fit(3) == fast_fit(3));
+ }
+ }
+ };
+
+ template
+ struct Kernel_BLFit {
+ public:
+ template >>
+ ALPAKA_FN_ACC void operator()(TAcc const &acc,
+ TupleMultiplicity const *__restrict__ tupleMultiplicity,
+ double bField,
+ OutputSoAView results_view,
+ typename TrackerTraits::tindex_type const *__restrict__ ptkids,
+ double *__restrict__ phits,
+ float *__restrict__ phits_ge,
+ double *__restrict__ pfast_fit) const {
+ ALPAKA_ASSERT_OFFLOAD(results_view.pt());
+ ALPAKA_ASSERT_OFFLOAD(results_view.eta());
+ ALPAKA_ASSERT_OFFLOAD(results_view.chi2());
+ ALPAKA_ASSERT_OFFLOAD(pfast_fit);
+ constexpr auto invalidTkId = std::numeric_limits::max();
+
+ // same as above...
+ // look in bin for this hit multiplicity
+ const auto nt = riemannFit::maxNumberOfConcurrentFits;
+ for (auto local_idx : cms::alpakatools::elements_with_stride(acc, nt)) {
+ if (invalidTkId == ptkids[local_idx])
+ break;
+ auto tkid = ptkids[local_idx];
+
+ ALPAKA_ASSERT_OFFLOAD(tkid < TrackerTraits::maxNumberOfTuples);
+
+ riemannFit::Map3xNd hits(phits + local_idx);
+ riemannFit::Map4d fast_fit(pfast_fit + local_idx);
+ riemannFit::Map6xNf hits_ge(phits_ge + local_idx);
+
+ brokenline::PreparedBrokenLineData data;
+
+ brokenline::karimaki_circle_fit circle;
+ riemannFit::LineFit line;
+
+ brokenline::prepareBrokenLineData(acc, hits, fast_fit, bField, data);
+ brokenline::lineFit(acc, hits_ge, fast_fit, bField, data, line);
+ brokenline::circleFit(acc, hits, hits_ge, fast_fit, bField, data, circle);
+
+ TracksUtilities::copyFromCircle(
+ results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
+ results_view[tkid].pt() = float(bField) / float(std::abs(circle.par(2)));
+ results_view[tkid].eta() = alpaka::math::asinh(acc, line.par(0));
+ results_view[tkid].chi2() = (circle.chi2 + line.chi2) / (2 * N - 5);
+
+#ifdef BROKENLINE_DEBUG
+ if (!(circle.chi2 >= 0) || !(line.chi2 >= 0))
+ printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2);
+ printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n",
+ N,
+ N,
+ tkid,
+ circle.par(0),
+ circle.par(1),
+ circle.par(2));
+ printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1));
+ printf("kernelBLHits chi2 cov %f/%f %e,%e,%e,%e,%e\n",
+ circle.chi2,
+ line.chi2,
+ circle.cov(0, 0),
+ circle.cov(1, 1),
+ circle.cov(2, 2),
+ line.cov(0, 0),
+ line.cov(1, 1));
+#endif
+ }
+ }
+ };
+
+ template
+ void HelixFit::launchBrokenLineKernels(
+ const TrackingRecHitSoAConstView &hv,
+ pixelCPEforDevice::ParamsOnDeviceT const *cpeParams,
+ uint32_t hitsInFit,
+ uint32_t maxNumberOfTuples,
+ Queue &queue) {
+ ALPAKA_ASSERT_OFFLOAD(tuples_);
+
+ uint32_t blockSize = 64;
+ uint32_t numberOfBlocks = cms::alpakatools::divide_up_by(maxNumberOfConcurrentFits_, blockSize);
+ const WorkDiv1D workDivTriplets = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize);
+ const WorkDiv1D workDivQuadsPenta = cms::alpakatools::make_workdiv(numberOfBlocks / 4, blockSize);
+
+ // Fit internals
+ auto tkidDevice =
+ cms::alpakatools::make_device_buffer(queue, maxNumberOfConcurrentFits_);
+ auto hitsDevice = cms::alpakatools::make_device_buffer(
+ queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double));
+ auto hits_geDevice = cms::alpakatools::make_device_buffer(
+ queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float));
+ auto fast_fit_resultsDevice = cms::alpakatools::make_device_buffer(
+ queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double));
+
+ for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
+ // fit triplets
+
+ alpaka::exec(queue,
+ workDivTriplets,
+ Kernel_BLFastFit<3, TrackerTraits>{},
+ tuples_,
+ tupleMultiplicity_,
+ hv,
+ cpeParams,
+ tkidDevice.data(),
+ hitsDevice.data(),
+ hits_geDevice.data(),
+ fast_fit_resultsDevice.data(),
+ 3,
+ 3,
+ offset);
+
+ alpaka::exec(queue,
+ workDivTriplets,
+ Kernel_BLFit<3, TrackerTraits>{},
+ tupleMultiplicity_,
+ bField_,
+ outputSoa_,
+ tkidDevice.data(),
+ hitsDevice.data(),
+ hits_geDevice.data(),
+ fast_fit_resultsDevice.data());
+
+ if (fitNas4_) {
+ // fit all as 4
+ riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrack, 1>([this,
+ &hv,
+ &cpeParams,
+ &tkidDevice,
+ &hitsDevice,
+ &hits_geDevice,
+ &fast_fit_resultsDevice,
+ &offset,
+ &queue,
+ &workDivQuadsPenta](auto i) {
+ alpaka::exec(queue,
+ workDivQuadsPenta,
+ Kernel_BLFastFit<4, TrackerTraits>{},
+ tuples_,
+ tupleMultiplicity_,
+ hv,
+ cpeParams,
+ tkidDevice.data(),
+ hitsDevice.data(),
+ hits_geDevice.data(),
+ fast_fit_resultsDevice.data(),
+ 4,
+ 4,
+ offset);
+
+ alpaka::exec(queue,
+ workDivQuadsPenta,
+ Kernel_BLFit<4, TrackerTraits>{},
+ tupleMultiplicity_,
+ bField_,
+ outputSoa_,
+ tkidDevice.data(),
+ hitsDevice.data(),
+ hits_geDevice.data(),
+ fast_fit_resultsDevice.data());
+ });
+
+ } else {
+ riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrackForFullFit, 1>([this,
+ &hv,
+ &cpeParams,
+ &tkidDevice,
+ &hitsDevice,
+ &hits_geDevice,
+ &fast_fit_resultsDevice,
+ &offset,
+ &queue,
+ &workDivQuadsPenta](auto i) {
+ alpaka::exec(queue,
+ workDivQuadsPenta,
+ Kernel_BLFastFit{},
+ tuples_,
+ tupleMultiplicity_,
+ hv,
+ cpeParams,
+ tkidDevice.data(),
+ hitsDevice.data(),
+ hits_geDevice.data(),
+ fast_fit_resultsDevice.data(),
+ i,
+ i,
+ offset);
+
+ alpaka::exec(queue,
+ workDivQuadsPenta,
+ Kernel_BLFit{},
+ tupleMultiplicity_,
+ bField_,
+ outputSoa_,
+ tkidDevice.data(),
+ hitsDevice.data(),
+ hits_geDevice.data(),
+ fast_fit_resultsDevice.data());
+ });
+
+ static_assert(TrackerTraits::maxHitsOnTrackForFullFit < TrackerTraits::maxHitsOnTrack);
+
+ //Fit all the rest using the maximum from previous call
+ alpaka::exec(queue,
+ workDivQuadsPenta,
+ Kernel_BLFastFit{},
+ tuples_,
+ tupleMultiplicity_,
+ hv,
+ cpeParams,
+ tkidDevice.data(),
+ hitsDevice.data(),
+ hits_geDevice.data(),
+ fast_fit_resultsDevice.data(),
+ TrackerTraits::maxHitsOnTrackForFullFit,
+ TrackerTraits::maxHitsOnTrack - 1,
+ offset);
+
+ alpaka::exec(queue,
+ workDivQuadsPenta,
+ Kernel_BLFit{},
+ tupleMultiplicity_,
+ bField_,
+ outputSoa_,
+ tkidDevice.data(),
+ hitsDevice.data(),
+ hits_geDevice.data(),
+ fast_fit_resultsDevice.data());
+ }
+
+ } // loop on concurrent fits
+ }
+
+ template class HelixFit;
+ template class HelixFit;
+ template class HelixFit;
+
+} // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h b/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h
new file mode 100644
index 0000000000000..d0142f78415ae
--- /dev/null
+++ b/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h
@@ -0,0 +1,391 @@
+#ifndef RecoPixelVertexing_PixelTriplets_CACellT_h
+#define RecoPixelVertexing_PixelTriplets_CACellT_h
+
+//
+// Author: Felice Pantaleo, CERN
+//
+
+// #define ONLY_TRIPLETS_IN_HOLE
+
+#include
+
+#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h"
+#include "RecoTracker/PixelSeeding/interface/CircleEq.h"
+#include "DataFormats/TrackSoA/interface/TrackDefinitions.h"
+#include "DataFormats/TrackSoA/interface/TracksSoA.h"
+#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
+#include "CAStructures.h"
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+ template
+ class CACellT {
+ public:
+ using PtrAsInt = unsigned long long;
+
+ static constexpr auto maxCellsPerHit = TrackerTraits::maxCellsPerHit;
+ using OuterHitOfCellContainer = caStructures::OuterHitOfCellContainerT;
+ using OuterHitOfCell = caStructures::OuterHitOfCellT;
+ using CellNeighbors = caStructures::CellNeighborsT;
+ using CellTracks = caStructures::CellTracksT;
+ using CellNeighborsVector = caStructures::CellNeighborsVectorT;
+ using CellTracksVector = caStructures::CellTracksVectorT;
+
+ using HitsConstView = TrackingRecHitSoAConstView;
+ using hindex_type = typename TrackerTraits::hindex_type;
+ using tindex_type = typename TrackerTraits::tindex_type;
+ static constexpr auto invalidHitId = std::numeric_limits::max();
+
+ using TmpTuple = cms::alpakatools::VecArray;
+
+ using HitContainer = typename reco::TrackSoA::HitContainer;
+ using Quality = ::pixelTrack::Quality;
+ static constexpr auto bad = ::pixelTrack::Quality::bad;
+
+ enum class StatusBit : uint16_t { kUsed = 1, kInTrack = 2, kKilled = 1 << 15 };
+
+ CACellT() = default;
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE void init(CellNeighborsVector& cellNeighbors,
+ CellTracksVector& cellTracks,
+ const HitsConstView& hh,
+ int layerPairId,
+ hindex_type innerHitId,
+ hindex_type outerHitId) {
+ theInnerHitId = innerHitId;
+ theOuterHitId = outerHitId;
+ theLayerPairId_ = layerPairId;
+ theStatus_ = 0;
+ theFishboneId = invalidHitId;
+
+ // optimization that depends on access pattern
+ theInnerZ = hh[innerHitId].zGlobal();
+ theInnerR = hh[innerHitId].rGlobal();
+
+ // link to default empty
+ theOuterNeighbors = &cellNeighbors[0];
+ theTracks = &cellTracks[0];
+ assert(outerNeighbors().empty());
+ assert(tracks().empty());
+ }
+
+ template
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) int addOuterNeighbor(
+ const TAcc& acc, typename TrackerTraits::cindex_type t, CellNeighborsVector& cellNeighbors) {
+ // use smart cache
+ if (outerNeighbors().empty()) {
+ auto i = cellNeighbors.extend(acc); // maybe wasted....
+ if (i > 0) {
+ cellNeighbors[i].reset();
+ alpaka::mem_fence(acc, alpaka::memory_scope::Grid{});
+#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
+ theOuterNeighbors = &cellNeighbors[i];
+#else
+ auto zero = (PtrAsInt)(&cellNeighbors[0]);
+ alpaka::atomicCas(acc,
+ (PtrAsInt*)(&theOuterNeighbors),
+ zero,
+ (PtrAsInt)(&cellNeighbors[i]),
+ alpaka::hierarchy::Blocks{}); // if fails we cannot give "i" back...
+#endif
+ } else
+ return -1;
+ }
+ alpaka::mem_fence(acc, alpaka::memory_scope::Grid{});
+ return outerNeighbors().push_back(acc, t);
+ }
+
+ template
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) int addTrack(TAcc const& acc,
+ tindex_type t,
+ CellTracksVector& cellTracks) {
+ if (tracks().empty()) {
+ auto i = cellTracks.extend(acc); // maybe wasted....
+ if (i > 0) {
+ cellTracks[i].reset();
+ alpaka::mem_fence(acc, alpaka::memory_scope::Grid{});
+#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
+ theTracks = &cellTracks[i];
+#else
+ auto zero = (PtrAsInt)(&cellTracks[0]);
+ alpaka::atomicCas(acc,
+ (PtrAsInt*)(&theTracks),
+ zero,
+ (PtrAsInt)(&cellTracks[i]),
+ alpaka::hierarchy::Blocks{}); // if fails we cannot give "i" back...
+
+#endif
+ } else
+ return -1;
+ }
+ alpaka::mem_fence(acc, alpaka::memory_scope::Grid{});
+ return tracks().push_back(acc, t);
+ }
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE CellTracks& tracks() { return *theTracks; }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE CellTracks const& tracks() const { return *theTracks; }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE CellNeighbors& outerNeighbors() { return *theOuterNeighbors; }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_x(const HitsConstView& hh) const { return hh[theInnerHitId].xGlobal(); }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_x(const HitsConstView& hh) const { return hh[theOuterHitId].xGlobal(); }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_y(const HitsConstView& hh) const { return hh[theInnerHitId].yGlobal(); }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_y(const HitsConstView& hh) const { return hh[theOuterHitId].yGlobal(); }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_z(const HitsConstView& hh) const { return theInnerZ; }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_z(const HitsConstView& hh) const { return hh[theOuterHitId].zGlobal(); }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_r(const HitsConstView& hh) const { return theInnerR; }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_r(const HitsConstView& hh) const { return hh[theOuterHitId].rGlobal(); }
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE auto inner_iphi(const HitsConstView& hh) const { return hh[theInnerHitId].iphi(); }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE auto outer_iphi(const HitsConstView& hh) const { return hh[theOuterHitId].iphi(); }
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_detIndex(const HitsConstView& hh) const {
+ return hh[theInnerHitId].detectorIndex();
+ }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_detIndex(const HitsConstView& hh) const {
+ return hh[theOuterHitId].detectorIndex();
+ }
+
+ constexpr unsigned int inner_hit_id() const { return theInnerHitId; }
+ constexpr unsigned int outer_hit_id() const { return theOuterHitId; }
+
+ ALPAKA_FN_ACC void print_cell() const {
+ printf("printing cell: on layerPair: %d, innerHitId: %d, outerHitId: %d \n",
+ theLayerPairId_,
+ theInnerHitId,
+ theOuterHitId);
+ }
+
+ ALPAKA_FN_ACC bool check_alignment(const HitsConstView& hh,
+ CACellT const& otherCell,
+ const float ptmin,
+ const float hardCurvCut,
+ const float caThetaCutBarrel,
+ const float caThetaCutForward,
+ const float dcaCutInnerTriplet,
+ const float dcaCutOuterTriplet) const {
+ // detIndex of the layerStart for the Phase1 Pixel Detector:
+ // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID]
+ // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856]
+ auto ri = inner_r(hh);
+ auto zi = inner_z(hh);
+
+ auto ro = outer_r(hh);
+ auto zo = outer_z(hh);
+
+ auto r1 = otherCell.inner_r(hh);
+ auto z1 = otherCell.inner_z(hh);
+ auto isBarrel = otherCell.outer_detIndex(hh) < TrackerTraits::last_barrel_detIndex;
+ // TODO tune CA cuts below (theta and dca)
+ bool aligned = areAlignedRZ(r1, z1, ri, zi, ro, zo, ptmin, isBarrel ? caThetaCutBarrel : caThetaCutForward);
+ return (aligned && dcaCut(hh,
+ otherCell,
+ otherCell.inner_detIndex(hh) < TrackerTraits::last_bpix1_detIndex ? dcaCutInnerTriplet
+ : dcaCutOuterTriplet,
+ hardCurvCut));
+ }
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) static bool areAlignedRZ(
+ float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) {
+ float radius_diff = std::abs(r1 - ro);
+ float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo);
+
+ float pMin = ptmin * std::sqrt(distance_13_squared); // this needs to be divided by
+ // radius_diff later
+
+ float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri));
+ return tan_12_13_half_mul_distance_13_squared * pMin <= thetaCut * distance_13_squared * radius_diff;
+ }
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE bool dcaCut(const HitsConstView& hh,
+ CACellT const& otherCell,
+ const float region_origin_radius_plus_tolerance,
+ const float maxCurv) const {
+ auto x1 = otherCell.inner_x(hh);
+ auto y1 = otherCell.inner_y(hh);
+
+ auto x2 = inner_x(hh);
+ auto y2 = inner_y(hh);
+
+ auto x3 = outer_x(hh);
+ auto y3 = outer_y(hh);
+
+ CircleEq eq(x1, y1, x2, y2, x3, y3);
+
+ if (eq.curvature() > maxCurv)
+ return false;
+
+ return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
+ }
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) static bool dcaCutH(
+ float x1,
+ float y1,
+ float x2,
+ float y2,
+ float x3,
+ float y3,
+ const float region_origin_radius_plus_tolerance,
+ const float maxCurv) {
+ CircleEq eq(x1, y1, x2, y2, x3, y3);
+
+ if (eq.curvature() > maxCurv)
+ return false;
+
+ return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature());
+ }
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hole0(const HitsConstView& hh, CACellT const& innerCell) const {
+ using namespace phase1PixelTopology;
+
+ int p = innerCell.inner_iphi(hh);
+ if (p < 0)
+ p += std::numeric_limits::max();
+ p = (max_ladder_bpx0 * p) / std::numeric_limits::max();
+ p %= max_ladder_bpx0;
+ auto il = first_ladder_bpx0 + p;
+ auto r0 = hh.averageGeometry().ladderR[il];
+ auto ri = innerCell.inner_r(hh);
+ auto zi = innerCell.inner_z(hh);
+ auto ro = outer_r(hh);
+ auto zo = outer_z(hh);
+ auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri);
+ auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]);
+ auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0);
+ auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0);
+ return gap;
+ }
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hole4(const HitsConstView& hh, CACellT const& innerCell) const {
+ using namespace phase1PixelTopology;
+
+ int p = outer_iphi(hh);
+ if (p < 0)
+ p += std::numeric_limits::max();
+ p = (max_ladder_bpx4 * p) / std::numeric_limits::max();
+ p %= max_ladder_bpx4;
+ auto il = first_ladder_bpx4 + p;
+ auto r4 = hh.averageGeometry().ladderR[il];
+ auto ri = innerCell.inner_r(hh);
+ auto zi = innerCell.inner_z(hh);
+ auto ro = outer_r(hh);
+ auto zo = outer_z(hh);
+ auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri);
+ auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]);
+ auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4);
+ auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4);
+ auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0];
+ auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1];
+ return gap || holeP || holeN;
+ }
+
+ // trying to free the track building process from hardcoded layers, leaving
+ // the visit of the graph based on the neighborhood connections between cells.
+ template
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE void find_ntuplets(TAcc const& acc,
+ const HitsConstView& hh,
+ CACellT* __restrict__ cells,
+ CellTracksVector& cellTracks,
+ HitContainer& foundNtuplets,
+ cms::alpakatools::AtomicPairCounter& apc,
+ Quality* __restrict__ quality,
+ TmpTuple& tmpNtuplet,
+ const unsigned int minHitsPerNtuplet,
+ bool startAt0) const {
+ // the building process for a track ends if:
+ // it has no right neighbor
+ // it has no compatible neighbor
+ // the ntuplets is then saved if the number of hits it contains is greater
+ // than a threshold
+
+ if constexpr (DEPTH <= 0) {
+ printf("ERROR: CACellT::find_ntuplets reached full depth!\n");
+ ALPAKA_ASSERT_OFFLOAD(false);
+ } else {
+ auto doubletId = this - cells;
+ tmpNtuplet.push_back_unsafe(doubletId);
+ ALPAKA_ASSERT_OFFLOAD(tmpNtuplet.size() <= int(TrackerTraits::maxHitsOnTrack - 3));
+
+ bool last = true;
+ for (unsigned int otherCell : outerNeighbors()) {
+ if (cells[otherCell].isKilled())
+ continue; // killed by earlyFishbone
+ last = false;
+ cells[otherCell].template find_ntuplets(
+ acc, hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0);
+ }
+ if (last) { // if long enough save...
+ if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) {
+#ifdef ONLY_TRIPLETS_IN_HOLE
+ // triplets accepted only pointing to the hole
+ if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) ||
+ ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]])))
+#endif
+ {
+ hindex_type hits[TrackerTraits::maxDepth + 2];
+ auto nh = 0U;
+ constexpr int maxFB = 2; // for the time being let's limit this
+ int nfb = 0;
+ for (auto c : tmpNtuplet) {
+ hits[nh++] = cells[c].theInnerHitId;
+ if (nfb < maxFB && cells[c].hasFishbone()) {
+ ++nfb;
+ hits[nh++] = cells[c].theFishboneId; // Fishbone hit is always outer than inner hit
+ }
+ }
+ assert(nh < TrackerTraits::maxHitsOnTrack);
+ hits[nh] = theOuterHitId;
+ auto it = foundNtuplets.bulkFill(acc, apc, hits, nh + 1);
+ if (it >= 0) { // if negative is overflow....
+ for (auto c : tmpNtuplet)
+ cells[c].addTrack(acc, it, cellTracks);
+ quality[it] = bad; // initialize to bad
+ }
+ }
+ }
+ }
+ tmpNtuplet.pop_back();
+ assert(tmpNtuplet.size() < int(TrackerTraits::maxHitsOnTrack - 1));
+ }
+ }
+
+ // Cell status management
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); }
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE int16_t layerPairId() const { return theLayerPairId_; }
+
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); }
+
+ template
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE void setFishbone(TAcc const& acc, hindex_type id, float z, const HitsConstView& hh) {
+ // make it deterministic: use the farther apart (in z)
+ auto old = theFishboneId;
+ while (old !=
+ alpaka::atomicCas(
+ acc,
+ &theFishboneId,
+ old,
+ (invalidHitId == old || std::abs(z - theInnerZ) > std::abs(hh[old].zGlobal() - theInnerZ)) ? id : old,
+ alpaka::hierarchy::Blocks{}))
+ old = theFishboneId;
+ }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE auto fishboneId() const { return theFishboneId; }
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hasFishbone() const { return theFishboneId != invalidHitId; }
+
+ private:
+ CellNeighbors* theOuterNeighbors;
+ CellTracks* theTracks;
+
+ int16_t theLayerPairId_;
+ uint16_t theStatus_; // tbd
+
+ float theInnerZ;
+ float theInnerR;
+ hindex_type theInnerHitId;
+ hindex_type theOuterHitId;
+ hindex_type theFishboneId;
+ };
+} // namespace ALPAKA_ACCELERATOR_NAMESPACE
+#endif // RecoPixelVertexing_PixelTriplets_plugins_CACellT_h
diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h
new file mode 100644
index 0000000000000..343e0cf9ad005
--- /dev/null
+++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h
@@ -0,0 +1,148 @@
+#ifndef RecoPixelVertexing_PixelTriplets_alpaka_CAFishbone_h
+#define RecoPixelVertexing_PixelTriplets_alpaka_CAFishbone_h
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/traits.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h"
+#include "DataFormats/Math/interface/approx_atan2.h"
+
+#include "CACell.h"
+#include "CAStructures.h"
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+ namespace caPixelDoublets {
+
+ template
+ using CellNeighbors = caStructures::CellNeighborsT;
+ template
+ using CellTracks = caStructures::CellTracksT;
+ template
+ using CellNeighborsVector = caStructures::CellNeighborsVectorT;
+ template
+ using CellTracksVector = caStructures::CellTracksVectorT;
+ template
+ using OuterHitOfCell = caStructures::OuterHitOfCellT;
+ template
+ using HitsConstView = typename CACellT::HitsConstView;
+
+ template