diff --git a/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorKernelsImpl.h b/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorKernelsImpl.h index fbf175836..bb95fdada 100644 --- a/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorKernelsImpl.h +++ b/src/alpaka/plugin-PixelTriplets/alpaka/CAHitNtupletGeneratorKernelsImpl.h @@ -327,7 +327,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (doit) { GPUCACell::TmpTuple stack; stack.reset(); - thisCell.find_ntuplets( + thisCell.find_ntuplets<6>( acc, hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3); ALPAKA_ASSERT_OFFLOAD(stack.empty()); // printf("in %d found quadruplets: %d\n", cellIndex, apc->get()); diff --git a/src/alpaka/plugin-PixelTriplets/alpaka/GPUCACell.h b/src/alpaka/plugin-PixelTriplets/alpaka/GPUCACell.h index 3e6afafd5..f204cf8fd 100644 --- a/src/alpaka/plugin-PixelTriplets/alpaka/GPUCACell.h +++ b/src/alpaka/plugin-PixelTriplets/alpaka/GPUCACell.h @@ -323,7 +323,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // trying to free the track building process from hardcoded layers, leaving // the visit of the graph based on the neighborhood connections between cells. - template + template ALPAKA_FN_ACC ALPAKA_FN_INLINE void find_ntuplets(const T_Acc& acc, Hits const& hh, GPUCACell* __restrict__ cells, @@ -334,49 +334,54 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { TmpTuple& tmpNtuplet, const unsigned int minHitsPerNtuplet, bool startAt0) const { - // the building process for a track ends if: - // it has no right neighbor - // it has no compatible neighbor - // the ntuplets is then saved if the number of hits it contains is greater - // than a threshold - - tmpNtuplet.push_back_unsafe(theDoubletId); - ALPAKA_ASSERT_OFFLOAD(tmpNtuplet.size() <= 4); - - bool last = true; - for (int j = 0; j < outerNeighbors().size(); ++j) { - auto otherCell = outerNeighbors()[j]; - if (cells[otherCell].theDoubletId < 0) - continue; // killed by earlyFishbone - last = false; - cells[otherCell].find_ntuplets( - acc, hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0); - } - if (last) { // if long enough save... - if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) { + if constexpr (DEPTH == 0) { + printf("ERROR: GPUCACell::find_ntuplets reached full depth!\n"); + ALPAKA_ASSERT_OFFLOAD(false); + } else { + // the building process for a track ends if: + // it has no right neighbor + // it has no compatible neighbor + // the ntuplets is then saved if the number of hits it contains is greater + // than a threshold + + tmpNtuplet.push_back_unsafe(theDoubletId); + ALPAKA_ASSERT_OFFLOAD(tmpNtuplet.size() <= 4); + + bool last = true; + for (int j = 0; j < outerNeighbors().size(); ++j) { + auto otherCell = outerNeighbors()[j]; + if (cells[otherCell].theDoubletId < 0) + continue; // killed by earlyFishbone + last = false; + cells[otherCell].find_ntuplets( + acc, hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0); + } + if (last) { // if long enough save... + if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) { #ifdef ONLY_TRIPLETS_IN_HOLE - // triplets accepted only pointing to the hole - if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) || - ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]]))) + // triplets accepted only pointing to the hole + if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) || + ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]]))) #endif - { - hindex_type hits[6]; - auto nh = 0U; - for (auto c : tmpNtuplet) { - hits[nh++] = cells[c].theInnerHitId; - } - hits[nh] = theOuterHitId; - auto it = foundNtuplets.bulkFill(acc, apc, hits, tmpNtuplet.size() + 1); - if (it >= 0) { // if negative is overflow.... - for (auto c : tmpNtuplet) - cells[c].addTrack(acc, it, cellTracks); - quality[it] = bad; // initialize to bad + { + hindex_type hits[6]; + auto nh = 0U; + for (auto c : tmpNtuplet) { + hits[nh++] = cells[c].theInnerHitId; + } + hits[nh] = theOuterHitId; + auto it = foundNtuplets.bulkFill(acc, apc, hits, tmpNtuplet.size() + 1); + if (it >= 0) { // if negative is overflow.... + for (auto c : tmpNtuplet) + cells[c].addTrack(acc, it, cellTracks); + quality[it] = bad; // initialize to bad + } } } } + tmpNtuplet.pop_back(); + ALPAKA_ASSERT_OFFLOAD(tmpNtuplet.size() < 4); } - tmpNtuplet.pop_back(); - ALPAKA_ASSERT_OFFLOAD(tmpNtuplet.size() < 4); } private: @@ -394,7 +399,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { hindex_type theInnerHitId; hindex_type theOuterHitId; }; - } // namespace ALPAKA_ACCELERATOR_NAMESPACE #endif // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h diff --git a/src/cuda/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h b/src/cuda/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h index e35e20be9..c663dbc56 100644 --- a/src/cuda/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h +++ b/src/cuda/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h @@ -287,7 +287,8 @@ __global__ void kernel_find_ntuplets(GPUCACell::Hits const *__restrict__ hhp, if (doit) { GPUCACell::TmpTuple stack; stack.reset(); - thisCell.find_ntuplets(hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3); + thisCell.find_ntuplets<6>( + hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3); assert(stack.empty()); // printf("in %d found quadruplets: %d\n", cellIndex, apc->get()); } diff --git a/src/cuda/plugin-PixelTriplets/GPUCACell.h b/src/cuda/plugin-PixelTriplets/GPUCACell.h index df4354e59..3d964bb40 100644 --- a/src/cuda/plugin-PixelTriplets/GPUCACell.h +++ b/src/cuda/plugin-PixelTriplets/GPUCACell.h @@ -275,6 +275,7 @@ class GPUCACell { // trying to free the track building process from hardcoded layers, leaving // the visit of the graph based on the neighborhood connections between cells. + template __device__ inline void find_ntuplets(Hits const& hh, GPUCACell* __restrict__ cells, CellTracksVector& cellTracks, @@ -299,7 +300,7 @@ class GPUCACell { if (cells[otherCell].theDoubletId < 0) continue; // killed by earlyFishbone last = false; - cells[otherCell].find_ntuplets( + cells[otherCell].find_ntuplets( hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0); } if (last) { // if long enough save... @@ -345,4 +346,22 @@ class GPUCACell { hindex_type theOuterHitId; }; +template <> +__device__ inline void GPUCACell::find_ntuplets<0>(Hits const& hh, + GPUCACell* __restrict__ cells, + CellTracksVector& cellTracks, + HitContainer& foundNtuplets, + cms::cuda::AtomicPairCounter& apc, + Quality* __restrict__ quality, + TmpTuple& tmpNtuplet, + const unsigned int minHitsPerNtuplet, + bool startAt0) const { + printf("ERROR: GPUCACell::find_ntuplets reached full depth!\n"); +#ifdef __CUDA_ARCH__ + __trap(); +#else + abort(); +#endif +} + #endif // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h diff --git a/src/cudacompat/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h b/src/cudacompat/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h index e35e20be9..c663dbc56 100644 --- a/src/cudacompat/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h +++ b/src/cudacompat/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h @@ -287,7 +287,8 @@ __global__ void kernel_find_ntuplets(GPUCACell::Hits const *__restrict__ hhp, if (doit) { GPUCACell::TmpTuple stack; stack.reset(); - thisCell.find_ntuplets(hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3); + thisCell.find_ntuplets<6>( + hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3); assert(stack.empty()); // printf("in %d found quadruplets: %d\n", cellIndex, apc->get()); } diff --git a/src/cudacompat/plugin-PixelTriplets/GPUCACell.h b/src/cudacompat/plugin-PixelTriplets/GPUCACell.h index df4354e59..3d964bb40 100644 --- a/src/cudacompat/plugin-PixelTriplets/GPUCACell.h +++ b/src/cudacompat/plugin-PixelTriplets/GPUCACell.h @@ -275,6 +275,7 @@ class GPUCACell { // trying to free the track building process from hardcoded layers, leaving // the visit of the graph based on the neighborhood connections between cells. + template __device__ inline void find_ntuplets(Hits const& hh, GPUCACell* __restrict__ cells, CellTracksVector& cellTracks, @@ -299,7 +300,7 @@ class GPUCACell { if (cells[otherCell].theDoubletId < 0) continue; // killed by earlyFishbone last = false; - cells[otherCell].find_ntuplets( + cells[otherCell].find_ntuplets( hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0); } if (last) { // if long enough save... @@ -345,4 +346,22 @@ class GPUCACell { hindex_type theOuterHitId; }; +template <> +__device__ inline void GPUCACell::find_ntuplets<0>(Hits const& hh, + GPUCACell* __restrict__ cells, + CellTracksVector& cellTracks, + HitContainer& foundNtuplets, + cms::cuda::AtomicPairCounter& apc, + Quality* __restrict__ quality, + TmpTuple& tmpNtuplet, + const unsigned int minHitsPerNtuplet, + bool startAt0) const { + printf("ERROR: GPUCACell::find_ntuplets reached full depth!\n"); +#ifdef __CUDA_ARCH__ + __trap(); +#else + abort(); +#endif +} + #endif // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h diff --git a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h index 6506a104b..f14f5d8ea 100644 --- a/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h +++ b/src/cudadev/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h @@ -284,7 +284,8 @@ __global__ void kernel_find_ntuplets(GPUCACell::Hits const *__restrict__ hhp, if (doit) { GPUCACell::TmpTuple stack; stack.reset(); - thisCell.find_ntuplets(hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3); + thisCell.find_ntuplets<6>( + hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3); assert(stack.empty()); // printf("in %d found quadruplets: %d\n", cellIndex, apc->get()); } diff --git a/src/cudadev/plugin-PixelTriplets/GPUCACell.h b/src/cudadev/plugin-PixelTriplets/GPUCACell.h index 159db16bb..58ef54ae8 100644 --- a/src/cudadev/plugin-PixelTriplets/GPUCACell.h +++ b/src/cudadev/plugin-PixelTriplets/GPUCACell.h @@ -271,6 +271,7 @@ class GPUCACell { // trying to free the track building process from hardcoded layers, leaving // the visit of the graph based on the neighborhood connections between cells. + template __device__ inline void find_ntuplets(Hits const& hh, GPUCACell* __restrict__ cells, CellTracksVector& cellTracks, @@ -294,7 +295,7 @@ class GPUCACell { if (cells[otherCell].theDoubletId_ < 0) continue; // killed by earlyFishbone last = false; - cells[otherCell].find_ntuplets( + cells[otherCell].find_ntuplets( hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0); } if (last) { // if long enough save... @@ -347,4 +348,22 @@ class GPUCACell { hindex_type theOuterHitId; }; +template <> +__device__ inline void GPUCACell::find_ntuplets<0>(Hits const& hh, + GPUCACell* __restrict__ cells, + CellTracksVector& cellTracks, + HitContainer& foundNtuplets, + cms::cuda::AtomicPairCounter& apc, + Quality* __restrict__ quality, + TmpTuple& tmpNtuplet, + const unsigned int minHitsPerNtuplet, + bool startAt0) const { + printf("ERROR: GPUCACell::find_ntuplets reached full depth!\n"); +#ifdef __CUDA_ARCH__ + __trap(); +#else + abort(); +#endif +} + #endif // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h diff --git a/src/cudauvm/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h b/src/cudauvm/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h index e35e20be9..c663dbc56 100644 --- a/src/cudauvm/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h +++ b/src/cudauvm/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h @@ -287,7 +287,8 @@ __global__ void kernel_find_ntuplets(GPUCACell::Hits const *__restrict__ hhp, if (doit) { GPUCACell::TmpTuple stack; stack.reset(); - thisCell.find_ntuplets(hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3); + thisCell.find_ntuplets<6>( + hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3); assert(stack.empty()); // printf("in %d found quadruplets: %d\n", cellIndex, apc->get()); } diff --git a/src/cudauvm/plugin-PixelTriplets/GPUCACell.h b/src/cudauvm/plugin-PixelTriplets/GPUCACell.h index df4354e59..3d964bb40 100644 --- a/src/cudauvm/plugin-PixelTriplets/GPUCACell.h +++ b/src/cudauvm/plugin-PixelTriplets/GPUCACell.h @@ -275,6 +275,7 @@ class GPUCACell { // trying to free the track building process from hardcoded layers, leaving // the visit of the graph based on the neighborhood connections between cells. + template __device__ inline void find_ntuplets(Hits const& hh, GPUCACell* __restrict__ cells, CellTracksVector& cellTracks, @@ -299,7 +300,7 @@ class GPUCACell { if (cells[otherCell].theDoubletId < 0) continue; // killed by earlyFishbone last = false; - cells[otherCell].find_ntuplets( + cells[otherCell].find_ntuplets( hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0); } if (last) { // if long enough save... @@ -345,4 +346,22 @@ class GPUCACell { hindex_type theOuterHitId; }; +template <> +__device__ inline void GPUCACell::find_ntuplets<0>(Hits const& hh, + GPUCACell* __restrict__ cells, + CellTracksVector& cellTracks, + HitContainer& foundNtuplets, + cms::cuda::AtomicPairCounter& apc, + Quality* __restrict__ quality, + TmpTuple& tmpNtuplet, + const unsigned int minHitsPerNtuplet, + bool startAt0) const { + printf("ERROR: GPUCACell::find_ntuplets reached full depth!\n"); +#ifdef __CUDA_ARCH__ + __trap(); +#else + abort(); +#endif +} + #endif // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h diff --git a/src/kokkos/plugin-PixelTriplets/GPUCACell.h b/src/kokkos/plugin-PixelTriplets/GPUCACell.h index 81d1bb464..30dfeedc1 100644 --- a/src/kokkos/plugin-PixelTriplets/GPUCACell.h +++ b/src/kokkos/plugin-PixelTriplets/GPUCACell.h @@ -281,6 +281,7 @@ class GPUCACell { // trying to free the track building process from hardcoded layers, leaving // the visit of the graph based on the neighborhood connections between cells. + template KOKKOS_INLINE_FUNCTION void find_ntuplets(Hits const& hh, GPUCACell* __restrict__ cells, CellTracksVector& cellTracks, @@ -305,7 +306,7 @@ class GPUCACell { if (cells[otherCell].theDoubletId < 0) continue; // killed by earlyFishbone last = false; - cells[otherCell].find_ntuplets( + cells[otherCell].find_ntuplets( hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0); } if (last) { // if long enough save... @@ -351,4 +352,18 @@ class GPUCACell { hindex_type theOuterHitId; }; +template <> +KOKKOS_INLINE_FUNCTION void GPUCACell::find_ntuplets<0>(Hits const& hh, + GPUCACell* __restrict__ cells, + CellTracksVector& cellTracks, + HitContainer& foundNtuplets, + cms::kokkos::AtomicPairCounter& apc, + Quality* __restrict__ quality, + TmpTuple& tmpNtuplet, + const unsigned int minHitsPerNtuplet, + bool startAt0) const { + printf("ERROR: GPUCACell::find_ntuplets reached full depth!\n"); + assert(false); +} + #endif // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h diff --git a/src/kokkos/plugin-PixelTriplets/kokkos/CAHitNtupletGeneratorKernelsImpl.h b/src/kokkos/plugin-PixelTriplets/kokkos/CAHitNtupletGeneratorKernelsImpl.h index 44e77488d..9f1b651ca 100644 --- a/src/kokkos/plugin-PixelTriplets/kokkos/CAHitNtupletGeneratorKernelsImpl.h +++ b/src/kokkos/plugin-PixelTriplets/kokkos/CAHitNtupletGeneratorKernelsImpl.h @@ -293,7 +293,7 @@ namespace KOKKOS_NAMESPACE { if (doit) { GPUCACell::TmpTuple stack; stack.reset(); - thisCell.find_ntuplets( + thisCell.find_ntuplets<6>( hh, cells.data(), cellTracks(), *foundNtuplets, apc(), quality, stack, minHitsPerNtuplet, pid < 3); assert(stack.empty()); // printf("in %d found quadruplets: %d\n", cellIndex, apc->get()); diff --git a/src/serial/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h b/src/serial/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h index 66bdc69e8..308a1c391 100644 --- a/src/serial/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h +++ b/src/serial/plugin-PixelTriplets/CAHitNtupletGeneratorKernelsImpl.h @@ -283,7 +283,8 @@ __global__ void kernel_find_ntuplets(GPUCACell::Hits const *__restrict__ hhp, if (doit) { GPUCACell::TmpTuple stack; stack.reset(); - thisCell.find_ntuplets(hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3); + thisCell.find_ntuplets<6>( + hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3); assert(stack.empty()); // printf("in %d found quadruplets: %d\n", cellIndex, apc->get()); } diff --git a/src/serial/plugin-PixelTriplets/GPUCACell.h b/src/serial/plugin-PixelTriplets/GPUCACell.h index 339d53b39..6d1f2c9de 100644 --- a/src/serial/plugin-PixelTriplets/GPUCACell.h +++ b/src/serial/plugin-PixelTriplets/GPUCACell.h @@ -275,6 +275,7 @@ class GPUCACell { // trying to free the track building process from hardcoded layers, leaving // the visit of the graph based on the neighborhood connections between cells. + template __device__ inline void find_ntuplets(Hits const& hh, GPUCACell* __restrict__ cells, CellTracksVector& cellTracks, @@ -299,7 +300,7 @@ class GPUCACell { if (cells[otherCell].theDoubletId < 0) continue; // killed by earlyFishbone last = false; - cells[otherCell].find_ntuplets( + cells[otherCell].find_ntuplets( hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0); } if (last) { // if long enough save... @@ -345,4 +346,22 @@ class GPUCACell { hindex_type theOuterHitId; }; +template <> +__device__ inline void GPUCACell::find_ntuplets<0>(Hits const& hh, + GPUCACell* __restrict__ cells, + CellTracksVector& cellTracks, + HitContainer& foundNtuplets, + cms::cuda::AtomicPairCounter& apc, + Quality* __restrict__ quality, + TmpTuple& tmpNtuplet, + const unsigned int minHitsPerNtuplet, + bool startAt0) const { + printf("ERROR: GPUCACell::find_ntuplets reached full depth!\n"); +#ifdef __CUDA_ARCH__ + __trap(); +#else + abort(); +#endif +} + #endif // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h