diff --git a/CUDADataFormats/Track/BuildFile.xml b/CUDADataFormats/Track/BuildFile.xml
deleted file mode 100644
index cf07e3b540f24..0000000000000
--- a/CUDADataFormats/Track/BuildFile.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-<use name="cuda"/>
-<use name="rootcore"/>
-<use name="CUDADataFormats/Common"/>
-<use name="DataFormats/Common"/>
-<use name="DataFormats/SoATemplate" source_only="1"/>
-<use name="HeterogeneousCore/CUDAUtilities"/>
-<use name="eigen"/>
-<export>
-    <lib name="1"/>
-</export>
diff --git a/CUDADataFormats/Track/README.md b/CUDADataFormats/Track/README.md
deleted file mode 100644
index 8f66d9e4c4467..0000000000000
--- a/CUDADataFormats/Track/README.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# Track CUDA Data Formats
-
-`CUDADataFormat`s meant to be used on Host (CPU) or Device (CUDA GPU) for
-storing information about `Track`s created during the Pixel-local Reconstruction
-chain. It stores data in an SoA manner. It combines the data contained in the
-deprecated `TrackSoAHeterogeneousT` and `TrajectoryStateSoAT` classes. 
-
-The host format is inheriting from `CUDADataFormats/Common/interface/PortableHostCollection.h`,
-while the device format is inheriting from `CUDADataFormats/Common/interface/PortableDeviceCollection.h`
-
-Both formats use the same SoA Layout (`TrackSoAHeterogeneousLayout`) which is generated
-via the `GENERATE_SOA_LAYOUT` macro in the `PixelTrackUtilities.h` file.
-
-## Notes
-
--`hitIndices` and `detIndices`, instances of `HitContainer`, have been added into the
-layout as `SOA_SCALAR`s, meaning that they manage their own data independently from the SoA
-`Layout`. This could be improved in the future, if `HitContainer` (aka a `OneToManyAssoc` of fixed size)
-is replaced, but there don't seem to be any conflicts in including it in the `Layout` like this.
-- Host and Device classes should **not** be created via inheritance, as they're done here,
-but via composition. See [this discussion](https://github.com/cms-sw/cmssw/pull/40465#discussion_r1066039309).
-
-## TrackSoAHeterogeneousHost
-
-The version of the data format to be used for storing `Track` data on the CPU. 
-Instances of this class are to be used for:
-
-- Having a place to copy data to host from device, via `cudaMemcpy`, or
-- Running host-side algorithms using data stored in an SoA manner.
-
-## TrackSoAHeterogeneousDevice
-
-The version of the data format to be used for storing `Track` data on the GPU.
-
-Instances of `TrackSoAHeterogeneousDevice` are to be created on host and be
-used on device only. To do so, the instance's `view()` method is to be called
-to pass a `View` to any kernel launched. Accessing data from the `view()` is not
-possible on the host side.
-
-## Utilities
-
-`PixelTrackUtilities.h` contains a collection of methods which were originally
-defined as class methods inside either `TrackSoAHeterogeneousT` and `TrajectoryStateSoAT`
-which have been adapted to operate on `View` instances, so that they are callable
-from within `__global__` kernels, on both CPU and CPU. 
-
-## Use case
-
-See `test/TrackSoAHeterogeneous_test.cpp` for a simple example of instantiation,
-processing and copying from device to host.
diff --git a/CUDADataFormats/Track/interface/PixelTrackUtilities.h b/CUDADataFormats/Track/interface/PixelTrackUtilities.h
deleted file mode 100644
index 6d7ea258be8d2..0000000000000
--- a/CUDADataFormats/Track/interface/PixelTrackUtilities.h
+++ /dev/null
@@ -1,243 +0,0 @@
-#ifndef CUDADataFormats_Track_PixelTrackUtilities_h
-#define CUDADataFormats_Track_PixelTrackUtilities_h
-
-#include <Eigen/Dense>
-#include <Eigen/Core>
-#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
-#include "DataFormats/SoATemplate/interface/SoALayout.h"
-
-namespace pixelTrack {
-
-  enum class Quality : uint8_t { bad = 0, edup, dup, loose, strict, tight, highPurity, notQuality };
-  constexpr uint32_t qualitySize{uint8_t(Quality::notQuality)};
-  const std::string qualityName[qualitySize]{"bad", "edup", "dup", "loose", "strict", "tight", "highPurity"};
-  inline Quality qualityByName(std::string const &name) {
-    auto qp = std::find(qualityName, qualityName + qualitySize, name) - qualityName;
-    return static_cast<Quality>(qp);
-  }
-
-}  // namespace pixelTrack
-
-template <typename TrackerTraits>
-struct TrackSoA {
-  static constexpr int32_t S = TrackerTraits::maxNumberOfTuples;
-  static constexpr int32_t H = TrackerTraits::avgHitsPerTrack;
-  // Aliases in order to not confuse the GENERATE_SOA_LAYOUT
-  // macro with weird colons and angled brackets.
-  using Vector5f = Eigen::Matrix<float, 5, 1>;
-  using Vector15f = Eigen::Matrix<float, 15, 1>;
-  using Quality = pixelTrack::Quality;
-
-  using hindex_type = uint32_t;
-
-  using HitContainer = cms::cuda::OneToManyAssoc<hindex_type, S + 1, H * S>;
-
-  GENERATE_SOA_LAYOUT(TrackSoALayout,
-                      SOA_COLUMN(Quality, quality),
-                      SOA_COLUMN(float, chi2),
-                      SOA_COLUMN(int8_t, nLayers),
-                      SOA_COLUMN(float, eta),
-                      SOA_COLUMN(float, pt),
-                      SOA_EIGEN_COLUMN(Vector5f, state),
-                      SOA_EIGEN_COLUMN(Vector15f, covariance),
-                      SOA_SCALAR(int, nTracks),
-                      SOA_SCALAR(HitContainer, hitIndices),
-                      SOA_SCALAR(HitContainer, detIndices))
-};
-
-// Methods that operate on View and ConstView of the TrackSoA, and cannot be class methods.
-
-template <typename TrackerTraits>
-struct TracksUtilities {
-  using TrackSoAView = typename TrackSoA<TrackerTraits>::template TrackSoALayout<>::View;
-  using TrackSoAConstView = typename TrackSoA<TrackerTraits>::template TrackSoALayout<>::ConstView;
-  using hindex_type = typename TrackSoA<TrackerTraits>::hindex_type;
-
-  // State at the Beam spot
-  // phi,tip,1/pt,cotan(theta),zip
-  static __host__ __device__ inline float charge(const TrackSoAConstView &tracks, int32_t i) {
-    return std::copysign(1.f, tracks[i].state()(2));
-  }
-
-  static constexpr __host__ __device__ inline float phi(const TrackSoAConstView &tracks, int32_t i) {
-    return tracks[i].state()(0);
-  }
-
-  static constexpr __host__ __device__ inline float tip(const TrackSoAConstView &tracks, int32_t i) {
-    return tracks[i].state()(1);
-  }
-
-  static constexpr __host__ __device__ inline float zip(const TrackSoAConstView &tracks, int32_t i) {
-    return tracks[i].state()(4);
-  }
-
-  static constexpr __host__ __device__ inline bool isTriplet(const TrackSoAConstView &tracks, int i) {
-    return tracks[i].nLayers() == 3;
-  }
-
-  template <typename V3, typename M3, typename V2, typename M2>
-  static constexpr __host__ __device__ inline void copyFromCircle(
-      TrackSoAView &tracks, V3 const &cp, M3 const &ccov, V2 const &lp, M2 const &lcov, float b, int32_t i) {
-    tracks[i].state() << cp.template cast<float>(), lp.template cast<float>();
-
-    tracks[i].state()(2) = tracks[i].state()(2) * b;
-    auto cov = tracks[i].covariance();
-    cov(0) = ccov(0, 0);
-    cov(1) = ccov(0, 1);
-    cov(2) = b * float(ccov(0, 2));
-    cov(4) = cov(3) = 0;
-    cov(5) = ccov(1, 1);
-    cov(6) = b * float(ccov(1, 2));
-    cov(8) = cov(7) = 0;
-    cov(9) = b * b * float(ccov(2, 2));
-    cov(11) = cov(10) = 0;
-    cov(12) = lcov(0, 0);
-    cov(13) = lcov(0, 1);
-    cov(14) = lcov(1, 1);
-  }
-
-  template <typename V5, typename M5>
-  static constexpr __host__ __device__ inline void copyFromDense(TrackSoAView &tracks,
-                                                                 V5 const &v,
-                                                                 M5 const &cov,
-                                                                 int32_t i) {
-    tracks[i].state() = v.template cast<float>();
-    for (int j = 0, ind = 0; j < 5; ++j)
-      for (auto k = j; k < 5; ++k)
-        tracks[i].covariance()(ind++) = cov(j, k);
-  }
-
-  template <typename V5, typename M5>
-  static constexpr __host__ __device__ inline void copyToDense(const TrackSoAConstView &tracks,
-                                                               V5 &v,
-                                                               M5 &cov,
-                                                               int32_t i) {
-    v = tracks[i].state().template cast<typename V5::Scalar>();
-    for (int j = 0, ind = 0; j < 5; ++j) {
-      cov(j, j) = tracks[i].covariance()(ind++);
-      for (auto k = j + 1; k < 5; ++k)
-        cov(k, j) = cov(j, k) = tracks[i].covariance()(ind++);
-    }
-  }
-
-  static constexpr __host__ __device__ inline int computeNumberOfLayers(const TrackSoAConstView &tracks, int32_t i) {
-    auto pdet = tracks.detIndices().begin(i);
-    int nl = 1;
-    auto ol = pixelTopology::getLayer<TrackerTraits>(*pdet);
-    for (; pdet < tracks.detIndices().end(i); ++pdet) {
-      auto il = pixelTopology::getLayer<TrackerTraits>(*pdet);
-      if (il != ol)
-        ++nl;
-      ol = il;
-    }
-    return nl;
-  }
-
-  static constexpr __host__ __device__ inline int nHits(const TrackSoAConstView &tracks, int i) {
-    return tracks.detIndices().size(i);
-  }
-};
-
-namespace pixelTrack {
-
-  template <typename TrackerTraits, typename Enable = void>
-  struct QualityCutsT {};
-
-  template <typename TrackerTraits>
-  struct QualityCutsT<TrackerTraits, pixelTopology::isPhase1Topology<TrackerTraits>> {
-    using TrackSoAView = typename TrackSoA<TrackerTraits>::template TrackSoALayout<>::View;
-    using TrackSoAConstView = typename TrackSoA<TrackerTraits>::template TrackSoALayout<>::ConstView;
-    using tracksHelper = TracksUtilities<TrackerTraits>;
-    // chi2 cut = chi2Scale * (chi2Coeff[0] + pT/GeV * (chi2Coeff[1] + pT/GeV * (chi2Coeff[2] + pT/GeV * chi2Coeff[3])))
-    float chi2Coeff[4];
-    float chi2MaxPt;  // GeV
-    float chi2Scale;
-
-    struct Region {
-      float maxTip;  // cm
-      float minPt;   // GeV
-      float maxZip;  // cm
-    };
-
-    Region triplet;
-    Region quadruplet;
-
-    __device__ __forceinline__ bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const {
-      // impose "region cuts" based on the fit results (phi, Tip, pt, cotan(theta)), Zip)
-      // default cuts:
-      //   - for triplets:    |Tip| < 0.3 cm, pT > 0.5 GeV, |Zip| < 12.0 cm
-      //   - for quadruplets: |Tip| < 0.5 cm, pT > 0.3 GeV, |Zip| < 12.0 cm
-      // (see CAHitNtupletGeneratorGPU.cc)
-      auto const &region = (nHits > 3) ? quadruplet : triplet;
-      return (std::abs(tracksHelper::tip(tracks, it)) < region.maxTip) and (tracks.pt(it) > region.minPt) and
-             (std::abs(tracksHelper::zip(tracks, it)) < region.maxZip);
-    }
-
-    __device__ __forceinline__ bool strictCut(const TrackSoAConstView &tracks, int it) const {
-      auto roughLog = [](float x) {
-        // max diff [0.5,12] at 1.25 0.16143
-        // average diff  0.0662998
-        union IF {
-          uint32_t i;
-          float f;
-        };
-        IF z;
-        z.f = x;
-        uint32_t lsb = 1 < 21;
-        z.i += lsb;
-        z.i >>= 21;
-        auto f = z.i & 3;
-        int ex = int(z.i >> 2) - 127;
-
-        // log2(1+0.25*f)
-        // averaged over bins
-        const float frac[4] = {0.160497f, 0.452172f, 0.694562f, 0.901964f};
-        return float(ex) + frac[f];
-      };
-
-      float pt = std::min<float>(tracks.pt(it), chi2MaxPt);
-      float chi2Cut = chi2Scale * (chi2Coeff[0] + roughLog(pt) * chi2Coeff[1]);
-      if (tracks.chi2(it) >= chi2Cut) {
-#ifdef NTUPLE_FIT_DEBUG
-        printf("Bad chi2 %d pt %f eta %f chi2 %f\n", it, tracks.pt(it), tracks.eta(it), tracks.chi2(it));
-#endif
-        return true;
-      }
-      return false;
-    }
-  };
-
-  template <typename TrackerTraits>
-  struct QualityCutsT<TrackerTraits, pixelTopology::isPhase2Topology<TrackerTraits>> {
-    using TrackSoAView = typename TrackSoA<TrackerTraits>::template TrackSoALayout<>::View;
-    using TrackSoAConstView = typename TrackSoA<TrackerTraits>::template TrackSoALayout<>::ConstView;
-    using tracksHelper = TracksUtilities<TrackerTraits>;
-
-    float maxChi2;
-    float minPt;
-    float maxTip;
-    float maxZip;
-
-    __device__ __forceinline__ bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const {
-      return (std::abs(tracksHelper::tip(tracks, it)) < maxTip) and (tracks.pt(it) > minPt) and
-             (std::abs(tracksHelper::zip(tracks, it)) < maxZip);
-    }
-    __device__ __forceinline__ bool strictCut(const TrackSoAConstView &tracks, int it) const {
-      return tracks.chi2(it) >= maxChi2;
-    }
-  };
-
-}  // namespace pixelTrack
-
-template <typename TrackerTraits>
-using TrackLayout = typename TrackSoA<TrackerTraits>::template TrackSoALayout<>;
-template <typename TrackerTraits>
-using TrackSoAView = typename TrackSoA<TrackerTraits>::template TrackSoALayout<>::View;
-template <typename TrackerTraits>
-using TrackSoAConstView = typename TrackSoA<TrackerTraits>::template TrackSoALayout<>::ConstView;
-
-template struct TracksUtilities<pixelTopology::Phase1>;
-template struct TracksUtilities<pixelTopology::Phase2>;
-
-#endif
diff --git a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h
deleted file mode 100644
index 04d286a767ab0..0000000000000
--- a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef CUDADataFormats_Track_TrackHeterogeneousDevice_H
-#define CUDADataFormats_Track_TrackHeterogeneousDevice_H
-
-#include <cstdint>
-
-#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
-#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h"
-
-#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
-
-// TODO: The class is created via inheritance of the PortableDeviceCollection.
-// This is generally discouraged, and should be done via composition.
-// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306
-template <typename TrackerTraits>
-class TrackSoAHeterogeneousDevice : public cms::cuda::PortableDeviceCollection<TrackLayout<TrackerTraits>> {
-public:
-  using cms::cuda::PortableDeviceCollection<TrackLayout<TrackerTraits>>::view;
-  using cms::cuda::PortableDeviceCollection<TrackLayout<TrackerTraits>>::const_view;
-  using cms::cuda::PortableDeviceCollection<TrackLayout<TrackerTraits>>::buffer;
-  using cms::cuda::PortableDeviceCollection<TrackLayout<TrackerTraits>>::bufferSize;
-
-  TrackSoAHeterogeneousDevice() = default;  // cms::cuda::Product needs this
-
-  // Constructor which specifies the SoA size
-  explicit TrackSoAHeterogeneousDevice(cudaStream_t stream)
-      : cms::cuda::PortableDeviceCollection<TrackLayout<TrackerTraits>>(TrackerTraits::maxNumberOfTuples, stream) {}
-};
-
-namespace pixelTrack {
-
-  using TrackSoADevicePhase1 = TrackSoAHeterogeneousDevice<pixelTopology::Phase1>;
-  using TrackSoADevicePhase2 = TrackSoAHeterogeneousDevice<pixelTopology::Phase2>;
-  using TrackSoADeviceHIonPhase1 = TrackSoAHeterogeneousDevice<pixelTopology::HIonPhase1>;
-
-}  // namespace pixelTrack
-
-#endif  // CUDADataFormats_Track_TrackHeterogeneousT_H
diff --git a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h
deleted file mode 100644
index 39e83491e1769..0000000000000
--- a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef CUDADataFormats_Track_TrackHeterogeneousHost_H
-#define CUDADataFormats_Track_TrackHeterogeneousHost_H
-
-#include <cstdint>
-
-#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
-#include "CUDADataFormats/Common/interface/PortableHostCollection.h"
-
-// TODO: The class is created via inheritance of the PortableHostCollection.
-// This is generally discouraged, and should be done via composition.
-// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306
-template <typename TrackerTraits>
-class TrackSoAHeterogeneousHost : public cms::cuda::PortableHostCollection<TrackLayout<TrackerTraits>> {
-public:
-  static constexpr int32_t S = TrackerTraits::maxNumberOfTuples;  //TODO: this could be made configurable at runtime
-  explicit TrackSoAHeterogeneousHost() : cms::cuda::PortableHostCollection<TrackLayout<TrackerTraits>>(S) {}
-
-  using cms::cuda::PortableHostCollection<TrackLayout<TrackerTraits>>::view;
-  using cms::cuda::PortableHostCollection<TrackLayout<TrackerTraits>>::const_view;
-  using cms::cuda::PortableHostCollection<TrackLayout<TrackerTraits>>::buffer;
-  using cms::cuda::PortableHostCollection<TrackLayout<TrackerTraits>>::bufferSize;
-
-  // Constructor which specifies the SoA size
-  explicit TrackSoAHeterogeneousHost(cudaStream_t stream)
-      : cms::cuda::PortableHostCollection<TrackLayout<TrackerTraits>>(S, stream) {}
-};
-
-namespace pixelTrack {
-
-  using TrackSoAHostPhase1 = TrackSoAHeterogeneousHost<pixelTopology::Phase1>;
-  using TrackSoAHostPhase2 = TrackSoAHeterogeneousHost<pixelTopology::Phase2>;
-  using TrackSoAHostHIonPhase1 = TrackSoAHeterogeneousHost<pixelTopology::HIonPhase1>;
-}  // namespace pixelTrack
-
-#endif  // CUDADataFormats_Track_TrackHeterogeneousT_H
diff --git a/CUDADataFormats/Track/test/BuildFile.xml b/CUDADataFormats/Track/test/BuildFile.xml
deleted file mode 100644
index 32256c87ed577..0000000000000
--- a/CUDADataFormats/Track/test/BuildFile.xml
+++ /dev/null
@@ -1,22 +0,0 @@
-<use name="HeterogeneousCore/CUDAUtilities"/>
-
-<use name="CUDADataFormats/Track"/>
-
-<bin file="TrajectoryStateSOA_t.cpp" name="cpuTrajectoryStateSOA_t">
-  <use name="eigen"/>
-  <flags CUDA_FLAGS="-g -DGPU_DEBUG"/>
-  <flags CXXFLAGS="-g -DGPU_DEBUG"/>
-</bin>
-<iftool name="cuda-gcc-support">
-  <bin file="TrackSoAHeterogeneous_test.cpp TrackSoAHeterogeneous_test.cu" name="TrackSoAHeterogeneous_test">
-	<use name="eigen"/>
-	<flags CUDA_FLAGS="-g -DGPU_DEBUG"/>
-	<flags CXXFLAGS="-g -DGPU_DEBUG"/>
-  </bin>
-  <bin file="TrajectoryStateSOA_t.cu" name="gpuTrajectoryStateSOA_t">
-	<use name="eigen"/>	
-	<flags CUDA_FLAGS="-g -DGPU_DEBUG"/>
-	<flags CXXFLAGS="-g -DGPU_DEBUG"/>	
-  </bin>
-  
-</iftool>
diff --git a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cpp b/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cpp
deleted file mode 100644
index dafa75e2e18d7..0000000000000
--- a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
-   Simple test for the pixelTrack::TrackSoA data structure
-   which inherits from PortableDeviceCollection.
-
-   Creates an instance of the class (automatically allocates
-   memory on device), passes the view of the SoA data to
-   the CUDA kernels which:
-   - Fill the SoA with data.
-   - Verify that the data written is correct.
-
-   Then, the SoA data are copied back to Host, where
-   a temporary host-side view (tmp_view) is created using
-   the same Layout to access the data on host and print it.
- */
-
-#include <cstdint>
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h"
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
-
-#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
-
-namespace testTrackSoA {
-
-  template <typename TrackerTraits>
-  void runKernels(TrackSoAView<TrackerTraits> &tracks_view, cudaStream_t stream);
-}
-
-int main() {
-  cms::cudatest::requireDevices();
-
-  cudaStream_t stream;
-  cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
-
-  // Inner scope to deallocate memory before destroying the stream
-  {
-    // Instantiate tracks on device. PortableDeviceCollection allocates
-    // SoA on device automatically.
-    TrackSoAHeterogeneousDevice<pixelTopology::Phase1> tracks_d(stream);
-    testTrackSoA::runKernels<pixelTopology::Phase1>(tracks_d.view(), stream);
-
-    // Instantate tracks on host. This is where the data will be
-    // copied to from device.
-    TrackSoAHeterogeneousHost<pixelTopology::Phase1> tracks_h(stream);
-
-    cudaCheck(cudaMemcpyAsync(
-        tracks_h.buffer().get(), tracks_d.const_buffer().get(), tracks_d.bufferSize(), cudaMemcpyDeviceToHost, stream));
-    cudaCheck(cudaStreamSynchronize(stream));
-
-    // Print results
-    std::cout << "pt"
-              << "\t"
-              << "eta"
-              << "\t"
-              << "chi2"
-              << "\t"
-              << "quality"
-              << "\t"
-              << "nLayers"
-              << "\t"
-              << "hitIndices off" << std::endl;
-
-    for (int i = 0; i < 10; ++i) {
-      std::cout << tracks_h.view()[i].pt() << "\t" << tracks_h.view()[i].eta() << "\t" << tracks_h.view()[i].chi2()
-                << "\t" << (int)tracks_h.view()[i].quality() << "\t" << (int)tracks_h.view()[i].nLayers() << "\t"
-                << tracks_h.view().hitIndices().off[i] << std::endl;
-    }
-  }
-  cudaCheck(cudaStreamDestroy(stream));
-
-  return 0;
-}
diff --git a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cu b/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cu
deleted file mode 100644
index 8e8595eb43e94..0000000000000
--- a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cu
+++ /dev/null
@@ -1,63 +0,0 @@
-#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/OneToManyAssoc.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
-
-namespace testTrackSoA {
-
-  // Kernel which fills the TrackSoAView with data
-  // to test writing to it
-  template <typename TrackerTraits>
-  __global__ void fill(TrackSoAView<TrackerTraits> tracks_view) {
-    int i = threadIdx.x;
-    if (i == 0) {
-      tracks_view.nTracks() = 420;
-    }
-
-    for (int j = i; j < tracks_view.metadata().size(); j += blockDim.x) {
-      tracks_view[j].pt() = (float)j;
-      tracks_view[j].eta() = (float)j;
-      tracks_view[j].chi2() = (float)j;
-      tracks_view[j].quality() = (pixelTrack::Quality)(j % 256);
-      tracks_view[j].nLayers() = j % 128;
-      tracks_view.hitIndices().off[j] = j;
-    }
-  }
-
-  // Kernel which reads from the TrackSoAView to verify
-  // that it was written correctly from the fill kernel
-  template <typename TrackerTraits>
-  __global__ void verify(TrackSoAConstView<TrackerTraits> tracks_view) {
-    int i = threadIdx.x;
-
-    if (i == 0) {
-      printf("SoA size: % d, block dims: % d\n", tracks_view.metadata().size(), blockDim.x);
-      assert(tracks_view.nTracks() == 420);
-    }
-    for (int j = i; j < tracks_view.metadata().size(); j += blockDim.x) {
-      assert(abs(tracks_view[j].pt() - (float)j) < .0001);
-      assert(abs(tracks_view[j].eta() - (float)j) < .0001);
-      assert(abs(tracks_view[j].chi2() - (float)j) < .0001);
-      assert(tracks_view[j].quality() == (pixelTrack::Quality)(j % 256));
-      assert(tracks_view[j].nLayers() == j % 128);
-      assert(tracks_view.hitIndices().off[j] == j);
-    }
-  }
-
-  // Host function which invokes the two kernels above
-  template <typename TrackerTraits>
-  void runKernels(TrackSoAView<TrackerTraits>& tracks_view, cudaStream_t stream) {
-    fill<TrackerTraits><<<1, 1024, 0, stream>>>(tracks_view);
-    cudaCheck(cudaGetLastError());
-    cudaCheck(cudaDeviceSynchronize());
-
-    verify<TrackerTraits><<<1, 1024, 0, stream>>>(tracks_view);
-    cudaCheck(cudaGetLastError());
-    cudaCheck(cudaDeviceSynchronize());
-  }
-
-  template void runKernels<pixelTopology::Phase1>(TrackSoAView<pixelTopology::Phase1>& tracks_view,
-                                                  cudaStream_t stream);
-  template void runKernels<pixelTopology::Phase2>(TrackSoAView<pixelTopology::Phase2>& tracks_view,
-                                                  cudaStream_t stream);
-
-}  // namespace testTrackSoA
diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp
deleted file mode 100644
index d6ff539a642b0..0000000000000
--- a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "TrajectoryStateSOA_t.h"
diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu
deleted file mode 100644
index d6ff539a642b0..0000000000000
--- a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu
+++ /dev/null
@@ -1 +0,0 @@
-#include "TrajectoryStateSOA_t.h"
diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h
deleted file mode 100644
index 6ba0eaa5c986e..0000000000000
--- a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h
+++ /dev/null
@@ -1,85 +0,0 @@
-#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
-#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h"
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h"
-
-using Vector5d = Eigen::Matrix<double, 5, 1>;
-using Matrix5d = Eigen::Matrix<double, 5, 5>;
-using helper = TracksUtilities<pixelTopology::Phase1>;
-
-__host__ __device__ Matrix5d loadCov(Vector5d const& e) {
-  Matrix5d cov;
-  for (int i = 0; i < 5; ++i)
-    cov(i, i) = e(i) * e(i);
-  for (int i = 0; i < 5; ++i) {
-    for (int j = 0; j < i; ++j) {
-      double v = 0.3 * std::sqrt(cov(i, i) * cov(j, j));  // this makes the matrix pos defined
-      cov(i, j) = (i + j) % 2 ? -0.4 * v : 0.1 * v;
-      cov(j, i) = cov(i, j);
-    }
-  }
-  return cov;
-}
-
-template <typename TrackerTraits>
-__global__ void testTSSoA(TrackSoAView<TrackerTraits> ts) {
-  Vector5d par0;
-  par0 << 0.2, 0.1, 3.5, 0.8, 0.1;
-  Vector5d e0;
-  e0 << 0.01, 0.01, 0.035, -0.03, -0.01;
-  auto cov0 = loadCov(e0);
-
-  int first = threadIdx.x + blockIdx.x * blockDim.x;
-
-  for (int i = first; i < ts.metadata().size(); i += blockDim.x * gridDim.x) {
-    helper::copyFromDense(ts, par0, cov0, i);
-    Vector5d par1;
-    Matrix5d cov1;
-    helper::copyToDense(ts, par1, cov1, i);
-    Vector5d delV = par1 - par0;
-    Matrix5d delM = cov1 - cov0;
-    for (int j = 0; j < 5; ++j) {
-      assert(std::abs(delV(j)) < 1.e-5);
-      for (auto k = j; k < 5; ++k) {
-        assert(cov0(k, j) == cov0(j, k));
-        assert(cov1(k, j) == cov1(j, k));
-        assert(std::abs(delM(k, j)) < 1.e-5);
-      }
-    }
-  }
-}
-
-#ifdef __CUDACC__
-#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
-#endif
-
-int main() {
-#ifdef __CUDACC__
-  cms::cudatest::requireDevices();
-  cudaStream_t stream;
-  cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
-#endif
-
-#ifdef __CUDACC__
-  // Since we are going to copy data from ts_d to ts_h, we
-  // need to initialize the Host collection with a stream.
-  TrackSoAHeterogeneousHost<pixelTopology::Phase1> ts_h(stream);
-  TrackSoAHeterogeneousDevice<pixelTopology::Phase1> ts_d(stream);
-#else
-  // If CUDA is not available, Host collection must not be initialized
-  // with a stream.
-  TrackSoAHeterogeneousHost<pixelTopology::Phase1> ts_h;
-#endif
-
-#ifdef __CUDACC__
-  testTSSoA<pixelTopology::Phase1><<<1, 64, 0, stream>>>(ts_d.view());
-  cudaCheck(cudaGetLastError());
-  cudaCheck(cudaMemcpyAsync(
-      ts_h.buffer().get(), ts_d.const_buffer().get(), ts_d.bufferSize(), cudaMemcpyDeviceToHost, stream));
-  cudaCheck(cudaGetLastError());
-  cudaCheck(cudaStreamSynchronize(stream));
-#else
-  testTSSoA<pixelTopology::Phase1>(ts_h.view());
-#endif
-}
diff --git a/CUDADataFormats/Vertex/BuildFile.xml b/CUDADataFormats/Vertex/BuildFile.xml
deleted file mode 100644
index c6b918ec4b12b..0000000000000
--- a/CUDADataFormats/Vertex/BuildFile.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-<use name="cuda"/>
-<use name="eigen"/>
-<use name="rootcore"/>
-<use name="CUDADataFormats/Common"/>
-<use name="DataFormats/Common"/>
-<use name="DataFormats/SoATemplate" source_only="1"/>
-<use name="HeterogeneousCore/CUDAUtilities"/>
-<export>
-    <lib name="1"/>
-</export>
diff --git a/CUDADataFormats/Vertex/README.md b/CUDADataFormats/Vertex/README.md
deleted file mode 100644
index 3e495d15f776e..0000000000000
--- a/CUDADataFormats/Vertex/README.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# Vertex CUDA Data Formats
-
-`CUDADataFormat`s meant to be used on Host (CPU) or Device (CUDA GPU) for
-storing information about vertices created during the Pixel-local Reconstruction
-chain. It stores data in an SoA manner. It contains the data that was previously
-contained in the deprecated `ZVertexSoA` class. 
-
-The host format is inheriting from `CUDADataFormats/Common/interface/PortableHostCollection.h`,
-while the device format is inheriting from `CUDADataFormats/Common/interface/PortableDeviceCollection.h`
-
-Both formats use the same SoA Layout (`ZVertexSoAHeterogeneousLayout`) which is generated
-via the `GENERATE_SOA_LAYOUT` macro in the `ZVertexUtilities.h` file.
-
-## Notes
-
-- Initially, `ZVertexSoA` had distinct array sizes for each attribute (e.g. `zv` was `MAXVTX` elements 
-long, `ndof` was `MAXTRACKS` elements long). All columns are now of uniform `MAXTRACKS` size, 
-meaning that there will be some wasted space (appx. 190kB). 
-- Host and Device classes should **not** be created via inheritance, as they're done here,
-but via composition. See [this discussion](https://github.com/cms-sw/cmssw/pull/40465#discussion_r1066039309).
-
-## ZVertexHeterogeneousHost
-
-The version of the data format to be used for storing vertex data on the CPU. 
-Instances of this class are to be used for:
-
-- Having a place to copy data to host from device, via `cudaMemcpy`, or
-- Running host-side algorithms using data stored in an SoA manner.
-
-## ZVertexHeterogeneousDevice
-
-The version of the data format to be used for storing vertex data on the GPU.
-
-Instances of `ZVertexHeterogeneousDevice` are to be created on host and be
-used on device only. To do so, the instance's `view()` method is to be called
-to pass a `View` to any kernel launched. Accessing data from the `view()` is not
-possible on the host side.
-
-## Utilities
-
-Apart from `ZVertexSoAHeterogeneousLayout`, `ZVertexUtilities.h` also contains
-a collection of methods which were originally
-defined as class methods inside the `ZVertexSoA` class
-which have been adapted to operate on `View` instances, so that they are callable
-from within `__global__` kernels, on both CPU and CPU. 
diff --git a/CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h b/CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h
deleted file mode 100644
index 417a960951fb1..0000000000000
--- a/CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef CUDADataFormatsVertexZVertexHeterogeneous_H
-#define CUDADataFormatsVertexZVertexHeterogeneous_H
-
-#include "CUDADataFormats/Vertex/interface/ZVertexSoA.h"
-#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h"
-
-using ZVertexHeterogeneous = HeterogeneousSoA<ZVertexSoA>;
-#ifndef __CUDACC__
-#include "CUDADataFormats/Common/interface/Product.h"
-using ZVertexCUDAProduct = cms::cuda::Product<ZVertexHeterogeneous>;
-#endif
-
-#endif
diff --git a/CUDADataFormats/Vertex/interface/ZVertexSoA.h b/CUDADataFormats/Vertex/interface/ZVertexSoA.h
deleted file mode 100644
index 95106050f3d7a..0000000000000
--- a/CUDADataFormats/Vertex/interface/ZVertexSoA.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef CUDADataFormats_Vertex_ZVertexSoA_h
-#define CUDADataFormats_Vertex_ZVertexSoA_h
-
-#include <cstdint>
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
-
-// SOA for vertices
-// These vertices are clusterized and fitted only along the beam line (z)
-// to obtain their global coordinate the beam spot position shall be added (eventually correcting for the beam angle as well)
-struct ZVertexSoA {
-  static constexpr uint32_t MAXTRACKS = 128 * 1024;
-  static constexpr uint32_t MAXVTX = 1024;
-
-  int16_t idv[MAXTRACKS];    // vertex index for each associated (original) track  (-1 == not associate)
-  float zv[MAXVTX];          // output z-posistion of found vertices
-  float wv[MAXVTX];          // output weight (1/error^2) on the above
-  float chi2[MAXVTX];        // vertices chi2
-  float ptv2[MAXVTX];        // vertices pt^2
-  int32_t ndof[MAXTRACKS];   // vertices number of dof (reused as workspace for the number of nearest neighbours FIXME)
-  uint16_t sortInd[MAXVTX];  // sorted index (by pt2)  ascending
-  uint32_t nvFinal;          // the number of vertices
-
-  __host__ __device__ void init() { nvFinal = 0; }
-};
-
-#endif  // CUDADataFormats_Vertex_ZVertexSoA_h
diff --git a/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h b/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h
deleted file mode 100644
index ae662d7fd5f9a..0000000000000
--- a/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef CUDADataFormats_Vertex_ZVertexHeterogeneousDevice_H
-#define CUDADataFormats_Vertex_ZVertexHeterogeneousDevice_H
-
-#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"
-#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h"
-
-// TODO: The class is created via inheritance of the PortableDeviceCollection.
-// This is generally discouraged, and should be done via composition.
-// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306
-template <int32_t S>
-class ZVertexSoAHeterogeneousDevice : public cms::cuda::PortableDeviceCollection<ZVertexSoAHeterogeneousLayout<>> {
-public:
-  ZVertexSoAHeterogeneousDevice() = default;  // cms::cuda::Product needs this
-
-  // Constructor which specifies the SoA size
-  explicit ZVertexSoAHeterogeneousDevice(cudaStream_t stream)
-      : PortableDeviceCollection<ZVertexSoAHeterogeneousLayout<>>(S, stream) {}
-};
-
-using ZVertexSoADevice = ZVertexSoAHeterogeneousDevice<zVertex::utilities::MAXTRACKS>;
-
-#endif  // CUDADataFormats_Vertex_ZVertexHeterogeneousDevice_H
diff --git a/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h b/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h
deleted file mode 100644
index 6b62d615e1d11..0000000000000
--- a/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef CUDADataFormats_Vertex_ZVertexHeterogeneousHost_H
-#define CUDADataFormats_Vertex_ZVertexHeterogeneousHost_H
-
-#include <cstdint>
-
-#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"
-#include "CUDADataFormats/Common/interface/PortableHostCollection.h"
-
-// TODO: The class is created via inheritance of the PortableHostCollection.
-// This is generally discouraged, and should be done via composition.
-// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306
-template <int32_t S>
-class ZVertexSoAHeterogeneousHost : public cms::cuda::PortableHostCollection<ZVertexSoAHeterogeneousLayout<>> {
-public:
-  explicit ZVertexSoAHeterogeneousHost() : cms::cuda::PortableHostCollection<ZVertexSoAHeterogeneousLayout<>>(S) {}
-
-  // Constructor which specifies the SoA size and CUDA stream
-  explicit ZVertexSoAHeterogeneousHost(cudaStream_t stream)
-      : PortableHostCollection<ZVertexSoAHeterogeneousLayout<>>(S, stream) {}
-};
-
-using ZVertexSoAHost = ZVertexSoAHeterogeneousHost<zVertex::utilities::MAXTRACKS>;
-
-#endif  // CUDADataFormats_Vertex_ZVertexHeterogeneousHost_H
diff --git a/CUDADataFormats/Vertex/interface/ZVertexUtilities.h b/CUDADataFormats/Vertex/interface/ZVertexUtilities.h
deleted file mode 100644
index 2403652377971..0000000000000
--- a/CUDADataFormats/Vertex/interface/ZVertexUtilities.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef CUDADataFormats_Vertex_ZVertexUtilities_h
-#define CUDADataFormats_Vertex_ZVertexUtilities_h
-
-#include <cuda_runtime.h>
-#include "DataFormats/SoATemplate/interface/SoALayout.h"
-
-GENERATE_SOA_LAYOUT(ZVertexSoAHeterogeneousLayout,
-                    SOA_COLUMN(int16_t, idv),
-                    SOA_COLUMN(float, zv),
-                    SOA_COLUMN(float, wv),
-                    SOA_COLUMN(float, chi2),
-                    SOA_COLUMN(float, ptv2),
-                    SOA_COLUMN(int32_t, ndof),
-                    SOA_COLUMN(uint16_t, sortInd),
-                    SOA_SCALAR(uint32_t, nvFinal))
-
-// Previous ZVertexSoA class methods.
-// They operate on View and ConstView of the ZVertexSoA.
-namespace zVertex {
-  // Common types for both Host and Device code
-  using ZVertexSoALayout = ZVertexSoAHeterogeneousLayout<>;
-  using ZVertexSoAView = ZVertexSoAHeterogeneousLayout<>::View;
-  using ZVertexSoAConstView = ZVertexSoAHeterogeneousLayout<>::ConstView;
-
-  namespace utilities {
-
-    static constexpr uint32_t MAXTRACKS = 128 * 1024;
-    static constexpr uint32_t MAXVTX = 1024;
-
-    __host__ __device__ inline void init(ZVertexSoAView &vertices) { vertices.nvFinal() = 0; }
-
-  }  // namespace utilities
-}  // namespace zVertex
-
-#endif
diff --git a/RecoTauTag/HLTProducers/BuildFile.xml b/RecoTauTag/HLTProducers/BuildFile.xml
index 79cdf947dd105..865bdb508e891 100644
--- a/RecoTauTag/HLTProducers/BuildFile.xml
+++ b/RecoTauTag/HLTProducers/BuildFile.xml
@@ -1,20 +1,18 @@
-<use name="FWCore/Framework"/>
-<use name="FWCore/ParameterSet"/>
-<use name="DataFormats/Common"/>
+<use name="root"/>
 <use name="DataFormats/BTauReco"/>
-<use name="DataFormats/TauReco"/>
-<use name="DataFormats/TrackReco"/>
-<use name="DataFormats/Math"/>
+<use name="DataFormats/CaloTowers"/>
 <use name="DataFormats/Candidate"/>
+<use name="DataFormats/Common"/>
+<use name="DataFormats/HLTReco"/>
 <use name="DataFormats/JetReco"/>
-<use name="DataFormats/CaloTowers"/>
-<use name="DataFormats/VertexReco"/>
 <use name="DataFormats/L1Trigger"/>
-<use name="DataFormats/HLTReco"/>
+<use name="DataFormats/Math"/>
+<use name="DataFormats/TauReco"/>
+<use name="DataFormats/TrackReco"/>
+<use name="DataFormats/VertexReco"/>
+<use name="FWCore/Framework"/>
+<use name="FWCore/ParameterSet"/>
 <use name="HLTrigger/HLTcore"/>
-<use name="RecoTracker/TkTrackingRegions"/>
-<use name="CUDADataFormats/Track"/>
-<use name="CUDADataFormats/Vertex"/>
 <use name="PhysicsTools/TensorFlow"/>
-<use name="root"/>
+<use name="RecoTracker/TkTrackingRegions"/>
 <flags EDM_PLUGIN="1"/>
diff --git a/RecoTauTag/HLTProducers/src/L2TauTagNNProducer.cc b/RecoTauTag/HLTProducers/src/L2TauTagNNProducer.cc
deleted file mode 100644
index 051766a05606b..0000000000000
--- a/RecoTauTag/HLTProducers/src/L2TauTagNNProducer.cc
+++ /dev/null
@@ -1,822 +0,0 @@
-/*
- * \class L2TauTagProducer
- *
- * L2Tau identification using Convolutional NN.
- *
- * \author Valeria D'Amante, Università di Siena and INFN Pisa
- *         Konstantin Androsov, EPFL and ETHZ
-*/
-#include <memory>
-#include <boost/property_tree/json_parser.hpp>
-#include <cmath>
-
-#include "FWCore/Framework/interface/stream/EDProducer.h"
-#include "FWCore/Framework/interface/ESHandle.h"
-#include "FWCore/Framework/interface/Event.h"
-#include "FWCore/Framework/interface/EventSetup.h"
-#include "FWCore/Framework/interface/Frameworkfwd.h"
-#include "FWCore/MessageLogger/interface/MessageLogger.h"
-#include "DataFormats/Math/interface/deltaR.h"
-#include "DataFormats/Common/interface/Handle.h"
-#include "FWCore/Utilities/interface/InputTag.h"
-#include "FWCore/Utilities/interface/isFinite.h"
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
-#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
-#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
-#include "Geometry/CaloGeometry/interface/CaloCellGeometry.h"
-#include "Geometry/CaloGeometry/interface/CaloGeometry.h"
-#include "Geometry/CaloTopology/interface/HcalTopology.h"
-#include "Geometry/Records/interface/CaloGeometryRecord.h"
-#include "DataFormats/CaloRecHit/interface/CaloRecHit.h"
-#include "DataFormats/EcalRecHit/interface/EcalRecHit.h"
-#include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h"
-#include "DataFormats/EcalDetId/interface/EcalDetIdCollections.h"
-#include "DataFormats/HcalDetId/interface/HcalDetId.h"
-#include "DataFormats/HcalRecHit/interface/HBHERecHit.h"
-#include "DataFormats/HcalRecHit/interface/HcalRecHitDefs.h"
-#include "DataFormats/HcalRecHit/interface/HFRecHit.h"
-#include "DataFormats/HcalRecHit/interface/HORecHit.h"
-#include "DataFormats/HLTReco/interface/TriggerTypeDefs.h"
-#include "DataFormats/HLTReco/interface/TriggerFilterObjectWithRefs.h"
-#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h"
-#include "RecoTracker/PixelTrackFitting/interface/FitUtils.h"
-#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h"
-#include "DataFormats/TrackReco/interface/HitPattern.h"
-#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h"
-#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h"
-#include "DataFormats/GeometrySurface/interface/Plane.h"
-#include "DataFormats/BeamSpot/interface/BeamSpot.h"
-#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h"
-
-#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h"
-
-namespace L2TauTagNNv1 {
-  constexpr int nCellEta = 5;
-  constexpr int nCellPhi = 5;
-  constexpr int nVars = 31;
-  constexpr float dR_max = 0.5;
-  enum class NNInputs {
-    nVertices = 0,
-    l1Tau_pt,
-    l1Tau_eta,
-    l1Tau_hwIso,
-    EcalEnergySum,
-    EcalSize,
-    EcalEnergyStdDev,
-    EcalDeltaEta,
-    EcalDeltaPhi,
-    EcalChi2,
-    EcalEnergySumForPositiveChi2,
-    EcalSizeForPositiveChi2,
-    HcalEnergySum,
-    HcalSize,
-    HcalEnergyStdDev,
-    HcalDeltaEta,
-    HcalDeltaPhi,
-    HcalChi2,
-    HcalEnergySumForPositiveChi2,
-    HcalSizeForPositiveChi2,
-    PatatrackPtSum,
-    PatatrackSize,
-    PatatrackSizeWithVertex,
-    PatatrackPtSumWithVertex,
-    PatatrackChargeSum,
-    PatatrackDeltaEta,
-    PatatrackDeltaPhi,
-    PatatrackChi2OverNdof,
-    PatatrackNdof,
-    PatatrackDxy,
-    PatatrackDz
-  };
-
-  const std::map<NNInputs, std::string> varNameMap = {
-      {NNInputs::nVertices, "nVertices"},
-      {NNInputs::l1Tau_pt, "l1Tau_pt"},
-      {NNInputs::l1Tau_eta, "l1Tau_eta"},
-      {NNInputs::l1Tau_hwIso, "l1Tau_hwIso"},
-      {NNInputs::EcalEnergySum, "EcalEnergySum"},
-      {NNInputs::EcalSize, "EcalSize"},
-      {NNInputs::EcalEnergyStdDev, "EcalEnergyStdDev"},
-      {NNInputs::EcalDeltaEta, "EcalDeltaEta"},
-      {NNInputs::EcalDeltaPhi, "EcalDeltaPhi"},
-      {NNInputs::EcalChi2, "EcalChi2"},
-      {NNInputs::EcalEnergySumForPositiveChi2, "EcalEnergySumForPositiveChi2"},
-      {NNInputs::EcalSizeForPositiveChi2, "EcalSizeForPositiveChi2"},
-      {NNInputs::HcalEnergySum, "HcalEnergySum"},
-      {NNInputs::HcalSize, "HcalSize"},
-      {NNInputs::HcalEnergyStdDev, "HcalEnergyStdDev"},
-      {NNInputs::HcalDeltaEta, "HcalDeltaEta"},
-      {NNInputs::HcalDeltaPhi, "HcalDeltaPhi"},
-      {NNInputs::HcalChi2, "HcalChi2"},
-      {NNInputs::HcalEnergySumForPositiveChi2, "HcalEnergySumForPositiveChi2"},
-      {NNInputs::HcalSizeForPositiveChi2, "HcalSizeForPositiveChi2"},
-      {NNInputs::PatatrackPtSum, "PatatrackPtSum"},
-      {NNInputs::PatatrackSize, "PatatrackSize"},
-      {NNInputs::PatatrackSizeWithVertex, "PatatrackSizeWithVertex"},
-      {NNInputs::PatatrackPtSumWithVertex, "PatatrackPtSumWithVertex"},
-      {NNInputs::PatatrackChargeSum, "PatatrackChargeSum"},
-      {NNInputs::PatatrackDeltaEta, "PatatrackDeltaEta"},
-      {NNInputs::PatatrackDeltaPhi, "PatatrackDeltaPhi"},
-      {NNInputs::PatatrackChi2OverNdof, "PatatrackChi2OverNdof"},
-      {NNInputs::PatatrackNdof, "PatatrackNdof"},
-      {NNInputs::PatatrackDxy, "PatatrackDxy"},
-      {NNInputs::PatatrackDz, "PatatrackDz"}};
-}  // namespace L2TauTagNNv1
-namespace {
-  inline float& getCellImpl(
-      tensorflow::Tensor& cellGridMatrix, int tau_idx, int phi_idx, int eta_idx, L2TauTagNNv1::NNInputs NNInput_idx) {
-    return cellGridMatrix.tensor<float, 4>()(tau_idx, phi_idx, eta_idx, static_cast<int>(NNInput_idx));
-  }
-}  // namespace
-struct normDictElement {
-  float mean;
-  float std;
-  float min;
-  float max;
-};
-
-struct L2TauNNProducerCacheData {
-  L2TauNNProducerCacheData() : graphDef(nullptr), session(nullptr) {}
-  tensorflow::GraphDef* graphDef;
-  tensorflow::Session* session;
-  std::vector<normDictElement> normVec;
-};
-
-class L2TauNNProducer : public edm::stream::EDProducer<edm::GlobalCache<L2TauNNProducerCacheData>> {
-public:
-  using TrackSoAHost = pixelTrack::TrackSoAHostPhase1;
-
-  struct caloRecHitCollections {
-    const HBHERecHitCollection* hbhe;
-    const HORecHitCollection* ho;
-    const EcalRecHitCollection* eb;
-    const EcalRecHitCollection* ee;
-    const CaloGeometry* geometry;
-  };
-
-  struct InputDescTau {
-    std::string CollectionName;
-    edm::EDGetTokenT<trigger::TriggerFilterObjectWithRefs> inputToken_;
-  };
-
-  static constexpr float dR2_max = L2TauTagNNv1::dR_max * L2TauTagNNv1::dR_max;
-  static constexpr float dEta_width = 2 * L2TauTagNNv1::dR_max / static_cast<float>(L2TauTagNNv1::nCellEta);
-  static constexpr float dPhi_width = 2 * L2TauTagNNv1::dR_max / static_cast<float>(L2TauTagNNv1::nCellPhi);
-
-  explicit L2TauNNProducer(const edm::ParameterSet&, const L2TauNNProducerCacheData*);
-  static void fillDescriptions(edm::ConfigurationDescriptions&);
-  static std::unique_ptr<L2TauNNProducerCacheData> initializeGlobalCache(const edm::ParameterSet&);
-  static void globalEndJob(L2TauNNProducerCacheData*);
-
-private:
-  void checknan(tensorflow::Tensor& tensor, int debugLevel);
-  void standardizeTensor(tensorflow::Tensor& tensor);
-  std::vector<float> getTauScore(const tensorflow::Tensor& cellGridMatrix);
-  void produce(edm::Event& event, const edm::EventSetup& eventsetup) override;
-  void fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector<l1t::TauRef>& allTaus);
-  void fillCaloRecHits(tensorflow::Tensor& cellGridMatrix,
-                       const std::vector<l1t::TauRef>& allTaus,
-                       const caloRecHitCollections& caloRecHits);
-  void fillPatatracks(tensorflow::Tensor& cellGridMatrix,
-                      const std::vector<l1t::TauRef>& allTaus,
-                      const TrackSoAHost& patatracks_tsoa,
-                      const ZVertexSoAHost& patavtx_soa,
-                      const reco::BeamSpot& beamspot,
-                      const MagneticField* magfi);
-  void selectGoodTracksAndVertices(const ZVertexSoAHost& patavtx_soa,
-                                   const TrackSoAHost& patatracks_tsoa,
-                                   std::vector<int>& trkGood,
-                                   std::vector<int>& vtxGood);
-
-  std::pair<float, float> impactParameter(int it,
-                                          const TrackSoAHost& patatracks_tsoa,
-                                          float patatrackPhi,
-                                          const reco::BeamSpot& beamspot,
-                                          const MagneticField* magfi);
-  template <typename VPos, typename LVec>
-  std::tuple<float, float, int, int> getEtaPhiIndices(const VPos& position, const LVec& tau_p4);
-  template <typename LVec>
-  std::tuple<float, float, int, int> getEtaPhiIndices(float eta, float phi, const LVec& tau_p4);
-
-private:
-  const int debugLevel_;
-  const edm::EDGetTokenT<trigger::TriggerFilterObjectWithRefs> tauTriggerToken_;
-  std::vector<InputDescTau> L1TauDesc_;
-  const edm::EDGetTokenT<HBHERecHitCollection> hbheToken_;
-  const edm::EDGetTokenT<HORecHitCollection> hoToken_;
-  const edm::EDGetTokenT<EcalRecHitCollection> ebToken_;
-  const edm::EDGetTokenT<EcalRecHitCollection> eeToken_;
-  const edm::ESGetToken<CaloGeometry, CaloGeometryRecord> geometryToken_;
-  const edm::ESGetToken<MagneticField, IdealMagneticFieldRecord> bFieldToken_;
-  const edm::EDGetTokenT<ZVertexSoAHost> pataVerticesToken_;
-  const edm::EDGetTokenT<TrackSoAHost> pataTracksToken_;
-  const edm::EDGetTokenT<reco::BeamSpot> beamSpotToken_;
-  const unsigned int maxVtx_;
-  const float fractionSumPt2_;
-  const float minSumPt2_;
-  const float trackPtMin_;
-  const float trackPtMax_;
-  const float trackChi2Max_;
-  std::string inputTensorName_;
-  std::string outputTensorName_;
-  const L2TauNNProducerCacheData* L2cacheData_;
-};
-
-std::unique_ptr<L2TauNNProducerCacheData> L2TauNNProducer::initializeGlobalCache(const edm::ParameterSet& cfg) {
-  std::unique_ptr<L2TauNNProducerCacheData> cacheData = std::make_unique<L2TauNNProducerCacheData>();
-  cacheData->normVec.reserve(L2TauTagNNv1::nVars);
-
-  auto const graphPath = edm::FileInPath(cfg.getParameter<std::string>("graphPath")).fullPath();
-
-  cacheData->graphDef = tensorflow::loadGraphDef(graphPath);
-  cacheData->session = tensorflow::createSession(cacheData->graphDef);
-
-  boost::property_tree::ptree loadPtreeRoot;
-  auto const normalizationDict = edm::FileInPath(cfg.getParameter<std::string>("normalizationDict")).fullPath();
-  boost::property_tree::read_json(normalizationDict, loadPtreeRoot);
-  for (const auto& [key, val] : L2TauTagNNv1::varNameMap) {
-    boost::property_tree::ptree var = loadPtreeRoot.get_child(val);
-    normDictElement current_element;
-    current_element.mean = var.get_child("mean").get_value<float>();
-    current_element.std = var.get_child("std").get_value<float>();
-    current_element.min = var.get_child("min").get_value<float>();
-    current_element.max = var.get_child("max").get_value<float>();
-    cacheData->normVec.push_back(current_element);
-  }
-  return cacheData;
-}
-void L2TauNNProducer::globalEndJob(L2TauNNProducerCacheData* cacheData) {
-  if (cacheData->graphDef != nullptr) {
-    delete cacheData->graphDef;
-  }
-  tensorflow::closeSession(cacheData->session);
-}
-void L2TauNNProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
-  edm::ParameterSetDescription desc;
-  desc.add<int>("debugLevel", 0)->setComment("set debug level for printing out info");
-  edm::ParameterSetDescription l1TausPset;
-  l1TausPset.add<std::string>("L1CollectionName", "DoubleTau")->setComment("Name of collections");
-  l1TausPset.add<edm::InputTag>("L1TauTrigger", edm::InputTag("hltL1sDoubleTauBigOR"))
-      ->setComment("Which trigger should the L1 Taus collection pass");
-  edm::ParameterSet l1TausPSetDefault;
-  l1TausPSetDefault.addParameter<std::string>("L1CollectionName", "DoubleTau");
-  l1TausPSetDefault.addParameter<edm::InputTag>("L1TauTrigger", edm::InputTag("hltL1sDoubleTauBigOR"));
-  desc.addVPSet("L1Taus", l1TausPset, {l1TausPSetDefault});
-  desc.add<edm::InputTag>("hbheInput", edm::InputTag("hltHbhereco"))->setComment("HBHE recHit collection");
-  desc.add<edm::InputTag>("hoInput", edm::InputTag("hltHoreco"))->setComment("HO recHit Collection");
-  desc.add<edm::InputTag>("ebInput", edm::InputTag("hltEcalRecHit:EcalRecHitsEB"))->setComment("EB recHit Collection");
-  desc.add<edm::InputTag>("eeInput", edm::InputTag("hltEcalRecHit:EcalRecHitsEE"))->setComment("EE recHit Collection");
-  desc.add<edm::InputTag>("pataVertices", edm::InputTag("hltPixelVerticesSoA"))
-      ->setComment("patatrack vertices collection");
-  desc.add<edm::InputTag>("pataTracks", edm::InputTag("hltPixelTracksSoA"))->setComment("patatrack collection");
-  desc.add<edm::InputTag>("BeamSpot", edm::InputTag("hltOnlineBeamSpot"))->setComment("BeamSpot Collection");
-  desc.add<uint>("maxVtx", 100)->setComment("max output collection size (number of accepted vertices)");
-  desc.add<double>("fractionSumPt2", 0.3)->setComment("threshold on sumPt2 fraction of the leading vertex");
-  desc.add<double>("minSumPt2", 0.)->setComment("min sumPt2");
-  desc.add<double>("track_pt_min", 1.0)->setComment("min track p_T");
-  desc.add<double>("track_pt_max", 10.0)->setComment("max track p_T");
-  desc.add<double>("track_chi2_max", 99999.)->setComment("max track chi2");
-  desc.add<std::string>("graphPath", "RecoTauTag/TrainingFiles/data/L2TauNNTag/L2TauTag_Run3v1.pb")
-      ->setComment("path to the saved CNN");
-  desc.add<std::string>("normalizationDict", "RecoTauTag/TrainingFiles/data/L2TauNNTag/NormalizationDict.json")
-      ->setComment("path to the dictionary for variable standardization");
-  descriptions.addWithDefaultLabel(desc);
-}
-
-L2TauNNProducer::L2TauNNProducer(const edm::ParameterSet& cfg, const L2TauNNProducerCacheData* cacheData)
-    : debugLevel_(cfg.getParameter<int>("debugLevel")),
-      hbheToken_(consumes<HBHERecHitCollection>(cfg.getParameter<edm::InputTag>("hbheInput"))),
-      hoToken_(consumes<HORecHitCollection>(cfg.getParameter<edm::InputTag>("hoInput"))),
-      ebToken_(consumes<EcalRecHitCollection>(cfg.getParameter<edm::InputTag>("ebInput"))),
-      eeToken_(consumes<EcalRecHitCollection>(cfg.getParameter<edm::InputTag>("eeInput"))),
-      geometryToken_(esConsumes<CaloGeometry, CaloGeometryRecord>()),
-      bFieldToken_(esConsumes<MagneticField, IdealMagneticFieldRecord>()),
-      pataVerticesToken_(consumes(cfg.getParameter<edm::InputTag>("pataVertices"))),
-      pataTracksToken_(consumes(cfg.getParameter<edm::InputTag>("pataTracks"))),
-      beamSpotToken_(consumes<reco::BeamSpot>(cfg.getParameter<edm::InputTag>("BeamSpot"))),
-      maxVtx_(cfg.getParameter<uint>("maxVtx")),
-      fractionSumPt2_(cfg.getParameter<double>("fractionSumPt2")),
-      minSumPt2_(cfg.getParameter<double>("minSumPt2")),
-      trackPtMin_(cfg.getParameter<double>("track_pt_min")),
-      trackPtMax_(cfg.getParameter<double>("track_pt_max")),
-      trackChi2Max_(cfg.getParameter<double>("track_chi2_max")) {
-  if (cacheData->graphDef == nullptr) {
-    throw cms::Exception("InvalidCacheData") << "Invalid Cache Data.";
-  }
-  inputTensorName_ = cacheData->graphDef->node(0).name();
-  outputTensorName_ = cacheData->graphDef->node(cacheData->graphDef->node_size() - 1).name();
-  L2cacheData_ = cacheData;
-  std::vector<edm::ParameterSet> L1TauCollections = cfg.getParameter<std::vector<edm::ParameterSet>>("L1Taus");
-  L1TauDesc_.reserve(L1TauCollections.size());
-  for (const auto& l1TauInput : L1TauCollections) {
-    InputDescTau toInsert;
-    toInsert.CollectionName = l1TauInput.getParameter<std::string>("L1CollectionName");
-    toInsert.inputToken_ =
-        consumes<trigger::TriggerFilterObjectWithRefs>(l1TauInput.getParameter<edm::InputTag>("L1TauTrigger"));
-    L1TauDesc_.push_back(toInsert);
-  }
-  for (const auto& desc : L1TauDesc_)
-    produces<std::vector<float>>(desc.CollectionName);
-}
-
-void L2TauNNProducer::checknan(tensorflow::Tensor& tensor, int debugLevel) {
-  using NNInputs = L2TauTagNNv1::NNInputs;
-  std::vector<int> tensor_shape(tensor.shape().dims());
-  for (int d = 0; d < tensor.shape().dims(); d++) {
-    tensor_shape.at(d) = tensor.shape().dim_size(d);
-  }
-  if (tensor_shape.size() != 4) {
-    throw cms::Exception("InvalidTensor") << "Tensor shape does not have 4 dimensions!";
-  }
-  for (int tau_idx = 0; tau_idx < tensor_shape.at(0); tau_idx++) {
-    for (int phi_idx = 0; phi_idx < tensor_shape.at(1); phi_idx++) {
-      for (int eta_idx = 0; eta_idx < tensor_shape.at(2); eta_idx++) {
-        for (int var_idx = 0; var_idx < tensor_shape.at(3); var_idx++) {
-          auto getCell = [&](NNInputs input) -> float& {
-            return getCellImpl(tensor, tau_idx, phi_idx, eta_idx, input);
-          };
-          auto nonstd_var = getCell(static_cast<NNInputs>(var_idx));
-          if (edm::isNotFinite(nonstd_var)) {
-            edm::LogWarning("InputVar") << "var is nan \nvar name= "
-                                        << L2TauTagNNv1::varNameMap.at(static_cast<L2TauTagNNv1::NNInputs>(var_idx))
-                                        << "\t var_idx = " << var_idx << "\t eta_idx = " << eta_idx
-                                        << "\t phi_idx = " << phi_idx << "\t tau_idx = " << tau_idx;
-            if (debugLevel > 2) {
-              edm::LogWarning("InputVar") << "other vars in same cell \n";
-              if (var_idx + 1 < tensor_shape.at(3))
-                edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(var_idx + 1))
-                                            << "\t = " << getCell(static_cast<NNInputs>(var_idx + 1));
-              if (var_idx + 2 < tensor_shape.at(3))
-                edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(var_idx + 2))
-                                            << "\t = " << getCell(static_cast<NNInputs>(var_idx + 2));
-              if (var_idx + 3 < tensor_shape.at(3))
-                edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(var_idx + 3))
-                                            << "\t = " << getCell(static_cast<NNInputs>(var_idx + 3));
-              if (var_idx + 4 < tensor_shape.at(3))
-                edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(var_idx + 4))
-                                            << "\t = " << getCell(static_cast<NNInputs>(var_idx + 4));
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-void L2TauNNProducer::standardizeTensor(tensorflow::Tensor& tensor) {
-  using NNInputs = L2TauTagNNv1::NNInputs;
-  std::vector<int> tensor_shape(tensor.shape().dims());
-  for (int d = 0; d < tensor.shape().dims(); d++) {
-    tensor_shape.at(d) = tensor.shape().dim_size(d);
-  }
-  if (tensor_shape.size() != 4) {
-    throw cms::Exception("InvalidTensor") << "Tensor shape does not have 4 dimensions!";
-  }
-  for (int tau_idx = 0; tau_idx < tensor_shape.at(0); tau_idx++) {
-    for (int phi_idx = 0; phi_idx < tensor_shape.at(1); phi_idx++) {
-      for (int eta_idx = 0; eta_idx < tensor_shape.at(2); eta_idx++) {
-        for (int var_idx = 0; var_idx < tensor_shape.at(3); var_idx++) {
-          auto getCell = [&](NNInputs input) -> float& {
-            return getCellImpl(tensor, tau_idx, phi_idx, eta_idx, input);
-          };
-          float mean = L2cacheData_->normVec.at(var_idx).mean;
-          float std = L2cacheData_->normVec.at(var_idx).std;
-          float min = L2cacheData_->normVec.at(var_idx).min;
-          float max = L2cacheData_->normVec.at(var_idx).max;
-          float nonstd_var = getCell(static_cast<NNInputs>(var_idx));
-          float std_var = static_cast<float>((nonstd_var - mean) / std);
-          if (std_var > max) {
-            std_var = static_cast<float>(max);
-          } else if (std_var < min) {
-            std_var = static_cast<float>(min);
-          }
-          getCell(static_cast<NNInputs>(var_idx)) = std_var;
-        }
-      }
-    }
-  }
-}
-
-void L2TauNNProducer::fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector<l1t::TauRef>& allTaus) {
-  using NNInputs = L2TauTagNNv1::NNInputs;
-
-  const int nTaus = allTaus.size();
-  for (int tau_idx = 0; tau_idx < nTaus; tau_idx++) {
-    for (int eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) {
-      for (int phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) {
-        auto getCell = [&](NNInputs input) -> float& {
-          return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input);
-        };
-        getCell(NNInputs::l1Tau_pt) = allTaus[tau_idx]->pt();
-        getCell(NNInputs::l1Tau_eta) = allTaus[tau_idx]->eta();
-        getCell(NNInputs::l1Tau_hwIso) = allTaus[tau_idx]->hwIso();
-      }
-    }
-  }
-}
-
-template <typename LVec>
-std::tuple<float, float, int, int> L2TauNNProducer::getEtaPhiIndices(float eta, float phi, const LVec& tau_p4) {
-  const float deta = eta - tau_p4.eta();
-  const float dphi = reco::deltaPhi(phi, tau_p4.phi());
-  const int eta_idx = static_cast<int>(floor((deta + L2TauTagNNv1::dR_max) / dEta_width));
-  const int phi_idx = static_cast<int>(floor((dphi + L2TauTagNNv1::dR_max) / dPhi_width));
-  return std::make_tuple(deta, dphi, eta_idx, phi_idx);
-}
-
-template <typename VPos, typename LVec>
-std::tuple<float, float, int, int> L2TauNNProducer::getEtaPhiIndices(const VPos& position, const LVec& tau_p4) {
-  return getEtaPhiIndices(position.eta(), position.phi(), tau_p4);
-}
-
-void L2TauNNProducer::fillCaloRecHits(tensorflow::Tensor& cellGridMatrix,
-                                      const std::vector<l1t::TauRef>& allTaus,
-                                      const caloRecHitCollections& caloRecHits) {
-  using NNInputs = L2TauTagNNv1::NNInputs;
-
-  const int nTaus = allTaus.size();
-  float deta, dphi;
-  int eta_idx = 0;
-  int phi_idx = 0;
-  int tau_idx = 0;
-
-  auto getCell = [&](NNInputs input) -> float& {
-    return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input);
-  };
-  for (tau_idx = 0; tau_idx < nTaus; tau_idx++) {
-    // calorechit_EE
-    for (const auto& caloRecHit_ee : *caloRecHits.ee) {
-      if (caloRecHit_ee.energy() <= 0)
-        continue;
-      const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_ee.id())->getPosition();
-      const float eeCalEn = caloRecHit_ee.energy();
-      const float eeCalChi2 = caloRecHit_ee.chi2();
-      if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) {
-        std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4());
-        getCell(NNInputs::EcalEnergySum) += eeCalEn;
-        getCell(NNInputs::EcalSize) += 1.;
-        getCell(NNInputs::EcalEnergyStdDev) += eeCalEn * eeCalEn;
-        getCell(NNInputs::EcalDeltaEta) += deta * eeCalEn;
-        getCell(NNInputs::EcalDeltaPhi) += dphi * eeCalEn;
-        if (eeCalChi2 >= 0) {
-          getCell(NNInputs::EcalChi2) += eeCalChi2 * eeCalEn;
-          getCell(NNInputs::EcalEnergySumForPositiveChi2) += eeCalEn;
-          getCell(NNInputs::EcalSizeForPositiveChi2) += 1.;
-        }
-      }
-    }
-
-    // calorechit_EB
-    for (const auto& caloRecHit_eb : *caloRecHits.eb) {
-      if (caloRecHit_eb.energy() <= 0)
-        continue;
-      const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_eb.id())->getPosition();
-      const float ebCalEn = caloRecHit_eb.energy();
-      const float ebCalChi2 = caloRecHit_eb.chi2();
-      if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) {
-        std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4());
-        getCell(NNInputs::EcalEnergySum) += ebCalEn;
-        getCell(NNInputs::EcalSize) += 1.;
-        getCell(NNInputs::EcalEnergyStdDev) += ebCalEn * ebCalEn;
-        getCell(NNInputs::EcalDeltaEta) += deta * ebCalEn;
-        getCell(NNInputs::EcalDeltaPhi) += dphi * ebCalEn;
-        if (ebCalChi2 >= 0) {
-          getCell(NNInputs::EcalChi2) += ebCalChi2 * ebCalEn;
-          getCell(NNInputs::EcalEnergySumForPositiveChi2) += ebCalEn;
-          getCell(NNInputs::EcalSizeForPositiveChi2) += 1.;
-        }
-      }
-    }
-
-    // calorechit_HBHE
-    for (const auto& caloRecHit_hbhe : *caloRecHits.hbhe) {
-      if (caloRecHit_hbhe.energy() <= 0)
-        continue;
-      const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_hbhe.id())->getPosition();
-      const float hbheCalEn = caloRecHit_hbhe.energy();
-      const float hbheCalChi2 = caloRecHit_hbhe.chi2();
-      if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) {
-        std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4());
-        getCell(NNInputs::HcalEnergySum) += hbheCalEn;
-        getCell(NNInputs::HcalEnergyStdDev) += hbheCalEn * hbheCalEn;
-        getCell(NNInputs::HcalSize) += 1.;
-        getCell(NNInputs::HcalDeltaEta) += deta * hbheCalEn;
-        getCell(NNInputs::HcalDeltaPhi) += dphi * hbheCalEn;
-        if (hbheCalChi2 >= 0) {
-          getCell(NNInputs::HcalChi2) += hbheCalChi2 * hbheCalEn;
-          getCell(NNInputs::HcalEnergySumForPositiveChi2) += hbheCalEn;
-          getCell(NNInputs::HcalSizeForPositiveChi2) += 1.;
-        }
-      }
-    }
-
-    // calorechit_HO
-    for (const auto& caloRecHit_ho : *caloRecHits.ho) {
-      if (caloRecHit_ho.energy() <= 0)
-        continue;
-      const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_ho.id())->getPosition();
-      const float hoCalEn = caloRecHit_ho.energy();
-      if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) {
-        std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4());
-        getCell(NNInputs::HcalEnergySum) += hoCalEn;
-        getCell(NNInputs::HcalEnergyStdDev) += hoCalEn * hoCalEn;
-        getCell(NNInputs::HcalSize) += 1.;
-        getCell(NNInputs::HcalDeltaEta) += deta * hoCalEn;
-        getCell(NNInputs::HcalDeltaPhi) += dphi * hoCalEn;
-      }
-    }
-
-    // normalize to sum and define stdDev
-    for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) {
-      for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) {
-        /* normalize eCal vars*/
-        if (getCell(NNInputs::EcalEnergySum) > 0.) {
-          getCell(NNInputs::EcalDeltaEta) /= getCell(NNInputs::EcalEnergySum);
-          getCell(NNInputs::EcalDeltaPhi) /= getCell(NNInputs::EcalEnergySum);
-        }
-        if (getCell(NNInputs::EcalEnergySumForPositiveChi2) > 0.) {
-          getCell(NNInputs::EcalChi2) /= getCell(NNInputs::EcalEnergySumForPositiveChi2);
-        }
-        if (getCell(NNInputs::EcalSize) > 1.) {
-          // (stdDev - (enSum*enSum)/size) / (size-1)
-          getCell(NNInputs::EcalEnergyStdDev) =
-              (getCell(NNInputs::EcalEnergyStdDev) -
-               (getCell(NNInputs::EcalEnergySum) * getCell(NNInputs::EcalEnergySum)) / getCell(NNInputs::EcalSize)) /
-              (getCell(NNInputs::EcalSize) - 1);
-        } else {
-          getCell(NNInputs::EcalEnergyStdDev) = 0.;
-        }
-        /* normalize hCal Vars */
-        if (getCell(NNInputs::HcalEnergySum) > 0.) {
-          getCell(NNInputs::HcalDeltaEta) /= getCell(NNInputs::HcalEnergySum);
-          getCell(NNInputs::HcalDeltaPhi) /= getCell(NNInputs::HcalEnergySum);
-        }
-        if (getCell(NNInputs::HcalEnergySumForPositiveChi2) > 0.) {
-          getCell(NNInputs::HcalChi2) /= getCell(NNInputs::HcalEnergySumForPositiveChi2);
-        }
-        if (getCell(NNInputs::HcalSize) > 1.) {
-          // (stdDev - (enSum*enSum)/size) / (size-1)
-          getCell(NNInputs::HcalEnergyStdDev) =
-              (getCell(NNInputs::HcalEnergyStdDev) -
-               (getCell(NNInputs::HcalEnergySum) * getCell(NNInputs::HcalEnergySum)) / getCell(NNInputs::HcalSize)) /
-              (getCell(NNInputs::HcalSize) - 1);
-        } else {
-          getCell(NNInputs::HcalEnergyStdDev) = 0.;
-        }
-      }
-    }
-  }
-}
-
-void L2TauNNProducer::selectGoodTracksAndVertices(const ZVertexSoAHost& patavtx_soa,
-                                                  const TrackSoAHost& patatracks_tsoa,
-                                                  std::vector<int>& trkGood,
-                                                  std::vector<int>& vtxGood) {
-  using patatrackHelpers = TracksUtilities<pixelTopology::Phase1>;
-  const auto maxTracks = patatracks_tsoa.view().metadata().size();
-  const int nv = patavtx_soa.view().nvFinal();
-  trkGood.clear();
-  trkGood.reserve(maxTracks);
-  vtxGood.clear();
-  vtxGood.reserve(nv);
-  auto const quality = patatracks_tsoa.view().quality();
-
-  // No need to sort either as the algorithms is just using the max (not even the location, just the max value of pt2sum).
-  std::vector<float> pTSquaredSum(nv, 0);
-  std::vector<int> nTrkAssociated(nv, 0);
-
-  for (int32_t trk_idx = 0; trk_idx < maxTracks; ++trk_idx) {
-    auto nHits = patatrackHelpers::nHits(patatracks_tsoa.view(), trk_idx);
-    if (nHits == 0) {
-      break;
-    }
-    int vtx_ass_to_track = patavtx_soa.view()[trk_idx].idv();
-    if (vtx_ass_to_track >= 0 && vtx_ass_to_track < nv) {
-      auto patatrackPt = patatracks_tsoa.view()[trk_idx].pt();
-      ++nTrkAssociated[vtx_ass_to_track];
-      if (patatrackPt >= trackPtMin_ && patatracks_tsoa.const_view()[trk_idx].chi2() <= trackChi2Max_) {
-        patatrackPt = std::min(patatrackPt, trackPtMax_);
-        pTSquaredSum[vtx_ass_to_track] += patatrackPt * patatrackPt;
-      }
-    }
-    if (nHits > 0 and quality[trk_idx] >= pixelTrack::Quality::loose) {
-      trkGood.push_back(trk_idx);
-    }
-  }
-  if (nv > 0) {
-    const auto minFOM_fromFrac = (*std::max_element(pTSquaredSum.begin(), pTSquaredSum.end())) * fractionSumPt2_;
-    for (int j = nv - 1; j >= 0 && vtxGood.size() < maxVtx_; --j) {
-      auto vtx_idx = patavtx_soa.view()[j].sortInd();
-      assert(vtx_idx < nv);
-      if (nTrkAssociated[vtx_idx] >= 2 && pTSquaredSum[vtx_idx] >= minFOM_fromFrac &&
-          pTSquaredSum[vtx_idx] > minSumPt2_) {
-        vtxGood.push_back(vtx_idx);
-      }
-    }
-  }
-}
-
-std::pair<float, float> L2TauNNProducer::impactParameter(int it,
-                                                         const TrackSoAHost& patatracks_tsoa,
-                                                         float patatrackPhi,
-                                                         const reco::BeamSpot& beamspot,
-                                                         const MagneticField* magfi) {
-  /* dxy and dz */
-  riemannFit::Vector5d ipar, opar;
-  riemannFit::Matrix5d icov, ocov;
-  TracksUtilities<pixelTopology::Phase1>::copyToDense(patatracks_tsoa.view(), ipar, icov, it);
-  riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov);
-  LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.);
-  float sp = std::sin(patatrackPhi);
-  float cp = std::cos(patatrackPhi);
-  Surface::RotationType Rotation(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0);
-  GlobalPoint BeamSpotPoint(beamspot.x0(), beamspot.y0(), beamspot.z0());
-  Plane impPointPlane(BeamSpotPoint, Rotation);
-  GlobalTrajectoryParameters gp(
-      impPointPlane.toGlobal(lpar.position()), impPointPlane.toGlobal(lpar.momentum()), lpar.charge(), magfi);
-  GlobalPoint vv = gp.position();
-  math::XYZPoint pos(vv.x(), vv.y(), vv.z());
-  GlobalVector pp = gp.momentum();
-  math::XYZVector mom(pp.x(), pp.y(), pp.z());
-  auto lambda = M_PI_2 - pp.theta();
-  auto phi = pp.phi();
-  float patatrackDxy = -vv.x() * std::sin(phi) + vv.y() * std::cos(phi);
-  float patatrackDz =
-      (vv.z() * std::cos(lambda) - (vv.x() * std::cos(phi) + vv.y() * std::sin(phi)) * std::sin(lambda)) /
-      std::cos(lambda);
-  return std::make_pair(patatrackDxy, patatrackDz);
-}
-
-void L2TauNNProducer::fillPatatracks(tensorflow::Tensor& cellGridMatrix,
-                                     const std::vector<l1t::TauRef>& allTaus,
-                                     const TrackSoAHost& patatracks_tsoa,
-                                     const ZVertexSoAHost& patavtx_soa,
-                                     const reco::BeamSpot& beamspot,
-                                     const MagneticField* magfi) {
-  using NNInputs = L2TauTagNNv1::NNInputs;
-  using patatrackHelpers = TracksUtilities<pixelTopology::Phase1>;
-  float deta, dphi;
-  int eta_idx = 0;
-  int phi_idx = 0;
-  int tau_idx = 0;
-
-  auto getCell = [&](NNInputs input) -> float& {
-    return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input);
-  };
-
-  std::vector<int> trkGood;
-  std::vector<int> vtxGood;
-
-  selectGoodTracksAndVertices(patavtx_soa, patatracks_tsoa, trkGood, vtxGood);
-
-  const int nTaus = allTaus.size();
-  for (tau_idx = 0; tau_idx < nTaus; tau_idx++) {
-    const float tauEta = allTaus[tau_idx]->eta();
-    const float tauPhi = allTaus[tau_idx]->phi();
-
-    for (const auto it : trkGood) {
-      const float patatrackPt = patatracks_tsoa.const_view()[it].pt();
-      if (patatrackPt <= 0)
-        continue;
-      const float patatrackPhi = patatrackHelpers::phi(patatracks_tsoa.const_view(), it);
-      const float patatrackEta = patatracks_tsoa.const_view()[it].eta();
-      const float patatrackCharge = patatrackHelpers::charge(patatracks_tsoa.const_view(), it);
-      const float patatrackChi2OverNdof = patatracks_tsoa.view()[it].chi2();
-      const auto nHits = patatrackHelpers::nHits(patatracks_tsoa.const_view(), it);
-      if (nHits <= 0)
-        continue;
-      const int patatrackNdof = 2 * std::min(6, nHits) - 5;
-
-      const int vtx_idx_assTrk = patavtx_soa.view()[it].idv();
-      if (reco::deltaR2(patatrackEta, patatrackPhi, tauEta, tauPhi) < dR2_max) {
-        std::tie(deta, dphi, eta_idx, phi_idx) =
-            getEtaPhiIndices(patatrackEta, patatrackPhi, allTaus[tau_idx]->polarP4());
-        getCell(NNInputs::PatatrackPtSum) += patatrackPt;
-        getCell(NNInputs::PatatrackSize) += 1.;
-        getCell(NNInputs::PatatrackChargeSum) += patatrackCharge;
-        getCell(NNInputs::PatatrackDeltaEta) += deta * patatrackPt;
-        getCell(NNInputs::PatatrackDeltaPhi) += dphi * patatrackPt;
-        getCell(NNInputs::PatatrackChi2OverNdof) += patatrackChi2OverNdof * patatrackPt;
-        getCell(NNInputs::PatatrackNdof) += patatrackNdof * patatrackPt;
-        std::pair<float, float> impactParameters = impactParameter(it, patatracks_tsoa, patatrackPhi, beamspot, magfi);
-        getCell(NNInputs::PatatrackDxy) += impactParameters.first * patatrackPt;
-        getCell(NNInputs::PatatrackDz) += impactParameters.second * patatrackPt;
-        if ((std::find(vtxGood.begin(), vtxGood.end(), vtx_idx_assTrk) != vtxGood.end())) {
-          getCell(NNInputs::PatatrackPtSumWithVertex) += patatrackPt;
-          getCell(NNInputs::PatatrackSizeWithVertex) += 1.;
-        }
-      }
-    }
-
-    // normalize to sum and define stdDev
-    for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) {
-      for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) {
-        getCell(NNInputs::nVertices) = vtxGood.size();
-        if (getCell(NNInputs::PatatrackPtSum) > 0.) {
-          getCell(NNInputs::PatatrackDeltaEta) /= getCell(NNInputs::PatatrackPtSum);
-          getCell(NNInputs::PatatrackDeltaPhi) /= getCell(NNInputs::PatatrackPtSum);
-          getCell(NNInputs::PatatrackChi2OverNdof) /= getCell(NNInputs::PatatrackPtSum);
-          getCell(NNInputs::PatatrackNdof) /= getCell(NNInputs::PatatrackPtSum);
-          getCell(NNInputs::PatatrackDxy) /= getCell(NNInputs::PatatrackPtSum);
-          getCell(NNInputs::PatatrackDz) /= getCell(NNInputs::PatatrackPtSum);
-        }
-      }
-    }
-  }
-}
-
-std::vector<float> L2TauNNProducer::getTauScore(const tensorflow::Tensor& cellGridMatrix) {
-  const int nTau = cellGridMatrix.shape().dim_size(0);
-  std::vector<float> pred_vector(nTau);
-  if (nTau > 0) {
-    // Only run the inference if there are taus to process
-    std::vector<tensorflow::Tensor> pred_tensor;
-    tensorflow::run(L2cacheData_->session, {{inputTensorName_, cellGridMatrix}}, {outputTensorName_}, &pred_tensor);
-    for (int tau_idx = 0; tau_idx < nTau; ++tau_idx) {
-      pred_vector[tau_idx] = pred_tensor[0].matrix<float>()(tau_idx, 0);
-    }
-  }
-  return pred_vector;
-}
-
-void L2TauNNProducer::produce(edm::Event& event, const edm::EventSetup& eventsetup) {
-  std::vector<std::vector<size_t>> TauCollectionMap(L1TauDesc_.size());
-  l1t::TauVectorRef allTaus;
-
-  for (size_t inp_idx = 0; inp_idx < L1TauDesc_.size(); inp_idx++) {
-    l1t::TauVectorRef l1Taus;
-    auto const& l1TriggeredTaus = event.get(L1TauDesc_[inp_idx].inputToken_);
-    l1TriggeredTaus.getObjects(trigger::TriggerL1Tau, l1Taus);
-    TauCollectionMap.at(inp_idx).resize(l1Taus.size());
-
-    for (size_t l1_idx = 0; l1_idx < l1Taus.size(); l1_idx++) {
-      size_t tau_idx;
-      const auto iter = std::find(allTaus.begin(), allTaus.end(), l1Taus[l1_idx]);
-      if (iter != allTaus.end()) {
-        tau_idx = std::distance(allTaus.begin(), iter);
-      } else {
-        allTaus.push_back(l1Taus[l1_idx]);
-        tau_idx = allTaus.size() - 1;
-      }
-      TauCollectionMap.at(inp_idx).at(l1_idx) = tau_idx;
-    }
-  }
-  const auto ebCal = event.getHandle(ebToken_);
-  const auto eeCal = event.getHandle(eeToken_);
-  const auto hbhe = event.getHandle(hbheToken_);
-  const auto ho = event.getHandle(hoToken_);
-  auto const& patatracks_SoA = event.get(pataTracksToken_);
-  auto const& vertices_SoA = event.get(pataVerticesToken_);
-  const auto bsHandle = event.getHandle(beamSpotToken_);
-
-  auto const fieldESH = eventsetup.getHandle(bFieldToken_);
-  auto const geometry = eventsetup.getHandle(geometryToken_);
-
-  caloRecHitCollections caloRecHits;
-  caloRecHits.hbhe = &*hbhe;
-  caloRecHits.ho = &*ho;
-  caloRecHits.eb = &*ebCal;
-  caloRecHits.ee = &*eeCal;
-  caloRecHits.geometry = &*geometry;
-
-  const int nTaus = allTaus.size();
-  tensorflow::Tensor cellGridMatrix(tensorflow::DT_FLOAT,
-                                    {nTaus, L2TauTagNNv1::nCellEta, L2TauTagNNv1::nCellPhi, L2TauTagNNv1::nVars});
-  const int n_inputs = nTaus * L2TauTagNNv1::nCellEta * L2TauTagNNv1::nCellPhi * L2TauTagNNv1::nVars;
-  for (int input_idx = 0; input_idx < n_inputs; ++input_idx) {
-    cellGridMatrix.flat<float>()(input_idx) = 0;
-  }
-  fillL1TauVars(cellGridMatrix, allTaus);
-
-  fillCaloRecHits(cellGridMatrix, allTaus, caloRecHits);
-
-  fillPatatracks(cellGridMatrix, allTaus, patatracks_SoA, vertices_SoA, *bsHandle, fieldESH.product());
-
-  standardizeTensor(cellGridMatrix);
-
-  if (debugLevel_ > 0) {
-    checknan(cellGridMatrix, debugLevel_);
-  }
-
-  std::vector<float> tau_score = getTauScore(cellGridMatrix);
-
-  for (size_t inp_idx = 0; inp_idx < L1TauDesc_.size(); inp_idx++) {
-    const size_t nTau = TauCollectionMap[inp_idx].size();
-    auto tau_tags = std::make_unique<std::vector<float>>(nTau);
-    for (size_t tau_pos = 0; tau_pos < nTau; ++tau_pos) {
-      const auto tau_idx = TauCollectionMap[inp_idx][tau_pos];
-      if (debugLevel_ > 0) {
-        edm::LogInfo("DebugInfo") << event.id().event() << " \t " << (allTaus[tau_idx])->pt() << " \t "
-                                  << tau_score.at(tau_idx) << std::endl;
-      }
-      (*tau_tags)[tau_pos] = tau_score.at(tau_idx);
-    }
-    event.put(std::move(tau_tags), L1TauDesc_[inp_idx].CollectionName);
-  }
-}
-//define this as a plug-in
-#include "FWCore/Framework/interface/MakerMacros.h"
-DEFINE_FWK_MODULE(L2TauNNProducer);
diff --git a/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml b/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml
index 6c8c102293651..eae412ec800cc 100644
--- a/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml
+++ b/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml
@@ -1,10 +1,7 @@
 <library file="*.cc" name="RecoPixelVertexingPixelTrackFittingPlugins">
-  <use name="cuda"/>
-  <use name="CUDADataFormats/Track"/>
   <use name="DataFormats/TrackSoA"/>
   <use name="DataFormats/TrackingRecHit"/>
   <use name="Geometry/Records"/>
   <use name="RecoTracker/PixelTrackFitting"/>
-  <use name="HeterogeneousCore/CUDACore"/>
   <flags EDM_PLUGIN="1"/>
 </library>
diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpCUDA.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpCUDA.cc
deleted file mode 100644
index 6bff9a7c42292..0000000000000
--- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpCUDA.cc
+++ /dev/null
@@ -1,104 +0,0 @@
-#include <cuda_runtime.h>
-#include <Eigen/Core>  // needed here by soa layout
-
-#include "CUDADataFormats/Common/interface/Product.h"
-#include "DataFormats/Common/interface/Handle.h"
-#include "FWCore/Framework/interface/ConsumesCollector.h"
-#include "FWCore/Framework/interface/Event.h"
-#include "FWCore/Framework/interface/EventSetup.h"
-#include "FWCore/Framework/interface/MakerMacros.h"
-#include "FWCore/Framework/interface/global/EDAnalyzer.h"
-#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
-#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
-#include "FWCore/PluginManager/interface/ModuleDef.h"
-#include "FWCore/Utilities/interface/EDGetToken.h"
-#include "FWCore/Utilities/interface/InputTag.h"
-#include "FWCore/Utilities/interface/RunningAverage.h"
-#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-#include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h"
-
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h"
-
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h"
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h"
-
-template <typename TrackerTraits>
-class PixelTrackDumpCUDAT : public edm::global::EDAnalyzer<> {
-public:
-  using TrackSoAHost = TrackSoAHeterogeneousHost<TrackerTraits>;
-  using TrackSoADevice = TrackSoAHeterogeneousDevice<TrackerTraits>;
-
-  using VertexSoAHost = ZVertexSoAHost;
-  using VertexSoADevice = ZVertexSoADevice;
-
-  explicit PixelTrackDumpCUDAT(const edm::ParameterSet& iConfig);
-  ~PixelTrackDumpCUDAT() override = default;
-
-  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
-
-private:
-  void analyze(edm::StreamID streamID, edm::Event const& iEvent, const edm::EventSetup& iSetup) const override;
-  const bool m_onGPU;
-  edm::EDGetTokenT<cms::cuda::Product<TrackSoADevice>> tokenGPUTrack_;
-  edm::EDGetTokenT<cms::cuda::Product<VertexSoADevice>> tokenGPUVertex_;
-  edm::EDGetTokenT<TrackSoAHost> tokenSoATrack_;
-  edm::EDGetTokenT<VertexSoAHost> tokenSoAVertex_;
-};
-
-template <typename TrackerTraits>
-PixelTrackDumpCUDAT<TrackerTraits>::PixelTrackDumpCUDAT(const edm::ParameterSet& iConfig)
-    : m_onGPU(iConfig.getParameter<bool>("onGPU")) {
-  if (m_onGPU) {
-    tokenGPUTrack_ = consumes(iConfig.getParameter<edm::InputTag>("pixelTrackSrc"));
-    tokenGPUVertex_ = consumes(iConfig.getParameter<edm::InputTag>("pixelVertexSrc"));
-  } else {
-    tokenSoATrack_ = consumes(iConfig.getParameter<edm::InputTag>("pixelTrackSrc"));
-    tokenSoAVertex_ = consumes(iConfig.getParameter<edm::InputTag>("pixelVertexSrc"));
-  }
-}
-
-template <typename TrackerTraits>
-void PixelTrackDumpCUDAT<TrackerTraits>::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
-  edm::ParameterSetDescription desc;
-
-  desc.add<bool>("onGPU", true);
-  desc.add<edm::InputTag>("pixelTrackSrc", edm::InputTag("pixelTracksCUDA"));
-  desc.add<edm::InputTag>("pixelVertexSrc", edm::InputTag("pixelVerticesCUDA"));
-  descriptions.addWithDefaultLabel(desc);
-}
-
-template <typename TrackerTraits>
-void PixelTrackDumpCUDAT<TrackerTraits>::analyze(edm::StreamID streamID,
-                                                 edm::Event const& iEvent,
-                                                 const edm::EventSetup& iSetup) const {
-  if (m_onGPU) {
-    auto const& hTracks = iEvent.get(tokenGPUTrack_);
-    cms::cuda::ScopedContextProduce ctx{hTracks};
-
-    auto const& tracks = ctx.get(hTracks);
-    auto const* tsoa = &tracks;
-    assert(tsoa->buffer());
-
-    auto const& vertices = ctx.get(iEvent.get(tokenGPUVertex_));
-    auto const* vsoa = &vertices;
-    assert(vsoa->buffer());
-
-  } else {
-    auto const& tsoa = iEvent.get(tokenSoATrack_);
-    assert(tsoa.buffer());
-
-    auto const& vsoa = iEvent.get(tokenSoAVertex_);
-    assert(vsoa.buffer());
-  }
-}
-
-using PixelTrackDumpCUDAPhase1 = PixelTrackDumpCUDAT<pixelTopology::Phase1>;
-DEFINE_FWK_MODULE(PixelTrackDumpCUDAPhase1);
-
-using PixelTrackDumpCUDAPhase2 = PixelTrackDumpCUDAT<pixelTopology::Phase2>;
-DEFINE_FWK_MODULE(PixelTrackDumpCUDAPhase2);
-
-using PixelTrackDumpCUDAHIonPhase1 = PixelTrackDumpCUDAT<pixelTopology::HIonPhase1>;
-DEFINE_FWK_MODULE(PixelTrackDumpCUDAHIonPhase1);
diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoA.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoA.cc
deleted file mode 100644
index a7dd1b3ddf209..0000000000000
--- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoA.cc
+++ /dev/null
@@ -1,270 +0,0 @@
-#include <algorithm>
-#include <cmath>
-//#include <iostream>
-#include <memory>
-#include <numeric>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "CUDADataFormats/Common/interface/HostProduct.h"
-#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h"
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h"
-#include "DataFormats/BeamSpot/interface/BeamSpot.h"
-#include "DataFormats/Common/interface/OrphanHandle.h"
-#include "DataFormats/GeometrySurface/interface/Plane.h"
-#include "DataFormats/TrackReco/interface/Track.h"
-#include "DataFormats/TrackReco/interface/TrackExtra.h"
-#include "DataFormats/TrackReco/interface/TrackFwd.h"
-#include "DataFormats/TrackerCommon/interface/TrackerTopology.h"
-#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h"
-#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h"
-#include "FWCore/Framework/interface/ConsumesCollector.h"
-#include "FWCore/Framework/interface/Event.h"
-#include "FWCore/Framework/interface/EventSetup.h"
-#include "FWCore/Framework/interface/MakerMacros.h"
-#include "FWCore/Framework/interface/global/EDProducer.h"
-#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
-#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
-#include "FWCore/PluginManager/interface/ModuleDef.h"
-#include "FWCore/Utilities/interface/EDGetToken.h"
-#include "FWCore/Utilities/interface/InputTag.h"
-#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
-#include "Geometry/Records/interface/TrackerTopologyRcd.h"
-#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h"
-#include "RecoTracker/PixelTrackFitting/interface/FitUtils.h"
-#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h"
-#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h"
-#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h"
-
-#include "storeTracks.h"
-
-/**
- * This class creates "legacy" reco::Track
- * objects from the output of SoA CA.
- */
-template <typename TrackerTraits>
-class PixelTrackProducerFromSoAT : public edm::global::EDProducer<> {
-  using TrackSoAHost = TrackSoAHeterogeneousHost<TrackerTraits>;
-  using TracksHelpers = TracksUtilities<TrackerTraits>;
-  using HMSstorage = HostProduct<uint32_t[]>;
-  using IndToEdm = std::vector<uint32_t>;
-
-public:
-  explicit PixelTrackProducerFromSoAT(const edm::ParameterSet &iConfig);
-  ~PixelTrackProducerFromSoAT() override = default;
-
-  static void fillDescriptions(edm::ConfigurationDescriptions &descriptions);
-
-private:
-  void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override;
-
-  // Event Data tokens
-  const edm::EDGetTokenT<reco::BeamSpot> tBeamSpot_;
-  const edm::EDGetTokenT<TrackSoAHost> tokenTrack_;
-  const edm::EDGetTokenT<SiPixelRecHitCollectionNew> cpuHits_;
-  const edm::EDGetTokenT<HMSstorage> hmsToken_;
-  // Event Setup tokens
-  const edm::ESGetToken<MagneticField, IdealMagneticFieldRecord> idealMagneticFieldToken_;
-  const edm::ESGetToken<TrackerTopology, TrackerTopologyRcd> ttTopoToken_;
-
-  int32_t const minNumberOfHits_;
-  pixelTrack::Quality const minQuality_;
-};
-
-template <typename TrackerTraits>
-PixelTrackProducerFromSoAT<TrackerTraits>::PixelTrackProducerFromSoAT(const edm::ParameterSet &iConfig)
-    : tBeamSpot_(consumes<reco::BeamSpot>(iConfig.getParameter<edm::InputTag>("beamSpot"))),
-      tokenTrack_(consumes(iConfig.getParameter<edm::InputTag>("trackSrc"))),
-      cpuHits_(consumes<SiPixelRecHitCollectionNew>(iConfig.getParameter<edm::InputTag>("pixelRecHitLegacySrc"))),
-      hmsToken_(consumes<HMSstorage>(iConfig.getParameter<edm::InputTag>("pixelRecHitLegacySrc"))),
-      idealMagneticFieldToken_(esConsumes()),
-      ttTopoToken_(esConsumes()),
-      minNumberOfHits_(iConfig.getParameter<int>("minNumberOfHits")),
-      minQuality_(pixelTrack::qualityByName(iConfig.getParameter<std::string>("minQuality"))) {
-  if (minQuality_ == pixelTrack::Quality::notQuality) {
-    throw cms::Exception("PixelTrackConfiguration")
-        << iConfig.getParameter<std::string>("minQuality") + " is not a pixelTrack::Quality";
-  }
-  if (minQuality_ < pixelTrack::Quality::dup) {
-    throw cms::Exception("PixelTrackConfiguration")
-        << iConfig.getParameter<std::string>("minQuality") + " not supported";
-  }
-  produces<TrackingRecHitCollection>();
-  produces<reco::TrackExtraCollection>();
-  // TrackCollection refers to TrackingRechit and TrackExtra
-  // collections, need to declare its production after them to work
-  // around a rare race condition in framework scheduling
-  produces<reco::TrackCollection>();
-  produces<IndToEdm>();
-}
-
-template <typename TrackerTraits>
-void PixelTrackProducerFromSoAT<TrackerTraits>::fillDescriptions(edm::ConfigurationDescriptions &descriptions) {
-  edm::ParameterSetDescription desc;
-  desc.add<edm::InputTag>("beamSpot", edm::InputTag("offlineBeamSpot"));
-  desc.add<edm::InputTag>("trackSrc", edm::InputTag("pixelTracksSoA"));
-  desc.add<edm::InputTag>("pixelRecHitLegacySrc", edm::InputTag("siPixelRecHitsPreSplittingLegacy"));
-  desc.add<int>("minNumberOfHits", 0);
-  desc.add<std::string>("minQuality", "loose");
-  descriptions.addWithDefaultLabel(desc);
-}
-
-template <typename TrackerTraits>
-void PixelTrackProducerFromSoAT<TrackerTraits>::produce(edm::StreamID streamID,
-                                                        edm::Event &iEvent,
-                                                        const edm::EventSetup &iSetup) const {
-  // enum class Quality : uint8_t { bad = 0, edup, dup, loose, strict, tight, highPurity };
-  reco::TrackBase::TrackQuality recoQuality[] = {reco::TrackBase::undefQuality,
-                                                 reco::TrackBase::undefQuality,
-                                                 reco::TrackBase::discarded,
-                                                 reco::TrackBase::loose,
-                                                 reco::TrackBase::tight,
-                                                 reco::TrackBase::tight,
-                                                 reco::TrackBase::highPurity};
-  assert(reco::TrackBase::highPurity == recoQuality[int(pixelTrack::Quality::highPurity)]);
-
-  // std::cout << "Converting gpu helix in reco tracks" << std::endl;
-
-  auto indToEdmP = std::make_unique<IndToEdm>();
-  auto &indToEdm = *indToEdmP;
-
-  auto const &idealField = iSetup.getData(idealMagneticFieldToken_);
-
-  pixeltrackfitting::TracksWithRecHits tracks;
-
-  auto const &httopo = iSetup.getData(ttTopoToken_);
-
-  const auto &bsh = iEvent.get(tBeamSpot_);
-  GlobalPoint bs(bsh.x0(), bsh.y0(), bsh.z0());
-
-  auto const &rechits = iEvent.get(cpuHits_);
-  std::vector<TrackingRecHit const *> hitmap;
-  auto const &rcs = rechits.data();
-  auto const nhits = rcs.size();
-
-  hitmap.resize(nhits, nullptr);
-
-  auto const *hitsModuleStart = iEvent.get(hmsToken_).get();
-
-  for (auto const &hit : rcs) {
-    auto const &thit = static_cast<BaseTrackerRecHit const &>(hit);
-    auto const detI = thit.det()->index();
-    auto const &clus = thit.firstClusterRef();
-    assert(clus.isPixel());
-    auto const idx = hitsModuleStart[detI] + clus.pixelCluster().originalId();
-    if (idx >= hitmap.size())
-      hitmap.resize(idx + 256, nullptr);  // only in case of hit overflow in one module
-
-    assert(nullptr == hitmap[idx]);
-    hitmap[idx] = &hit;
-  }
-
-  std::vector<const TrackingRecHit *> hits;
-  hits.reserve(5);
-
-  auto const &tsoa = iEvent.get(tokenTrack_);
-  auto const quality = tsoa.view().quality();
-  auto const &hitIndices = tsoa.view().hitIndices();
-  auto nTracks = tsoa.view().nTracks();
-
-  tracks.reserve(nTracks);
-
-  int32_t nt = 0;
-
-  // sort index by pt
-  std::vector<int32_t> sortIdxs(nTracks);
-  std::iota(sortIdxs.begin(), sortIdxs.end(), 0);
-  // sort good-quality tracks by pt, keep bad-quality tracks in the bottom
-  std::sort(sortIdxs.begin(), sortIdxs.end(), [&](int32_t const i1, int32_t const i2) {
-    if (quality[i1] >= minQuality_ && quality[i2] >= minQuality_)
-      return tsoa.view()[i1].pt() > tsoa.view()[i2].pt();
-    else
-      return quality[i1] > quality[i2];
-  });
-
-  // store the index of the SoA: indToEdm[index_SoAtrack] -> index_edmTrack (if it exists)
-  indToEdm.resize(sortIdxs.size(), -1);
-  for (const auto &it : sortIdxs) {
-    auto nHits = TracksHelpers::nHits(tsoa.view(), it);
-    assert(nHits >= 3);
-    auto q = quality[it];
-
-    if (q < minQuality_)
-      continue;
-    if (nHits < minNumberOfHits_)  // move to nLayers?
-      continue;
-    indToEdm[it] = nt;
-    ++nt;
-
-    hits.resize(nHits);
-    auto b = hitIndices.begin(it);
-    for (int iHit = 0; iHit < nHits; ++iHit)
-      hits[iHit] = hitmap[*(b + iHit)];
-
-    // mind: this values are respect the beamspot!
-    float chi2 = tsoa.view()[it].chi2();
-    float phi = TracksHelpers::phi(tsoa.view(), it);
-
-    riemannFit::Vector5d ipar, opar;
-    riemannFit::Matrix5d icov, ocov;
-    TracksHelpers::template copyToDense<riemannFit::Vector5d, riemannFit::Matrix5d>(tsoa.view(), ipar, icov, it);
-    riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov);
-
-    LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.);
-    AlgebraicSymMatrix55 m;
-    for (int i = 0; i < 5; ++i)
-      for (int j = i; j < 5; ++j)
-        m(i, j) = ocov(i, j);
-
-    float sp = std::sin(phi);
-    float cp = std::cos(phi);
-    Surface::RotationType rot(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0);
-
-    Plane impPointPlane(bs, rot);
-    GlobalTrajectoryParameters gp(
-        impPointPlane.toGlobal(lpar.position()), impPointPlane.toGlobal(lpar.momentum()), lpar.charge(), &idealField);
-    JacobianLocalToCurvilinear jl2c(impPointPlane, lpar, idealField);
-
-    AlgebraicSymMatrix55 mo = ROOT::Math::Similarity(jl2c.jacobian(), m);
-
-    int ndof = 2 * hits.size() - 5;
-    chi2 = chi2 * ndof;
-    GlobalPoint vv = gp.position();
-    math::XYZPoint pos(vv.x(), vv.y(), vv.z());
-    GlobalVector pp = gp.momentum();
-    math::XYZVector mom(pp.x(), pp.y(), pp.z());
-
-    auto track = std::make_unique<reco::Track>(chi2, ndof, pos, mom, gp.charge(), CurvilinearTrajectoryError(mo));
-
-    // bad and edup not supported as fit not present or not reliable
-    auto tkq = recoQuality[int(q)];
-    track->setQuality(tkq);
-    // loose,tight and HP are inclusive
-    if (reco::TrackBase::highPurity == tkq) {
-      track->setQuality(reco::TrackBase::tight);
-      track->setQuality(reco::TrackBase::loose);
-    } else if (reco::TrackBase::tight == tkq) {
-      track->setQuality(reco::TrackBase::loose);
-    }
-    track->setQuality(tkq);
-    // filter???
-    tracks.emplace_back(track.release(), hits);
-  }
-  // std::cout << "processed " << nt << " good tuples " << tracks.size() << "out of " << indToEdm.size() << std::endl;
-
-  // store tracks
-  storeTracks(iEvent, tracks, httopo);
-  iEvent.put(std::move(indToEdmP));
-}
-
-using PixelTrackProducerFromSoAPhase1 = PixelTrackProducerFromSoAT<pixelTopology::Phase1>;
-DEFINE_FWK_MODULE(PixelTrackProducerFromSoAPhase1);
-
-using PixelTrackProducerFromSoAPhase2 = PixelTrackProducerFromSoAT<pixelTopology::Phase2>;
-DEFINE_FWK_MODULE(PixelTrackProducerFromSoAPhase2);
-
-using PixelTrackProducerFromSoAHIonPhase1 = PixelTrackProducerFromSoAT<pixelTopology::HIonPhase1>;
-DEFINE_FWK_MODULE(PixelTrackProducerFromSoAHIonPhase1);
diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc
deleted file mode 100644
index fc2c76ff00155..0000000000000
--- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-#include <cuda_runtime.h>
-#include <Eigen/Core>  // needed here by soa layout
-
-#include "CUDADataFormats/Common/interface/Product.h"
-#include "CUDADataFormats/Common/interface/HostProduct.h"
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h"
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h"
-#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
-#include "DataFormats/Common/interface/Handle.h"
-#include "FWCore/Framework/interface/Event.h"
-#include "FWCore/Framework/interface/EventSetup.h"
-#include "FWCore/Framework/interface/MakerMacros.h"
-#include "FWCore/Framework/interface/stream/EDProducer.h"
-#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
-#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
-#include "FWCore/PluginManager/interface/ModuleDef.h"
-#include "FWCore/Utilities/interface/EDGetToken.h"
-#include "FWCore/Utilities/interface/InputTag.h"
-#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-
-// Switch on to enable checks and printout for found tracks
-// #define PIXEL_DEBUG_PRODUCE
-
-template <typename TrackerTraits>
-class PixelTrackSoAFromCUDAT : public edm::stream::EDProducer<edm::ExternalWork> {
-  using TrackSoAHost = TrackSoAHeterogeneousHost<TrackerTraits>;
-  using TrackSoADevice = TrackSoAHeterogeneousDevice<TrackerTraits>;
-
-public:
-  explicit PixelTrackSoAFromCUDAT(const edm::ParameterSet& iConfig);
-  ~PixelTrackSoAFromCUDAT() override = default;
-
-  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
-
-private:
-  void acquire(edm::Event const& iEvent,
-               edm::EventSetup const& iSetup,
-               edm::WaitingTaskWithArenaHolder waitingTaskHolder) override;
-  void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override;
-
-  edm::EDGetTokenT<cms::cuda::Product<TrackSoADevice>> tokenCUDA_;
-  edm::EDPutTokenT<TrackSoAHost> tokenSOA_;
-
-  TrackSoAHost tracks_h_;
-};
-
-template <typename TrackerTraits>
-PixelTrackSoAFromCUDAT<TrackerTraits>::PixelTrackSoAFromCUDAT(const edm::ParameterSet& iConfig)
-    : tokenCUDA_(consumes(iConfig.getParameter<edm::InputTag>("src"))), tokenSOA_(produces<TrackSoAHost>()) {}
-
-template <typename TrackerTraits>
-void PixelTrackSoAFromCUDAT<TrackerTraits>::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
-  edm::ParameterSetDescription desc;
-
-  desc.add<edm::InputTag>("src", edm::InputTag("pixelTracksCUDA"));
-  descriptions.addWithDefaultLabel(desc);
-}
-
-template <typename TrackerTraits>
-void PixelTrackSoAFromCUDAT<TrackerTraits>::acquire(edm::Event const& iEvent,
-                                                    edm::EventSetup const& iSetup,
-                                                    edm::WaitingTaskWithArenaHolder waitingTaskHolder) {
-  cms::cuda::Product<TrackSoADevice> const& inputDataWrapped = iEvent.get(tokenCUDA_);
-  cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)};
-  auto const& tracks_d = ctx.get(inputDataWrapped);  // Tracks on device
-  tracks_h_ = TrackSoAHost(ctx.stream());            // Create an instance of Tracks on Host, using the stream
-  cudaCheck(cudaMemcpyAsync(tracks_h_.buffer().get(),
-                            tracks_d.const_buffer().get(),
-                            tracks_d.bufferSize(),
-                            cudaMemcpyDeviceToHost,
-                            ctx.stream()));  // Copy data from Device to Host
-}
-
-template <typename TrackerTraits>
-void PixelTrackSoAFromCUDAT<TrackerTraits>::produce(edm::Event& iEvent, edm::EventSetup const& iSetup) {
-  auto maxTracks = tracks_h_.view().metadata().size();
-  auto nTracks = tracks_h_.view().nTracks();
-
-  assert(nTracks < maxTracks);
-  if (nTracks == maxTracks - 1) {
-    edm::LogWarning("PixelTracks") << "Unsorted reconstructed pixel tracks truncated to " << maxTracks - 1
-                                   << " candidates";
-  }
-
-#ifdef PIXEL_DEBUG_PRODUCE
-  std::cout << "size of SoA " << sizeof(tsoa) << " stride " << maxTracks << std::endl;
-  std::cout << "found " << nTracks << " tracks in cpu SoA at " << &tsoa << std::endl;
-
-  int32_t nt = 0;
-  for (int32_t it = 0; it < maxTracks; ++it) {
-    auto nHits = TracksUtilities<TrackerTraits>::nHits(tracks_h_.view(), it);
-    assert(nHits == int(tracks_h_.view().hitIndices().size(it)));
-    if (nHits == 0)
-      break;  // this is a guard: maybe we need to move to nTracks...
-    nt++;
-  }
-  assert(nTracks == nt);
-#endif
-
-  // DO NOT  make a copy  (actually TWO....)
-  iEvent.emplace(tokenSOA_, std::move(tracks_h_));
-  assert(!tracks_h_.buffer());
-}
-
-using PixelTrackSoAFromCUDAPhase1 = PixelTrackSoAFromCUDAT<pixelTopology::Phase1>;
-DEFINE_FWK_MODULE(PixelTrackSoAFromCUDAPhase1);
-
-using PixelTrackSoAFromCUDAPhase2 = PixelTrackSoAFromCUDAT<pixelTopology::Phase2>;
-DEFINE_FWK_MODULE(PixelTrackSoAFromCUDAPhase2);
-
-using PixelTrackSoAFromCUDAHIonPhase1 = PixelTrackSoAFromCUDAT<pixelTopology::HIonPhase1>;
-DEFINE_FWK_MODULE(PixelTrackSoAFromCUDAHIonPhase1);
diff --git a/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml b/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml
index 5999fef50b346..ada2991b1cdb2 100644
--- a/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml
+++ b/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml
@@ -1,4 +1,3 @@
-<use name="cuda"/>
 <use name="CommonTools/RecoAlgos"/>
 <use name="CommonTools/Utils"/>
 <use name="DataFormats/BeamSpot"/>
diff --git a/RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc b/RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc
deleted file mode 100644
index 8122ccbac3130..0000000000000
--- a/RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc
+++ /dev/null
@@ -1,179 +0,0 @@
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h"
-#include "DataFormats/BeamSpot/interface/BeamSpot.h"
-#include "DataFormats/GeometrySurface/interface/Plane.h"
-#include "DataFormats/TrackerCommon/interface/TrackerTopology.h"
-#include "DataFormats/TrackingRecHit/interface/InvalidTrackingRecHit.h"
-#include "DataFormats/TrajectorySeed/interface/TrajectorySeedCollection.h"
-#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h"
-#include "FWCore/Framework/interface/ConsumesCollector.h"
-#include "FWCore/Framework/interface/ESHandle.h"
-#include "FWCore/Framework/interface/Event.h"
-#include "FWCore/Framework/interface/EventSetup.h"
-#include "FWCore/Framework/interface/MakerMacros.h"
-#include "FWCore/Framework/interface/global/EDProducer.h"
-#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
-#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
-#include "FWCore/PluginManager/interface/ModuleDef.h"
-#include "FWCore/Utilities/interface/EDGetToken.h"
-#include "FWCore/Utilities/interface/InputTag.h"
-#include "Geometry/CommonDetUnit/interface/GeomDet.h"
-#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h"
-#include "Geometry/Records/interface/TrackerTopologyRcd.h"
-#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h"
-#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h"
-#include "RecoTracker/PixelTrackFitting/interface/FitUtils.h"
-#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h"
-#include "TrackingTools/MaterialEffects/interface/PropagatorWithMaterial.h"
-#include "TrackingTools/Records/interface/TrackingComponentsRecord.h"
-#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h"
-#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h"
-#include "TrackingTools/TrajectoryState/interface/TrajectoryStateTransform.h"
-
-/*
-  produces seeds directly from cuda produced tuples
-*/
-template <typename TrackerTraits>
-class SeedProducerFromSoAT : public edm::global::EDProducer<> {
-public:
-  explicit SeedProducerFromSoAT(const edm::ParameterSet& iConfig);
-  ~SeedProducerFromSoAT() override = default;
-
-  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
-
-private:
-  void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override;
-
-  // Event data tokens
-  const edm::EDGetTokenT<reco::BeamSpot> tBeamSpot_;
-  const edm::EDGetTokenT<TrackSoAHeterogeneousHost<TrackerTraits>> tokenTrack_;
-  // Event setup tokens
-  const edm::ESGetToken<MagneticField, IdealMagneticFieldRecord> idealMagneticFieldToken_;
-  const edm::ESGetToken<TrackerGeometry, TrackerDigiGeometryRecord> trackerDigiGeometryToken_;
-  const edm::ESGetToken<Propagator, TrackingComponentsRecord> trackerPropagatorToken_;
-  int32_t minNumberOfHits_;
-};
-
-template <typename TrackerTraits>
-SeedProducerFromSoAT<TrackerTraits>::SeedProducerFromSoAT(const edm::ParameterSet& iConfig)
-    : tBeamSpot_(consumes<reco::BeamSpot>(iConfig.getParameter<edm::InputTag>("beamSpot"))),
-      tokenTrack_(consumes(iConfig.getParameter<edm::InputTag>("src"))),
-      idealMagneticFieldToken_(esConsumes()),
-      trackerDigiGeometryToken_(esConsumes()),
-      trackerPropagatorToken_(esConsumes(edm::ESInputTag("PropagatorWithMaterial"))),
-      minNumberOfHits_(iConfig.getParameter<int>("minNumberOfHits"))
-
-{
-  produces<TrajectorySeedCollection>();
-}
-
-template <typename TrackerTraits>
-void SeedProducerFromSoAT<TrackerTraits>::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
-  edm::ParameterSetDescription desc;
-  desc.add<edm::InputTag>("beamSpot", edm::InputTag("offlineBeamSpot"));
-  desc.add<edm::InputTag>("src", edm::InputTag("pixelTrackSoA"));
-  desc.add<int>("minNumberOfHits", 0);
-
-  descriptions.addWithDefaultLabel(desc);
-}
-
-template <typename TrackerTraits>
-void SeedProducerFromSoAT<TrackerTraits>::produce(edm::StreamID streamID,
-                                                  edm::Event& iEvent,
-                                                  const edm::EventSetup& iSetup) const {
-  // std::cout << "Converting gpu helix to trajectory seed" << std::endl;
-  auto result = std::make_unique<TrajectorySeedCollection>();
-
-  using trackHelper = TracksUtilities<TrackerTraits>;
-
-  auto const& fieldESH = iSetup.getHandle(idealMagneticFieldToken_);
-  auto const& tracker = iSetup.getHandle(trackerDigiGeometryToken_);
-  auto const& dus = tracker->detUnits();
-
-  auto const& propagatorHandle = iSetup.getHandle(trackerPropagatorToken_);
-  const Propagator* propagator = &(*propagatorHandle);
-
-  const auto& bsh = iEvent.get(tBeamSpot_);
-  // std::cout << "beamspot " << bsh.x0() << ' ' << bsh.y0() << ' ' << bsh.z0() << std::endl;
-  GlobalPoint bs(bsh.x0(), bsh.y0(), bsh.z0());
-
-  auto const& tsoa = iEvent.get(tokenTrack_);
-
-  auto const quality = tsoa.view().quality();
-  auto const& detIndices = tsoa.view().detIndices();
-  auto maxTracks = tsoa.view().metadata().size();
-
-  for (int32_t it = 0; it < maxTracks; ++it) {
-    auto nHits = trackHelper::nHits(tsoa.view(), it);
-    if (nHits == 0)
-      break;  // this is a guard: maybe we need to move to nTracks...
-
-    auto q = quality[it];
-    if (q != pixelTrack::Quality::loose)
-      continue;  // FIXME
-    if (nHits < minNumberOfHits_)
-      continue;
-
-    // fill hits with invalid just to hold the detId
-    auto b = detIndices.begin(it);
-    edm::OwnVector<TrackingRecHit> hits;
-    for (int iHit = 0; iHit < nHits; ++iHit) {
-      auto const* det = dus[*(b + iHit)];
-      // FIXME at some point get a proper type ...
-      hits.push_back(new InvalidTrackingRecHit(*det, TrackingRecHit::bad));
-    }
-
-    // mind: this values are respect the beamspot!
-
-    float phi = trackHelper::nHits(tsoa.view(), it);
-
-    riemannFit::Vector5d ipar, opar;
-    riemannFit::Matrix5d icov, ocov;
-    trackHelper::copyToDense(tsoa.view(), ipar, icov, it);
-    riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov);
-
-    LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.);
-    AlgebraicSymMatrix55 m;
-    for (int i = 0; i < 5; ++i)
-      for (int j = i; j < 5; ++j)
-        m(i, j) = ocov(i, j);
-
-    float sp = std::sin(phi);
-    float cp = std::cos(phi);
-    Surface::RotationType rot(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0);
-
-    Plane impPointPlane(bs, rot);
-    GlobalTrajectoryParameters gp(impPointPlane.toGlobal(lpar.position()),
-                                  impPointPlane.toGlobal(lpar.momentum()),
-                                  lpar.charge(),
-                                  fieldESH.product());
-
-    JacobianLocalToCurvilinear jl2c(impPointPlane, lpar, *fieldESH.product());
-
-    AlgebraicSymMatrix55 mo = ROOT::Math::Similarity(jl2c.jacobian(), m);
-
-    FreeTrajectoryState fts(gp, CurvilinearTrajectoryError(mo));
-
-    auto const& lastHit = hits.back();
-
-    TrajectoryStateOnSurface outerState = propagator->propagate(fts, *lastHit.surface());
-
-    if (!outerState.isValid()) {
-      edm::LogError("SeedFromGPU") << " was trying to create a seed from:\n"
-                                   << fts << "\n propagating to: " << lastHit.geographicalId().rawId();
-      continue;
-    }
-
-    auto const& pTraj = trajectoryStateTransform::persistentState(outerState, lastHit.geographicalId().rawId());
-
-    result->emplace_back(pTraj, hits, alongMomentum);
-  }
-
-  iEvent.put(std::move(result));
-}
-
-using SeedProducerFromSoAPhase1 = SeedProducerFromSoAT<pixelTopology::Phase1>;
-DEFINE_FWK_MODULE(SeedProducerFromSoAPhase1);
-
-using SeedProducerFromSoAPhase2 = SeedProducerFromSoAT<pixelTopology::Phase2>;
-DEFINE_FWK_MODULE(SeedProducerFromSoAPhase2);
diff --git a/RecoVertex/PixelVertexFinding/plugins/BuildFile.xml b/RecoVertex/PixelVertexFinding/plugins/BuildFile.xml
index 85603d97c5f3b..ff50cc1bdbaee 100644
--- a/RecoVertex/PixelVertexFinding/plugins/BuildFile.xml
+++ b/RecoVertex/PixelVertexFinding/plugins/BuildFile.xml
@@ -20,19 +20,8 @@
 <use name="RecoVertex/PixelVertexFinding"/>
 <use name="SimDataFormats/PileupSummaryInfo"/>
 
-<iftool name="cuda-gcc-support">
-  <use name="cuda"/>
-  <set name="cuda_src" value="*.cu"/>
-<else/>
-  <set name="cuda_src" value=""/>
-</iftool>
-
-<library file="*.cc ${cuda_src}" name="RecoPixelVertexingPixelVertexFindingPlugins">
- <use name="CUDADataFormats/Vertex"/>
- <use name="CUDADataFormats/Track"/>
- <use name="HeterogeneousCore/CUDACore"/>
- <use name="HeterogeneousCore/CUDAUtilities"/>
- <flags EDM_PLUGIN="1"/>
+<library file="*.cc" name="RecoPixelVertexingPixelVertexFindingPlugins">
+  <flags EDM_PLUGIN="1"/>
 </library>
 
 <library file="alpaka/*.cc" name="RecoPixelVertexingPixelVertexFindingPluginsPortable">
diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerCUDA.cc b/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerCUDA.cc
deleted file mode 100644
index a1f4101252319..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerCUDA.cc
+++ /dev/null
@@ -1,166 +0,0 @@
-#include <cuda_runtime.h>
-
-#include "CUDADataFormats/Common/interface/Product.h"
-#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h"
-#include "DataFormats/Common/interface/Handle.h"
-#include "FWCore/Framework/interface/ESHandle.h"
-#include "FWCore/Framework/interface/Event.h"
-#include "FWCore/Framework/interface/EventSetup.h"
-#include "FWCore/Framework/interface/MakerMacros.h"
-#include "FWCore/Framework/interface/global/EDProducer.h"
-#include "FWCore/Framework/interface/ConsumesCollector.h"
-#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
-#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
-#include "FWCore/Utilities/interface/InputTag.h"
-#include "FWCore/PluginManager/interface/ModuleDef.h"
-#include "FWCore/Utilities/interface/EDGetToken.h"
-#include "FWCore/Utilities/interface/RunningAverage.h"
-#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h"
-#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h"
-
-#include "gpuVertexFinder.h"
-
-#undef PIXVERTEX_DEBUG_PRODUCE
-
-template <typename TrackerTraits>
-class PixelVertexProducerCUDAT : public edm::global::EDProducer<> {
-  using TracksSoADevice = TrackSoAHeterogeneousDevice<TrackerTraits>;
-  using TracksSoAHost = TrackSoAHeterogeneousHost<TrackerTraits>;
-  using GPUAlgo = gpuVertexFinder::Producer<TrackerTraits>;
-
-public:
-  explicit PixelVertexProducerCUDAT(const edm::ParameterSet& iConfig);
-  ~PixelVertexProducerCUDAT() override = default;
-
-  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
-
-private:
-  void produceOnGPU(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const;
-  void produceOnCPU(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const;
-  void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override;
-
-  bool onGPU_;
-
-  edm::EDGetTokenT<cms::cuda::Product<TracksSoADevice>> tokenGPUTrack_;
-  edm::EDPutTokenT<cms::cuda::Product<ZVertexSoADevice>> tokenGPUVertex_;
-  edm::EDGetTokenT<TracksSoAHost> tokenCPUTrack_;
-  edm::EDPutTokenT<ZVertexSoAHost> tokenCPUVertex_;
-
-  const GPUAlgo gpuAlgo_;
-
-  // Tracking cuts before sending tracks to vertex algo
-  const float ptMin_;
-  const float ptMax_;
-};
-
-template <typename TrackerTraits>
-PixelVertexProducerCUDAT<TrackerTraits>::PixelVertexProducerCUDAT(const edm::ParameterSet& conf)
-    : onGPU_(conf.getParameter<bool>("onGPU")),
-      gpuAlgo_(conf.getParameter<bool>("oneKernel"),
-               conf.getParameter<bool>("useDensity"),
-               conf.getParameter<bool>("useDBSCAN"),
-               conf.getParameter<bool>("useIterative"),
-               conf.getParameter<bool>("doSplitting"),
-               conf.getParameter<int>("minT"),
-               conf.getParameter<double>("eps"),
-               conf.getParameter<double>("errmax"),
-               conf.getParameter<double>("chi2max")),
-      ptMin_(conf.getParameter<double>("PtMin")),  // 0.5 GeV
-      ptMax_(conf.getParameter<double>("PtMax"))   // 75. GeV
-{
-  if (onGPU_) {
-    tokenGPUTrack_ = consumes(conf.getParameter<edm::InputTag>("pixelTrackSrc"));
-    tokenGPUVertex_ = produces();
-  } else {
-    tokenCPUTrack_ = consumes(conf.getParameter<edm::InputTag>("pixelTrackSrc"));
-    tokenCPUVertex_ = produces();
-  }
-}
-
-template <typename TrackerTraits>
-void PixelVertexProducerCUDAT<TrackerTraits>::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
-  edm::ParameterSetDescription desc;
-
-  // Only one of these three algos can be used at once.
-  // Maybe this should become a Plugin Factory
-  desc.add<bool>("onGPU", true);
-  desc.add<bool>("oneKernel", true);
-  desc.add<bool>("useDensity", true);
-  desc.add<bool>("useDBSCAN", false);
-  desc.add<bool>("useIterative", false);
-  desc.add<bool>("doSplitting", true);
-
-  desc.add<int>("minT", 2);          // min number of neighbours to be "core"
-  desc.add<double>("eps", 0.07);     // max absolute distance to cluster
-  desc.add<double>("errmax", 0.01);  // max error to be "seed"
-  desc.add<double>("chi2max", 9.);   // max normalized distance to cluster
-
-  desc.add<double>("PtMin", 0.5);
-  desc.add<double>("PtMax", 75.);
-  desc.add<edm::InputTag>("pixelTrackSrc", edm::InputTag("pixelTracksCUDA"));
-
-  descriptions.addWithDefaultLabel(desc);
-}
-
-template <typename TrackerTraits>
-void PixelVertexProducerCUDAT<TrackerTraits>::produceOnGPU(edm::StreamID streamID,
-                                                           edm::Event& iEvent,
-                                                           const edm::EventSetup& iSetup) const {
-  using TracksSoA = TrackSoAHeterogeneousDevice<TrackerTraits>;
-  auto hTracks = iEvent.getHandle(tokenGPUTrack_);
-
-  cms::cuda::ScopedContextProduce ctx{*hTracks};
-  auto& tracks = ctx.get(*hTracks);
-
-  ctx.emplace(iEvent, tokenGPUVertex_, gpuAlgo_.makeAsync(ctx.stream(), tracks.view(), ptMin_, ptMax_));
-}
-
-template <typename TrackerTraits>
-void PixelVertexProducerCUDAT<TrackerTraits>::produceOnCPU(edm::StreamID streamID,
-                                                           edm::Event& iEvent,
-                                                           const edm::EventSetup& iSetup) const {
-  auto& tracks = iEvent.get(tokenCPUTrack_);
-
-#ifdef PIXVERTEX_DEBUG_PRODUCE
-  auto const& tsoa = *tracks;
-  auto maxTracks = tsoa.stride();
-  std::cout << "size of SoA " << sizeof(tsoa) << " stride " << maxTracks << std::endl;
-
-  int32_t nt = 0;
-  for (int32_t it = 0; it < maxTracks; ++it) {
-    auto nHits = TracksUtilities<TrackerTraits>::nHits(tracks.view(), it);
-    assert(nHits == int(tracks.view().hitIndices().size(it)));
-    if (nHits == 0)
-      break;  // this is a guard: maybe we need to move to nTracks...
-    nt++;
-  }
-  std::cout << "found " << nt << " tracks in cpu SoA for Vertexing at " << tracks << std::endl;
-#endif  // PIXVERTEX_DEBUG_PRODUCE
-
-  iEvent.emplace(tokenCPUVertex_, gpuAlgo_.make(tracks.view(), ptMin_, ptMax_));
-}
-
-template <typename TrackerTraits>
-void PixelVertexProducerCUDAT<TrackerTraits>::produce(edm::StreamID streamID,
-                                                      edm::Event& iEvent,
-                                                      const edm::EventSetup& iSetup) const {
-  if (onGPU_) {
-    produceOnGPU(streamID, iEvent, iSetup);
-  } else {
-    produceOnCPU(streamID, iEvent, iSetup);
-  }
-}
-
-using PixelVertexProducerCUDAPhase1 = PixelVertexProducerCUDAT<pixelTopology::Phase1>;
-DEFINE_FWK_MODULE(PixelVertexProducerCUDAPhase1);
-
-using PixelVertexProducerCUDAPhase2 = PixelVertexProducerCUDAT<pixelTopology::Phase2>;
-DEFINE_FWK_MODULE(PixelVertexProducerCUDAPhase2);
-
-using PixelVertexProducerCUDAHIonPhase1 = PixelVertexProducerCUDAT<pixelTopology::HIonPhase1>;
-DEFINE_FWK_MODULE(PixelVertexProducerCUDAHIonPhase1);
diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoA.cc b/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoA.cc
deleted file mode 100644
index 91de2bdb6992b..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoA.cc
+++ /dev/null
@@ -1,178 +0,0 @@
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h"
-#include "DataFormats/BeamSpot/interface/BeamSpot.h"
-#include "DataFormats/Common/interface/OrphanHandle.h"
-#include "DataFormats/TrackReco/interface/Track.h"
-#include "DataFormats/TrackReco/interface/TrackExtra.h"
-#include "DataFormats/TrackReco/interface/TrackFwd.h"
-#include "DataFormats/VertexReco/interface/Vertex.h"
-#include "DataFormats/VertexReco/interface/VertexFwd.h"
-#include "FWCore/Framework/interface/ConsumesCollector.h"
-#include "FWCore/Framework/interface/ESHandle.h"
-#include "FWCore/Framework/interface/Event.h"
-#include "FWCore/Framework/interface/EventSetup.h"
-#include "FWCore/Framework/interface/MakerMacros.h"
-#include "FWCore/Framework/interface/global/EDProducer.h"
-#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
-#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
-#include "FWCore/PluginManager/interface/ModuleDef.h"
-#include "FWCore/Utilities/interface/EDGetToken.h"
-#include "FWCore/Utilities/interface/InputTag.h"
-#include "Geometry/Records/interface/TrackerTopologyRcd.h"
-#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h"
-
-#undef PIXVERTEX_DEBUG_PRODUCE
-
-class PixelVertexProducerFromSoA : public edm::global::EDProducer<> {
-public:
-  using IndToEdm = std::vector<uint32_t>;
-
-  explicit PixelVertexProducerFromSoA(const edm::ParameterSet &iConfig);
-  ~PixelVertexProducerFromSoA() override = default;
-
-  static void fillDescriptions(edm::ConfigurationDescriptions &descriptions);
-
-private:
-  void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override;
-
-  edm::EDGetTokenT<ZVertexSoAHost> tokenVertex_;
-  edm::EDGetTokenT<reco::BeamSpot> tokenBeamSpot_;
-  edm::EDGetTokenT<reco::TrackCollection> tokenTracks_;
-  edm::EDGetTokenT<IndToEdm> tokenIndToEdm_;
-};
-
-PixelVertexProducerFromSoA::PixelVertexProducerFromSoA(const edm::ParameterSet &conf)
-    : tokenVertex_(consumes(conf.getParameter<edm::InputTag>("src"))),
-      tokenBeamSpot_(consumes(conf.getParameter<edm::InputTag>("beamSpot"))),
-      tokenTracks_(consumes(conf.getParameter<edm::InputTag>("TrackCollection"))),
-      tokenIndToEdm_(consumes(conf.getParameter<edm::InputTag>("TrackCollection"))) {
-  produces<reco::VertexCollection>();
-}
-
-void PixelVertexProducerFromSoA::fillDescriptions(edm::ConfigurationDescriptions &descriptions) {
-  edm::ParameterSetDescription desc;
-
-  desc.add<edm::InputTag>("TrackCollection", edm::InputTag("pixelTracks"));
-  desc.add<edm::InputTag>("beamSpot", edm::InputTag("offlineBeamSpot"));
-  desc.add<edm::InputTag>("src", edm::InputTag("pixelVerticesSoA"));
-
-  descriptions.add("pixelVertexFromSoA", desc);
-}
-
-void PixelVertexProducerFromSoA::produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &) const {
-  auto vertexes = std::make_unique<reco::VertexCollection>();
-
-  auto tracksHandle = iEvent.getHandle(tokenTracks_);
-  auto tracksSize = tracksHandle->size();
-  auto const &indToEdm = iEvent.get(tokenIndToEdm_);
-  auto bsHandle = iEvent.getHandle(tokenBeamSpot_);
-
-  float x0 = 0, y0 = 0, z0 = 0, dxdz = 0, dydz = 0;
-  std::vector<int32_t> itrk;
-  itrk.reserve(64);  // avoid first relocations
-  if (!bsHandle.isValid()) {
-    edm::LogWarning("PixelVertexProducer") << "No beamspot found. returning vertexes with (0,0,Z) ";
-  } else {
-    const reco::BeamSpot &bs = *bsHandle;
-    x0 = bs.x0();
-    y0 = bs.y0();
-    z0 = bs.z0();
-    dxdz = bs.dxdz();
-    dydz = bs.dydz();
-  }
-
-  auto const &soa = iEvent.get(tokenVertex_);
-
-  int nv = soa.view().nvFinal();
-
-#ifdef PIXVERTEX_DEBUG_PRODUCE
-  std::cout << "converting " << nv << " vertices "
-            << " from " << indToEdm.size() << " tracks" << std::endl;
-#endif  // PIXVERTEX_DEBUG_PRODUCE
-
-  std::set<uint32_t> uind;  // for verifing index consistency
-  for (int j = nv - 1; j >= 0; --j) {
-    auto i = soa.view()[j].sortInd();  // on gpu sorted in ascending order....
-    assert(i < nv);
-    uind.insert(i);
-    assert(itrk.empty());
-    auto z = soa.view()[i].zv();
-    auto x = x0 + dxdz * z;
-    auto y = y0 + dydz * z;
-    z += z0;
-    reco::Vertex::Error err;
-    err(2, 2) = 1.f / soa.view()[i].wv();
-    err(2, 2) *= 2.;  // artifically inflate error
-    //Copy also the tracks (no intention to be efficient....)
-    for (auto k = 0U; k < indToEdm.size(); ++k) {
-      if (soa.view()[k].idv() == int16_t(i))
-        itrk.push_back(k);
-    }
-    auto nt = itrk.size();
-    if (nt == 0) {
-#ifdef PIXVERTEX_DEBUG_PRODUCE
-      std::cout << "vertex " << i << " with no tracks..." << std::endl;
-#endif  // PIXVERTEX_DEBUG_PRODUCE
-      continue;
-    }
-    if (nt < 2) {
-      itrk.clear();
-      continue;
-    }  // remove outliers
-    (*vertexes).emplace_back(reco::Vertex::Point(x, y, z), err, soa.view()[i].chi2(), soa.view()[i].ndof(), nt);
-    auto &v = (*vertexes).back();
-    v.reserve(itrk.size());
-    for (auto it : itrk) {
-      assert(it < int(indToEdm.size()));
-      auto k = indToEdm[it];
-      if (k > tracksSize) {
-        edm::LogWarning("PixelVertexProducer") << "oops track " << it << " does not exists on CPU " << k;
-        continue;
-      }
-      auto tk = reco::TrackRef(tracksHandle, k);
-      v.add(tk);
-    }
-    itrk.clear();
-  }
-
-  LogDebug("PixelVertexProducer") << ": Found " << vertexes->size() << " vertexes\n";
-  for (unsigned int i = 0; i < vertexes->size(); ++i) {
-    LogDebug("PixelVertexProducer") << "Vertex number " << i << " has " << (*vertexes)[i].tracksSize()
-                                    << " tracks with a position of " << (*vertexes)[i].z() << " +- "
-                                    << std::sqrt((*vertexes)[i].covariance(2, 2));
-  }
-
-  // legacy logic....
-  if (vertexes->empty() && bsHandle.isValid()) {
-    const reco::BeamSpot &bs = *bsHandle;
-
-    GlobalError bse(bs.rotatedCovariance3D());
-    if ((bse.cxx() <= 0.) || (bse.cyy() <= 0.) || (bse.czz() <= 0.)) {
-      AlgebraicSymMatrix33 we;
-      we(0, 0) = 10000;
-      we(1, 1) = 10000;
-      we(2, 2) = 10000;
-      vertexes->push_back(reco::Vertex(bs.position(), we, 0., 0., 0));
-
-      edm::LogInfo("PixelVertexProducer") << "No vertices found. Beamspot with invalid errors " << bse.matrix()
-                                          << "\nWill put Vertex derived from dummy-fake BeamSpot into Event.\n"
-                                          << (*vertexes)[0].x() << "\n"
-                                          << (*vertexes)[0].y() << "\n"
-                                          << (*vertexes)[0].z() << "\n";
-    } else {
-      vertexes->push_back(reco::Vertex(bs.position(), bs.rotatedCovariance3D(), 0., 0., 0));
-
-      edm::LogInfo("PixelVertexProducer") << "No vertices found. Will put Vertex derived from BeamSpot into Event:\n"
-                                          << (*vertexes)[0].x() << "\n"
-                                          << (*vertexes)[0].y() << "\n"
-                                          << (*vertexes)[0].z() << "\n";
-    }
-  } else if (vertexes->empty() && !bsHandle.isValid()) {
-    edm::LogWarning("PixelVertexProducer") << "No beamspot and no vertex found. No vertex returned.";
-  }
-
-  iEvent.put(std::move(vertexes));
-}
-
-DEFINE_FWK_MODULE(PixelVertexProducerFromSoA);
diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexSoAFromCUDA.cc b/RecoVertex/PixelVertexFinding/plugins/PixelVertexSoAFromCUDA.cc
deleted file mode 100644
index b13b6c96f0bd3..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexSoAFromCUDA.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <cuda_runtime.h>
-
-#include "CUDADataFormats/Common/interface/Product.h"
-#include "CUDADataFormats/Common/interface/HostProduct.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h"
-#include "DataFormats/Common/interface/Handle.h"
-#include "FWCore/Framework/interface/ESHandle.h"
-#include "FWCore/Framework/interface/Event.h"
-#include "FWCore/Framework/interface/EventSetup.h"
-#include "FWCore/Framework/interface/MakerMacros.h"
-#include "FWCore/Framework/interface/stream/EDProducer.h"
-#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
-#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
-#include "FWCore/PluginManager/interface/ModuleDef.h"
-#include "FWCore/Utilities/interface/EDGetToken.h"
-#include "FWCore/Utilities/interface/InputTag.h"
-#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
-
-class PixelVertexSoAFromCUDA : public edm::stream::EDProducer<edm::ExternalWork> {
-public:
-  explicit PixelVertexSoAFromCUDA(const edm::ParameterSet& iConfig);
-  ~PixelVertexSoAFromCUDA() override = default;
-
-  static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
-
-private:
-  void acquire(edm::Event const& iEvent,
-               edm::EventSetup const& iSetup,
-               edm::WaitingTaskWithArenaHolder waitingTaskHolder) override;
-  void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override;
-
-  edm::EDGetTokenT<cms::cuda::Product<ZVertexSoADevice>> tokenCUDA_;
-  edm::EDPutTokenT<ZVertexSoAHost> tokenSOA_;
-
-  ZVertexSoAHost zvertex_h;
-};
-
-PixelVertexSoAFromCUDA::PixelVertexSoAFromCUDA(const edm::ParameterSet& iConfig)
-    : tokenCUDA_(consumes<cms::cuda::Product<ZVertexSoADevice>>(iConfig.getParameter<edm::InputTag>("src"))),
-      tokenSOA_(produces<ZVertexSoAHost>()) {}
-
-void PixelVertexSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
-  edm::ParameterSetDescription desc;
-
-  desc.add<edm::InputTag>("src", edm::InputTag("pixelVerticesCUDA"));
-  descriptions.add("pixelVerticesSoA", desc);
-}
-
-void PixelVertexSoAFromCUDA::acquire(edm::Event const& iEvent,
-                                     edm::EventSetup const& iSetup,
-                                     edm::WaitingTaskWithArenaHolder waitingTaskHolder) {
-  cms::cuda::Product<ZVertexSoADevice> const& inputDataWrapped = iEvent.get(tokenCUDA_);
-  cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)};
-  auto const& zvertex_d = ctx.get(inputDataWrapped);  // Tracks on device
-  zvertex_h = ZVertexSoAHost(ctx.stream());           // Create an instance of Tracks on Host, using the stream
-  cudaCheck(cudaMemcpyAsync(zvertex_h.buffer().get(),
-                            zvertex_d.const_buffer().get(),
-                            zvertex_d.bufferSize(),
-                            cudaMemcpyDeviceToHost,
-                            ctx.stream()));  // Copy data from Device to Host
-}
-
-void PixelVertexSoAFromCUDA::produce(edm::Event& iEvent, edm::EventSetup const& iSetup) {
-  // No copies....
-  iEvent.emplace(tokenSOA_, std::move(zvertex_h));
-}
-
-DEFINE_FWK_MODULE(PixelVertexSoAFromCUDA);
diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h b/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h
deleted file mode 100644
index e7eadd9c61dda..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceSoADevice_h
-#define RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceSoADevice_h
-
-#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"
-#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h"
-
-template <int32_t S>
-class PixelVertexWorkSpaceSoADevice : public cms::cuda::PortableDeviceCollection<PixelVertexWSSoALayout<>> {
-public:
-  explicit PixelVertexWorkSpaceSoADevice() = default;
-
-  // Constructor which specifies the SoA size and CUDA stream
-  explicit PixelVertexWorkSpaceSoADevice(cudaStream_t stream)
-      : PortableDeviceCollection<PixelVertexWSSoALayout<>>(S, stream) {}
-};
-
-namespace gpuVertexFinder {
-  namespace workSpace {
-    using PixelVertexWorkSpaceSoADevice = PixelVertexWorkSpaceSoADevice<zVertex::utilities::MAXTRACKS>;
-  }
-}  // namespace gpuVertexFinder
-#endif
diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h b/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h
deleted file mode 100644
index 9a4dc49b87f23..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceSoAHost_h
-#define RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceSoAHost_h
-
-#include "CUDADataFormats/Common/interface/PortableHostCollection.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"
-#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h"
-
-template <int32_t S>
-class PixelVertexWorkSpaceSoAHost : public cms::cuda::PortableHostCollection<PixelVertexWSSoALayout<>> {
-public:
-  explicit PixelVertexWorkSpaceSoAHost() : PortableHostCollection<PixelVertexWSSoALayout<>>(S) {}
-
-  // Constructor which specifies the SoA size and CUDA stream
-  explicit PixelVertexWorkSpaceSoAHost(cudaStream_t stream)
-      : PortableHostCollection<PixelVertexWSSoALayout<>>(S, stream) {}
-};
-
-namespace gpuVertexFinder {
-  namespace workSpace {
-    using PixelVertexWorkSpaceSoAHost = PixelVertexWorkSpaceSoAHost<zVertex::utilities::MAXTRACKS>;
-  }
-}  // namespace gpuVertexFinder
-#endif
diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h b/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h
deleted file mode 100644
index cc0828f153330..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceUtilities_h
-#define RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceUtilities_h
-
-#include <cuda_runtime.h>
-#include "DataFormats/SoATemplate/interface/SoALayout.h"
-
-// Intermediate data used in the vertex reco algos
-// For internal use only
-GENERATE_SOA_LAYOUT(PixelVertexWSSoALayout,
-                    SOA_COLUMN(uint16_t, itrk),            // index of original track
-                    SOA_COLUMN(float, zt),                 // input track z at bs
-                    SOA_COLUMN(float, ezt2),               // input error^2 on the above
-                    SOA_COLUMN(float, ptt2),               // input pt^2 on the above
-                    SOA_COLUMN(uint8_t, izt),              // interized z-position of input tracks
-                    SOA_COLUMN(int32_t, iv),               // vertex index for each associated track
-                    SOA_SCALAR(uint32_t, ntrks),           // number of "selected tracks"
-                    SOA_SCALAR(uint32_t, nvIntermediate))  // the number of vertices after splitting pruning etc.
-
-// Methods that operate on View and ConstView of the WorkSpaceSoALayout.
-namespace gpuVertexFinder {
-  namespace workSpace {
-    using PixelVertexWorkSpaceSoALayout = PixelVertexWSSoALayout<>;
-    using PixelVertexWorkSpaceSoAView = PixelVertexWSSoALayout<>::View;
-    using PixelVertexWorkSpaceSoAConstView = PixelVertexWSSoALayout<>::ConstView;
-
-    namespace utilities {
-      __host__ __device__ inline void init(PixelVertexWorkSpaceSoAView &workspace_view) {
-        workspace_view.ntrks() = 0;
-        workspace_view.nvIntermediate() = 0;
-      }
-    }  // namespace utilities
-  }    // namespace workSpace
-}  // namespace gpuVertexFinder
-
-#endif
diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h b/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h
deleted file mode 100644
index e38c4bd495899..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h
+++ /dev/null
@@ -1,237 +0,0 @@
-#ifndef RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksByDensity_h
-#define RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksByDensity_h
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-
-#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
-
-#include "gpuVertexFinder.h"
-
-namespace gpuVertexFinder {
-
-  // this algo does not really scale as it works in a single block...
-  // enough for <10K tracks we have
-  //
-  // based on Rodrighez&Laio algo
-  //
-  __device__ __forceinline__ void clusterTracksByDensity(VtxSoAView& pdata,
-                                                         WsSoAView& pws,
-                                                         int minT,      // min number of neighbours to be "seed"
-                                                         float eps,     // max absolute distance to cluster
-                                                         float errmax,  // max error to be "seed"
-                                                         float chi2max  // max normalized distance to cluster
-  ) {
-    using namespace gpuVertexFinder;
-    constexpr bool verbose = false;  // in principle the compiler should optmize out if false
-
-    if (verbose && 0 == threadIdx.x)
-      printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
-
-    auto er2mx = errmax * errmax;
-
-    auto& __restrict__ data = pdata;
-    auto& __restrict__ ws = pws;
-    auto nt = ws.ntrks();
-    float const* __restrict__ zt = ws.zt().data();
-    float const* __restrict__ ezt2 = ws.ezt2().data();
-
-    uint32_t& nvFinal = data.nvFinal();
-    uint32_t& nvIntermediate = ws.nvIntermediate();
-
-    uint8_t* __restrict__ izt = ws.izt().data();
-    int32_t* __restrict__ nn = data.ndof().data();
-    int32_t* __restrict__ iv = ws.iv().data();
-
-    assert(zt);
-    assert(ezt2);
-    assert(izt);
-    assert(nn);
-    assert(iv);
-
-    using Hist = cms::cuda::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
-    __shared__ Hist hist;
-    __shared__ typename Hist::Counter hws[32];
-    for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) {
-      hist.off[j] = 0;
-    }
-    __syncthreads();
-
-    if (verbose && 0 == threadIdx.x)
-      printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
-
-    assert((int)nt <= hist.capacity());
-
-    // fill hist  (bin shall be wider than "eps")
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      assert(i < zVertex::utilities::MAXTRACKS);
-      int iz = int(zt[i] * 10.);  // valid if eps<=0.1
-      // iz = std::clamp(iz, INT8_MIN, INT8_MAX);  // sorry c++17 only
-      iz = std::min(std::max(iz, INT8_MIN), INT8_MAX);
-      izt[i] = iz - INT8_MIN;
-      assert(iz - INT8_MIN >= 0);
-      assert(iz - INT8_MIN < 256);
-      hist.count(izt[i]);
-      iv[i] = i;
-      nn[i] = 0;
-    }
-    __syncthreads();
-    if (threadIdx.x < 32)
-      hws[threadIdx.x] = 0;  // used by prefix scan...
-    __syncthreads();
-    hist.finalize(hws);
-    __syncthreads();
-    assert(hist.size() == nt);
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      hist.fill(izt[i], uint16_t(i));
-    }
-    __syncthreads();
-
-    // count neighbours
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (ezt2[i] > er2mx)
-        continue;
-      auto loop = [&](uint32_t j) {
-        if (i == j)
-          return;
-        auto dist = std::abs(zt[i] - zt[j]);
-        if (dist > eps)
-          return;
-        if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
-          return;
-        nn[i]++;
-      };
-
-      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
-    }
-
-    __syncthreads();
-
-    // find closest above me .... (we ignore the possibility of two j at same distance from i)
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      float mdist = eps;
-      auto loop = [&](uint32_t j) {
-        if (nn[j] < nn[i])
-          return;
-        if (nn[j] == nn[i] && zt[j] >= zt[i])
-          return;  // if equal use natural order...
-        auto dist = std::abs(zt[i] - zt[j]);
-        if (dist > mdist)
-          return;
-        if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
-          return;  // (break natural order???)
-        mdist = dist;
-        iv[i] = j;  // assign to cluster (better be unique??)
-      };
-      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
-    }
-
-    __syncthreads();
-
-#ifdef GPU_DEBUG
-    //  mini verification
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] != int(i))
-        assert(iv[iv[i]] != int(i));
-    }
-    __syncthreads();
-#endif
-
-    // consolidate graph (percolate index of seed)
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      auto m = iv[i];
-      while (m != iv[m])
-        m = iv[m];
-      iv[i] = m;
-    }
-
-#ifdef GPU_DEBUG
-    __syncthreads();
-    //  mini verification
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] != int(i))
-        assert(iv[iv[i]] != int(i));
-    }
-#endif
-
-#ifdef GPU_DEBUG
-    // and verify that we did not spit any cluster...
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      auto minJ = i;
-      auto mdist = eps;
-      auto loop = [&](uint32_t j) {
-        if (nn[j] < nn[i])
-          return;
-        if (nn[j] == nn[i] && zt[j] >= zt[i])
-          return;  // if equal use natural order...
-        auto dist = std::abs(zt[i] - zt[j]);
-        if (dist > mdist)
-          return;
-        if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
-          return;
-        mdist = dist;
-        minJ = j;
-      };
-      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
-      // should belong to the same cluster...
-      assert(iv[i] == iv[minJ]);
-      assert(nn[i] <= nn[iv[i]]);
-    }
-    __syncthreads();
-#endif
-
-    __shared__ unsigned int foundClusters;
-    foundClusters = 0;
-    __syncthreads();
-
-    // find the number of different clusters, identified by a tracks with clus[i] == i and density larger than threshold;
-    // mark these tracks with a negative id.
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] == int(i)) {
-        if (nn[i] >= minT) {
-          auto old = atomicInc(&foundClusters, 0xffffffff);
-          iv[i] = -(old + 1);
-        } else {  // noise
-          iv[i] = -9998;
-        }
-      }
-    }
-    __syncthreads();
-
-    assert(foundClusters < zVertex::utilities::MAXVTX);
-
-    // propagate the negative id to all the tracks in the cluster.
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] >= 0) {
-        // mark each track in a cluster with the same id as the first one
-        iv[i] = iv[iv[i]];
-      }
-    }
-    __syncthreads();
-
-    // adjust the cluster id to be a positive value starting from 0
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      iv[i] = -iv[i] - 1;
-    }
-
-    nvIntermediate = nvFinal = foundClusters;
-
-    if (verbose && 0 == threadIdx.x)
-      printf("found %d proto vertices\n", foundClusters);
-  }
-
-  __global__ void clusterTracksByDensityKernel(VtxSoAView pdata,
-                                               WsSoAView pws,
-                                               int minT,      // min number of neighbours to be "seed"
-                                               float eps,     // max absolute distance to cluster
-                                               float errmax,  // max error to be "seed"
-                                               float chi2max  // max normalized distance to cluster
-  ) {
-    clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
-  }
-
-}  // namespace gpuVertexFinder
-
-#endif  // RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksByDensity_h
diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h b/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h
deleted file mode 100644
index 588fabd3d0d77..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h
+++ /dev/null
@@ -1,243 +0,0 @@
-#ifndef RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksDBSCAN_h
-#define RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksDBSCAN_h
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-
-#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
-
-#include "gpuVertexFinder.h"
-
-namespace gpuVertexFinder {
-
-  // this algo does not really scale as it works in a single block...
-  // enough for <10K tracks we have
-  __global__ void clusterTracksDBSCAN(VtxSoAView pdata,
-                                      WsSoAView pws,
-                                      int minT,      // min number of neighbours to be "core"
-                                      float eps,     // max absolute distance to cluster
-                                      float errmax,  // max error to be "seed"
-                                      float chi2max  // max normalized distance to cluster
-  ) {
-    constexpr bool verbose = false;  // in principle the compiler should optmize out if false
-
-    if (verbose && 0 == threadIdx.x)
-      printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
-
-    auto er2mx = errmax * errmax;
-
-    auto& __restrict__ data = pdata;
-    auto& __restrict__ ws = pws;
-    auto nt = ws.ntrks();
-    float const* __restrict__ zt = ws.zt().data();
-    float const* __restrict__ ezt2 = ws.ezt2().data();
-
-    uint32_t& nvFinal = data.nvFinal();
-    uint32_t& nvIntermediate = ws.nvIntermediate();
-
-    uint8_t* __restrict__ izt = ws.izt().data();
-    int32_t* __restrict__ nn = data.ndof().data();
-    int32_t* __restrict__ iv = ws.iv().data();
-
-    assert(zt);
-    assert(iv);
-    assert(nn);
-    assert(ezt2);
-
-    using Hist = cms::cuda::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
-    __shared__ Hist hist;
-    __shared__ typename Hist::Counter hws[32];
-    for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) {
-      hist.off[j] = 0;
-    }
-    __syncthreads();
-
-    if (verbose && 0 == threadIdx.x)
-      printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
-
-    assert((int)nt <= hist.capacity());
-
-    // fill hist  (bin shall be wider than "eps")
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      assert(i < zVertex::utilities::MAXTRACKS);
-      int iz = int(zt[i] * 10.);  // valid if eps<=0.1
-      iz = std::clamp(iz, INT8_MIN, INT8_MAX);
-      izt[i] = iz - INT8_MIN;
-      assert(iz - INT8_MIN >= 0);
-      assert(iz - INT8_MIN < 256);
-      hist.count(izt[i]);
-      iv[i] = i;
-      nn[i] = 0;
-    }
-    __syncthreads();
-    if (threadIdx.x < 32)
-      hws[threadIdx.x] = 0;  // used by prefix scan...
-    __syncthreads();
-    hist.finalize(hws);
-    __syncthreads();
-    assert(hist.size() == nt);
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      hist.fill(izt[i], uint16_t(i));
-    }
-    __syncthreads();
-
-    // count neighbours
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (ezt2[i] > er2mx)
-        continue;
-      auto loop = [&](uint32_t j) {
-        if (i == j)
-          return;
-        auto dist = std::abs(zt[i] - zt[j]);
-        if (dist > eps)
-          return;
-        //        if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return;
-        nn[i]++;
-      };
-
-      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
-    }
-
-    __syncthreads();
-
-    // find NN with smaller z...
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (nn[i] < minT)
-        continue;  // DBSCAN core rule
-      float mz = zt[i];
-      auto loop = [&](uint32_t j) {
-        if (zt[j] >= mz)
-          return;
-        if (nn[j] < minT)
-          return;  // DBSCAN core rule
-        auto dist = std::abs(zt[i] - zt[j]);
-        if (dist > eps)
-          return;
-        //        if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return;
-        mz = zt[j];
-        iv[i] = j;  // assign to cluster (better be unique??)
-      };
-      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
-    }
-
-    __syncthreads();
-
-#ifdef GPU_DEBUG
-    //  mini verification
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] != int(i))
-        assert(iv[iv[i]] != int(i));
-    }
-    __syncthreads();
-#endif
-
-    // consolidate graph (percolate index of seed)
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      auto m = iv[i];
-      while (m != iv[m])
-        m = iv[m];
-      iv[i] = m;
-    }
-
-    __syncthreads();
-
-#ifdef GPU_DEBUG
-    //  mini verification
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] != int(i))
-        assert(iv[iv[i]] != int(i));
-    }
-    __syncthreads();
-#endif
-
-#ifdef GPU_DEBUG
-    // and verify that we did not spit any cluster...
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (nn[i] < minT)
-        continue;  // DBSCAN core rule
-      assert(zt[iv[i]] <= zt[i]);
-      auto loop = [&](uint32_t j) {
-        if (nn[j] < minT)
-          return;  // DBSCAN core rule
-        auto dist = std::abs(zt[i] - zt[j]);
-        if (dist > eps)
-          return;
-        //  if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return;
-        // they should belong to the same cluster, isn't it?
-        if (iv[i] != iv[j]) {
-          printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]);
-          printf("      %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]);
-          ;
-        }
-        assert(iv[i] == iv[j]);
-      };
-      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
-    }
-    __syncthreads();
-#endif
-
-    // collect edges (assign to closest cluster of closest point??? here to closest point)
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      //    if (nn[i]==0 || nn[i]>=minT) continue;    // DBSCAN edge rule
-      if (nn[i] >= minT)
-        continue;  // DBSCAN edge rule
-      float mdist = eps;
-      auto loop = [&](uint32_t j) {
-        if (nn[j] < minT)
-          return;  // DBSCAN core rule
-        auto dist = std::abs(zt[i] - zt[j]);
-        if (dist > mdist)
-          return;
-        if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
-          return;  // needed?
-        mdist = dist;
-        iv[i] = iv[j];  // assign to cluster (better be unique??)
-      };
-      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
-    }
-
-    __shared__ unsigned int foundClusters;
-    foundClusters = 0;
-    __syncthreads();
-
-    // find the number of different clusters, identified by a tracks with clus[i] == i;
-    // mark these tracks with a negative id.
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] == int(i)) {
-        if (nn[i] >= minT) {
-          auto old = atomicInc(&foundClusters, 0xffffffff);
-          iv[i] = -(old + 1);
-        } else {  // noise
-          iv[i] = -9998;
-        }
-      }
-    }
-    __syncthreads();
-
-    assert(foundClusters < zVertex::utilities::MAXVTX);
-
-    // propagate the negative id to all the tracks in the cluster.
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] >= 0) {
-        // mark each track in a cluster with the same id as the first one
-        iv[i] = iv[iv[i]];
-      }
-    }
-    __syncthreads();
-
-    // adjust the cluster id to be a positive value starting from 0
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      iv[i] = -iv[i] - 1;
-    }
-
-    nvIntermediate = nvFinal = foundClusters;
-
-    if (verbose && 0 == threadIdx.x)
-      printf("found %d proto vertices\n", foundClusters);
-  }
-
-}  // namespace gpuVertexFinder
-
-#endif  // RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksDBSCAN_h
diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h b/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h
deleted file mode 100644
index 5a56e23eedeee..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h
+++ /dev/null
@@ -1,214 +0,0 @@
-#ifndef RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksIterative_h
-#define RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksIterative_h
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-
-#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
-
-#include "gpuVertexFinder.h"
-
-namespace gpuVertexFinder {
-
-  // this algo does not really scale as it works in a single block...
-  // enough for <10K tracks we have
-  __global__ void clusterTracksIterative(VtxSoAView pdata,
-                                         WsSoAView pws,
-                                         int minT,      // min number of neighbours to be "core"
-                                         float eps,     // max absolute distance to cluster
-                                         float errmax,  // max error to be "seed"
-                                         float chi2max  // max normalized distance to cluster
-  ) {
-    constexpr bool verbose = false;  // in principle the compiler should optmize out if false
-
-    if (verbose && 0 == threadIdx.x)
-      printf("params %d %f %f %f\n", minT, eps, errmax, chi2max);
-
-    auto er2mx = errmax * errmax;
-
-    auto& __restrict__ data = pdata;
-    auto& __restrict__ ws = pws;
-    auto nt = ws.ntrks();
-    float const* __restrict__ zt = ws.zt().data();
-    float const* __restrict__ ezt2 = ws.ezt2().data();
-
-    uint32_t& nvFinal = data.nvFinal();
-    uint32_t& nvIntermediate = ws.nvIntermediate();
-
-    uint8_t* __restrict__ izt = ws.izt().data();
-    int32_t* __restrict__ nn = data.ndof().data();
-    int32_t* __restrict__ iv = ws.iv().data();
-
-    assert(zt);
-    assert(nn);
-    assert(iv);
-    assert(ezt2);
-
-    using Hist = cms::cuda::HistoContainer<uint8_t, 256, 16000, 8, uint16_t>;
-    __shared__ Hist hist;
-    __shared__ typename Hist::Counter hws[32];
-    for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) {
-      hist.off[j] = 0;
-    }
-    __syncthreads();
-
-    if (verbose && 0 == threadIdx.x)
-      printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt);
-
-    assert((int)nt <= hist.capacity());
-
-    // fill hist  (bin shall be wider than "eps")
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      assert(i < zVertex::utilities::MAXTRACKS);
-      int iz = int(zt[i] * 10.);  // valid if eps<=0.1
-      iz = std::clamp(iz, INT8_MIN, INT8_MAX);
-      izt[i] = iz - INT8_MIN;
-      assert(iz - INT8_MIN >= 0);
-      assert(iz - INT8_MIN < 256);
-      hist.count(izt[i]);
-      iv[i] = i;
-      nn[i] = 0;
-    }
-    __syncthreads();
-    if (threadIdx.x < 32)
-      hws[threadIdx.x] = 0;  // used by prefix scan...
-    __syncthreads();
-    hist.finalize(hws);
-    __syncthreads();
-    assert(hist.size() == nt);
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      hist.fill(izt[i], uint16_t(i));
-    }
-    __syncthreads();
-
-    // count neighbours
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (ezt2[i] > er2mx)
-        continue;
-      auto loop = [&](uint32_t j) {
-        if (i == j)
-          return;
-        auto dist = std::abs(zt[i] - zt[j]);
-        if (dist > eps)
-          return;
-        if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
-          return;
-        nn[i]++;
-      };
-
-      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
-    }
-
-    __shared__ int nloops;
-    nloops = 0;
-
-    __syncthreads();
-
-    // cluster seeds only
-    bool more = true;
-    while (__syncthreads_or(more)) {
-      if (1 == nloops % 2) {
-        for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-          auto m = iv[i];
-          while (m != iv[m])
-            m = iv[m];
-          iv[i] = m;
-        }
-      } else {
-        more = false;
-        for (auto k = threadIdx.x; k < hist.size(); k += blockDim.x) {
-          auto p = hist.begin() + k;
-          auto i = (*p);
-          auto be = std::min(Hist::bin(izt[i]) + 1, int(hist.nbins() - 1));
-          if (nn[i] < minT)
-            continue;  // DBSCAN core rule
-          auto loop = [&](uint32_t j) {
-            assert(i != j);
-            if (nn[j] < minT)
-              return;  // DBSCAN core rule
-            auto dist = std::abs(zt[i] - zt[j]);
-            if (dist > eps)
-              return;
-            if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
-              return;
-            auto old = atomicMin(&iv[j], iv[i]);
-            if (old != iv[i]) {
-              // end the loop only if no changes were applied
-              more = true;
-            }
-            atomicMin(&iv[i], old);
-          };
-          ++p;
-          for (; p < hist.end(be); ++p)
-            loop(*p);
-        }  // for i
-      }
-      if (threadIdx.x == 0)
-        ++nloops;
-    }  // while
-
-    // collect edges (assign to closest cluster of closest point??? here to closest point)
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      //    if (nn[i]==0 || nn[i]>=minT) continue;    // DBSCAN edge rule
-      if (nn[i] >= minT)
-        continue;  // DBSCAN edge rule
-      float mdist = eps;
-      auto loop = [&](int j) {
-        if (nn[j] < minT)
-          return;  // DBSCAN core rule
-        auto dist = std::abs(zt[i] - zt[j]);
-        if (dist > mdist)
-          return;
-        if (dist * dist > chi2max * (ezt2[i] + ezt2[j]))
-          return;  // needed?
-        mdist = dist;
-        iv[i] = iv[j];  // assign to cluster (better be unique??)
-      };
-      cms::cuda::forEachInBins(hist, izt[i], 1, loop);
-    }
-
-    __shared__ unsigned int foundClusters;
-    foundClusters = 0;
-    __syncthreads();
-
-    // find the number of different clusters, identified by a tracks with clus[i] == i;
-    // mark these tracks with a negative id.
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] == int(i)) {
-        if (nn[i] >= minT) {
-          auto old = atomicInc(&foundClusters, 0xffffffff);
-          iv[i] = -(old + 1);
-        } else {  // noise
-          iv[i] = -9998;
-        }
-      }
-    }
-    __syncthreads();
-
-    assert(foundClusters < zVertex::utilities::MAXVTX);
-
-    // propagate the negative id to all the tracks in the cluster.
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] >= 0) {
-        // mark each track in a cluster with the same id as the first one
-        iv[i] = iv[iv[i]];
-      }
-    }
-    __syncthreads();
-
-    // adjust the cluster id to be a positive value starting from 0
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      iv[i] = -iv[i] - 1;
-    }
-
-    nvIntermediate = nvFinal = foundClusters;
-
-    if (verbose && 0 == threadIdx.x)
-      printf("found %d proto vertices\n", foundClusters);
-  }
-
-}  // namespace gpuVertexFinder
-
-#endif  // RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksIterative_h
diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h b/RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h
deleted file mode 100644
index bf2652b8acd5f..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef RecoVertex_PixelVertexFinding_plugins_gpuFitVertices_h
-#define RecoVertex_PixelVertexFinding_plugins_gpuFitVertices_h
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-
-#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
-
-#include "gpuVertexFinder.h"
-
-namespace gpuVertexFinder {
-
-  __device__ __forceinline__ void fitVertices(VtxSoAView& pdata,
-                                              WsSoAView& pws,
-                                              float chi2Max  // for outlier rejection
-  ) {
-    constexpr bool verbose = false;  // in principle the compiler should optmize out if false
-
-    auto& __restrict__ data = pdata;
-    auto& __restrict__ ws = pws;
-    auto nt = ws.ntrks();
-    float const* __restrict__ zt = ws.zt().data();
-    float const* __restrict__ ezt2 = ws.ezt2().data();
-    float* __restrict__ zv = data.zv().data();
-    float* __restrict__ wv = data.wv().data();
-    float* __restrict__ chi2 = data.chi2().data();
-    uint32_t& nvFinal = data.nvFinal();
-    uint32_t& nvIntermediate = ws.nvIntermediate();
-
-    int32_t* __restrict__ nn = data.ndof().data();
-    int32_t* __restrict__ iv = ws.iv().data();
-
-    assert(nvFinal <= nvIntermediate);
-    nvFinal = nvIntermediate;
-    auto foundClusters = nvFinal;
-
-    // zero
-    for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) {
-      zv[i] = 0;
-      wv[i] = 0;
-      chi2[i] = 0;
-    }
-
-    // only for test
-    __shared__ int noise;
-    if (verbose && 0 == threadIdx.x)
-      noise = 0;
-
-    __syncthreads();
-
-    // compute cluster location
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] > 9990) {
-        if (verbose)
-          atomicAdd(&noise, 1);
-        continue;
-      }
-      assert(iv[i] >= 0);
-      assert(iv[i] < int(foundClusters));
-      auto w = 1.f / ezt2[i];
-      atomicAdd_block(&zv[iv[i]], zt[i] * w);
-      atomicAdd_block(&wv[iv[i]], w);
-    }
-
-    __syncthreads();
-    // reuse nn
-    for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) {
-      assert(wv[i] > 0.f);
-      zv[i] /= wv[i];
-      nn[i] = -1;  // ndof
-    }
-    __syncthreads();
-
-    // compute chi2
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] > 9990)
-        continue;
-
-      auto c2 = zv[iv[i]] - zt[i];
-      c2 *= c2 / ezt2[i];
-      if (c2 > chi2Max) {
-        iv[i] = 9999;
-        continue;
-      }
-      atomicAdd_block(&chi2[iv[i]], c2);
-      atomicAdd_block(&nn[iv[i]], 1);
-    }
-    __syncthreads();
-    for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x)
-      if (nn[i] > 0)
-        wv[i] *= float(nn[i]) / chi2[i];
-
-    if (verbose && 0 == threadIdx.x)
-      printf("found %d proto clusters ", foundClusters);
-    if (verbose && 0 == threadIdx.x)
-      printf("and %d noise\n", noise);
-  }
-
-  __global__ void fitVerticesKernel(VtxSoAView pdata,
-                                    WsSoAView pws,
-                                    float chi2Max  // for outlier rejection
-  ) {
-    fitVertices(pdata, pws, chi2Max);
-  }
-
-}  // namespace gpuVertexFinder
-
-#endif  // RecoVertex_PixelVertexFinding_plugins_gpuFitVertices_h
diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h b/RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h
deleted file mode 100644
index 4d9f2c42068d2..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h
+++ /dev/null
@@ -1,73 +0,0 @@
-#ifndef RecoVertex_PixelVertexFinding_plugins_gpuSortByPt2_h
-#define RecoVertex_PixelVertexFinding_plugins_gpuSortByPt2_h
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-
-#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
-#ifdef __CUDA_ARCH__
-#include "HeterogeneousCore/CUDAUtilities/interface/radixSort.h"
-#endif
-
-#include "gpuVertexFinder.h"
-
-namespace gpuVertexFinder {
-
-  __device__ __forceinline__ void sortByPt2(VtxSoAView& pdata, WsSoAView& pws) {
-    auto& __restrict__ data = pdata;
-    auto& __restrict__ ws = pws;
-    auto nt = ws.ntrks();
-    float const* __restrict__ ptt2 = ws.ptt2().data();
-    uint32_t const& nvFinal = data.nvFinal();
-
-    int32_t const* __restrict__ iv = ws.iv().data();
-    float* __restrict__ ptv2 = data.ptv2().data();
-    uint16_t* __restrict__ sortInd = data.sortInd().data();
-
-    assert(ptv2);
-    assert(sortInd);
-
-    if (nvFinal < 1)
-      return;
-
-    // fill indexing
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      data[ws[i].itrk()].idv() = iv[i];
-    }
-
-    // can be done asynchronously at the end of previous event
-    for (auto i = threadIdx.x; i < nvFinal; i += blockDim.x) {
-      ptv2[i] = 0;
-    }
-    __syncthreads();
-
-    for (auto i = threadIdx.x; i < nt; i += blockDim.x) {
-      if (iv[i] > 9990)
-        continue;
-      atomicAdd_block(&ptv2[iv[i]], ptt2[i]);
-    }
-    __syncthreads();
-
-    if (1 == nvFinal) {
-      if (threadIdx.x == 0)
-        sortInd[0] = 0;
-      return;
-    }
-#ifdef __CUDA_ARCH__
-    __shared__ uint16_t sws[1024];
-    // sort using only 16 bits
-    radixSort<float, 2>(ptv2, sortInd, sws, nvFinal);
-#else
-    for (uint16_t i = 0; i < nvFinal; ++i)
-      sortInd[i] = i;
-    std::sort(sortInd, sortInd + nvFinal, [&](auto i, auto j) { return ptv2[i] < ptv2[j]; });
-#endif
-  }
-
-  __global__ void sortByPt2Kernel(VtxSoAView pdata, WsSoAView pws) { sortByPt2(pdata, pws); }
-
-}  // namespace gpuVertexFinder
-
-#endif  // RecoVertex_PixelVertexFinding_plugins_gpuSortByPt2_h
diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h b/RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h
deleted file mode 100644
index c5b14b8bdcdd0..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h
+++ /dev/null
@@ -1,141 +0,0 @@
-#ifndef RecoVertex_PixelVertexFinding_plugins_gpuSplitVertices_h
-#define RecoVertex_PixelVertexFinding_plugins_gpuSplitVertices_h
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-
-#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h"
-
-#include "gpuVertexFinder.h"
-
-namespace gpuVertexFinder {
-
-  __device__ __forceinline__ void splitVertices(VtxSoAView& pdata, WsSoAView& pws, float maxChi2) {
-    constexpr bool verbose = false;  // in principle the compiler should optmize out if false
-
-    auto& __restrict__ data = pdata;
-    auto& __restrict__ ws = pws;
-    auto nt = ws.ntrks();
-    float const* __restrict__ zt = ws.zt().data();
-    float const* __restrict__ ezt2 = ws.ezt2().data();
-    float* __restrict__ zv = data.zv().data();
-    float* __restrict__ wv = data.wv().data();
-    float const* __restrict__ chi2 = data.chi2().data();
-    uint32_t& nvFinal = data.nvFinal();
-
-    int32_t const* __restrict__ nn = data.ndof().data();
-    int32_t* __restrict__ iv = ws.iv().data();
-
-    assert(zt);
-    assert(wv);
-    assert(chi2);
-    assert(nn);
-
-    // one vertex per block
-    for (auto kv = blockIdx.x; kv < nvFinal; kv += gridDim.x) {
-      if (nn[kv] < 4)
-        continue;
-      if (chi2[kv] < maxChi2 * float(nn[kv]))
-        continue;
-
-      constexpr int MAXTK = 512;
-      assert(nn[kv] < MAXTK);
-      if (nn[kv] >= MAXTK)
-        continue;                      // too bad FIXME
-      __shared__ uint32_t it[MAXTK];   // track index
-      __shared__ float zz[MAXTK];      // z pos
-      __shared__ uint8_t newV[MAXTK];  // 0 or 1
-      __shared__ float ww[MAXTK];      // z weight
-
-      __shared__ uint32_t nq;  // number of track for this vertex
-      nq = 0;
-      __syncthreads();
-
-      // copy to local
-      for (auto k = threadIdx.x; k < nt; k += blockDim.x) {
-        if (iv[k] == int(kv)) {
-          auto old = atomicInc(&nq, MAXTK);
-          zz[old] = zt[k] - zv[kv];
-          newV[old] = zz[old] < 0 ? 0 : 1;
-          ww[old] = 1.f / ezt2[k];
-          it[old] = k;
-        }
-      }
-
-      __shared__ float znew[2], wnew[2];  // the new vertices
-
-      __syncthreads();
-      assert(int(nq) == nn[kv] + 1);
-
-      int maxiter = 20;
-      // kt-min....
-      bool more = true;
-      while (__syncthreads_or(more)) {
-        more = false;
-        if (0 == threadIdx.x) {
-          znew[0] = 0;
-          znew[1] = 0;
-          wnew[0] = 0;
-          wnew[1] = 0;
-        }
-        __syncthreads();
-        for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
-          auto i = newV[k];
-          atomicAdd(&znew[i], zz[k] * ww[k]);
-          atomicAdd(&wnew[i], ww[k]);
-        }
-        __syncthreads();
-        if (0 == threadIdx.x) {
-          znew[0] /= wnew[0];
-          znew[1] /= wnew[1];
-        }
-        __syncthreads();
-        for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
-          auto d0 = fabs(zz[k] - znew[0]);
-          auto d1 = fabs(zz[k] - znew[1]);
-          auto newer = d0 < d1 ? 0 : 1;
-          more |= newer != newV[k];
-          newV[k] = newer;
-        }
-        --maxiter;
-        if (maxiter <= 0)
-          more = false;
-      }
-
-      // avoid empty vertices
-      if (0 == wnew[0] || 0 == wnew[1])
-        continue;
-
-      // quality cut
-      auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]);
-
-      auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]);
-
-      if (verbose && 0 == threadIdx.x)
-        printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * wv[kv]);
-
-      if (chi2Dist < 4)
-        continue;
-
-      // get a new global vertex
-      __shared__ uint32_t igv;
-      if (0 == threadIdx.x)
-        igv = atomicAdd(&ws.nvIntermediate(), 1);
-      __syncthreads();
-      for (auto k = threadIdx.x; k < nq; k += blockDim.x) {
-        if (1 == newV[k])
-          iv[it[k]] = igv;
-      }
-
-    }  // loop on vertices
-  }
-
-  __global__ void splitVerticesKernel(VtxSoAView pdata, WsSoAView pws, float maxChi2) {
-    splitVertices(pdata, pws, maxChi2);
-  }
-
-}  // namespace gpuVertexFinder
-
-#endif  // RecoVertex_PixelVertexFinding_plugins_gpuSplitVertices_h
diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cc b/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cc
deleted file mode 100644
index dc638187977ac..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cc
+++ /dev/null
@@ -1,208 +0,0 @@
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
-
-#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"
-
-#include "PixelVertexWorkSpaceUtilities.h"
-#include "PixelVertexWorkSpaceSoAHost.h"
-#include "PixelVertexWorkSpaceSoADevice.h"
-
-#include "gpuClusterTracksByDensity.h"
-#include "gpuClusterTracksDBSCAN.h"
-#include "gpuClusterTracksIterative.h"
-#include "gpuFitVertices.h"
-#include "gpuSortByPt2.h"
-#include "gpuSplitVertices.h"
-
-#undef PIXVERTEX_DEBUG_PRODUCE
-
-namespace gpuVertexFinder {
-
-  // reject outlier tracks that contribute more than this to the chi2 of the vertex fit
-  constexpr float maxChi2ForFirstFit = 50.f;
-  constexpr float maxChi2ForFinalFit = 5000.f;
-
-  // split vertices with a chi2/NDoF greater than this
-  constexpr float maxChi2ForSplit = 9.f;
-
-  template <typename TrackerTraits>
-  __global__ void loadTracks(
-      TrackSoAConstView<TrackerTraits> tracks_view, VtxSoAView soa, WsSoAView pws, float ptMin, float ptMax) {
-    auto const quality = tracks_view.quality();
-    using helper = TracksUtilities<TrackerTraits>;
-    auto first = blockIdx.x * blockDim.x + threadIdx.x;
-    for (int idx = first, nt = tracks_view.nTracks(); idx < nt; idx += gridDim.x * blockDim.x) {
-      auto nHits = helper::nHits(tracks_view, idx);
-      assert(nHits >= 3);
-
-      // initialize soa...
-      soa[idx].idv() = -1;
-
-      if (helper::isTriplet(tracks_view, idx))
-        continue;  // no triplets
-      if (quality[idx] < pixelTrack::Quality::highPurity)
-        continue;
-
-      auto pt = tracks_view[idx].pt();
-
-      if (pt < ptMin)
-        continue;
-
-      // clamp pt
-      pt = std::min(pt, ptMax);
-
-      auto& data = pws;
-      auto it = atomicAdd(&data.ntrks(), 1);
-      data[it].itrk() = idx;
-      data[it].zt() = helper::zip(tracks_view, idx);
-      data[it].ezt2() = tracks_view[idx].covariance()(14);
-      data[it].ptt2() = pt * pt;
-    }
-  }
-
-// #define THREE_KERNELS
-#ifndef THREE_KERNELS
-  __global__ void vertexFinderOneKernel(VtxSoAView pdata,
-                                        WsSoAView pws,
-                                        int minT,      // min number of neighbours to be "seed"
-                                        float eps,     // max absolute distance to cluster
-                                        float errmax,  // max error to be "seed"
-                                        float chi2max  // max normalized distance to cluster,
-  ) {
-    clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
-    __syncthreads();
-    fitVertices(pdata, pws, maxChi2ForFirstFit);
-    __syncthreads();
-    splitVertices(pdata, pws, maxChi2ForSplit);
-    __syncthreads();
-    fitVertices(pdata, pws, maxChi2ForFinalFit);
-    __syncthreads();
-    sortByPt2(pdata, pws);
-  }
-#else
-  __global__ void vertexFinderKernel1(VtxSoAView pdata,
-                                      WsSoAView pws,
-                                      int minT,      // min number of neighbours to be "seed"
-                                      float eps,     // max absolute distance to cluster
-                                      float errmax,  // max error to be "seed"
-                                      float chi2max  // max normalized distance to cluster,
-  ) {
-    clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
-    __syncthreads();
-    fitVertices(pdata, pws, maxChi2ForFirstFit);
-  }
-
-  __global__ void vertexFinderKernel2(VtxSoAView pdata, WsSoAView pws) {
-    fitVertices(pdata, pws, maxChi2ForFinalFit);
-    __syncthreads();
-    sortByPt2(pdata, pws);
-  }
-#endif
-
-  template <typename TrackerTraits>
-#ifdef __CUDACC__
-  ZVertexSoADevice Producer<TrackerTraits>::makeAsync(cudaStream_t stream,
-                                                      const TrackSoAConstView<TrackerTraits>& tracks_view,
-                                                      float ptMin,
-                                                      float ptMax) const {
-#ifdef PIXVERTEX_DEBUG_PRODUCE
-    std::cout << "producing Vertices on GPU" << std::endl;
-#endif  // PIXVERTEX_DEBUG_PRODUCE
-    ZVertexSoADevice vertices(stream);
-#else
-  ZVertexSoAHost Producer<TrackerTraits>::make(const TrackSoAConstView<TrackerTraits>& tracks_view,
-                                               float ptMin,
-                                               float ptMax) const {
-#ifdef PIXVERTEX_DEBUG_PRODUCE
-    std::cout << "producing Vertices on  CPU" << std::endl;
-#endif  // PIXVERTEX_DEBUG_PRODUCE
-    ZVertexSoAHost vertices;
-#endif
-    auto soa = vertices.view();
-
-    assert(vertices.buffer());
-
-#ifdef __CUDACC__
-    auto ws_d = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoADevice(stream);
-#else
-    auto ws_d = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAHost();
-#endif
-
-#ifdef __CUDACC__
-    init<<<1, 1, 0, stream>>>(soa, ws_d.view());
-    auto blockSize = 128;
-    auto numberOfBlocks = (tracks_view.metadata().size() + blockSize - 1) / blockSize;
-    loadTracks<TrackerTraits><<<numberOfBlocks, blockSize, 0, stream>>>(tracks_view, soa, ws_d.view(), ptMin, ptMax);
-    cudaCheck(cudaGetLastError());
-#else
-    init(soa, ws_d.view());
-    loadTracks<TrackerTraits>(tracks_view, soa, ws_d.view(), ptMin, ptMax);
-#endif
-
-#ifdef __CUDACC__
-    // Running too many thread lead to problems when printf is enabled.
-    constexpr int maxThreadsForPrint = 1024 - 128;
-    constexpr int numBlocks = 1024;
-    constexpr int threadsPerBlock = 128;
-
-    if (oneKernel_) {
-      // implemented only for density clustesrs
-#ifndef THREE_KERNELS
-      vertexFinderOneKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
-#else
-      vertexFinderKernel1<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
-      cudaCheck(cudaGetLastError());
-      // one block per vertex...
-      splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.view(), maxChi2ForSplit);
-      cudaCheck(cudaGetLastError());
-      vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view());
-#endif
-    } else {  // five kernels
-      if (useDensity_) {
-        clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>(
-            soa, ws_d.view(), minT, eps, errmax, chi2max);
-      } else if (useDBSCAN_) {
-        clusterTracksDBSCAN<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
-      } else if (useIterative_) {
-        clusterTracksIterative<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max);
-      }
-      cudaCheck(cudaGetLastError());
-      fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFirstFit);
-      cudaCheck(cudaGetLastError());
-      if (doSplitting_) {
-        // one block per vertex...
-        splitVerticesKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(soa, ws_d.view(), maxChi2ForSplit);
-        cudaCheck(cudaGetLastError());
-        fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFinalFit);
-        cudaCheck(cudaGetLastError());
-      }
-      sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view());
-    }
-    cudaCheck(cudaGetLastError());
-#else  // __CUDACC__
-    if (useDensity_) {
-      clusterTracksByDensity(soa, ws_d.view(), minT, eps, errmax, chi2max);
-    } else if (useDBSCAN_) {
-      clusterTracksDBSCAN(soa, ws_d.view(), minT, eps, errmax, chi2max);
-    } else if (useIterative_) {
-      clusterTracksIterative(soa, ws_d.view(), minT, eps, errmax, chi2max);
-    }
-#ifdef PIXVERTEX_DEBUG_PRODUCE
-    std::cout << "found " << ws_d.view().nvIntermediate() << " vertices " << std::endl;
-#endif  // PIXVERTEX_DEBUG_PRODUCE
-    fitVertices(soa, ws_d.view(), maxChi2ForFirstFit);
-    // one block per vertex!
-    if (doSplitting_) {
-      splitVertices(soa, ws_d.view(), maxChi2ForSplit);
-      fitVertices(soa, ws_d.view(), maxChi2ForFinalFit);
-    }
-    sortByPt2(soa, ws_d.view());
-#endif
-
-    return vertices;
-  }
-
-  template class Producer<pixelTopology::Phase1>;
-  template class Producer<pixelTopology::Phase2>;
-  template class Producer<pixelTopology::HIonPhase1>;
-}  // namespace gpuVertexFinder
diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cu b/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cu
deleted file mode 100644
index 9674eac7d8784..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cu
+++ /dev/null
@@ -1 +0,0 @@
-#include "gpuVertexFinder.cc"
diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.h b/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.h
deleted file mode 100644
index 640f137bf76f4..0000000000000
--- a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#ifndef RecoVertex_PixelVertexFinding_plugins_gpuVertexFinder_h
-#define RecoVertex_PixelVertexFinding_plugins_gpuVertexFinder_h
-
-#include <cstddef>
-#include <cstdint>
-
-#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"
-#include "PixelVertexWorkSpaceUtilities.h"
-#include "PixelVertexWorkSpaceSoAHost.h"
-#include "PixelVertexWorkSpaceSoADevice.h"
-
-namespace gpuVertexFinder {
-
-  using VtxSoAView = zVertex::ZVertexSoAView;
-  using WsSoAView = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAView;
-
-  __global__ void init(VtxSoAView pdata, WsSoAView pws) {
-    zVertex::utilities::init(pdata);
-    gpuVertexFinder::workSpace::utilities::init(pws);
-  }
-
-  template <typename TrackerTraits>
-  class Producer {
-    using TkSoAConstView = TrackSoAConstView<TrackerTraits>;
-
-  public:
-    Producer(bool oneKernel,
-             bool useDensity,
-             bool useDBSCAN,
-             bool useIterative,
-             bool doSplitting,
-             int iminT,      // min number of neighbours to be "core"
-             float ieps,     // max absolute distance to cluster
-             float ierrmax,  // max error to be "seed"
-             float ichi2max  // max normalized distance to cluster
-             )
-        : oneKernel_(oneKernel && !(useDBSCAN || useIterative)),
-          useDensity_(useDensity),
-          useDBSCAN_(useDBSCAN),
-          useIterative_(useIterative),
-          doSplitting_(doSplitting),
-          minT(iminT),
-          eps(ieps),
-          errmax(ierrmax),
-          chi2max(ichi2max) {}
-
-    ~Producer() = default;
-
-    ZVertexSoADevice makeAsync(cudaStream_t stream, const TkSoAConstView &tracks_view, float ptMin, float ptMax) const;
-    ZVertexSoAHost make(const TkSoAConstView &tracks_view, float ptMin, float ptMax) const;
-
-  private:
-    const bool oneKernel_;
-    const bool useDensity_;
-    const bool useDBSCAN_;
-    const bool useIterative_;
-    const bool doSplitting_;
-
-    int minT;       // min number of neighbours to be "core"
-    float eps;      // max absolute distance to cluster
-    float errmax;   // max error to be "seed"
-    float chi2max;  // max normalized distance to cluster
-  };
-
-}  // namespace gpuVertexFinder
-
-#endif  // RecoVertex_PixelVertexFinding_plugins_gpuVertexFinder_h
diff --git a/RecoVertex/PixelVertexFinding/test/BuildFile.xml b/RecoVertex/PixelVertexFinding/test/BuildFile.xml
index 57d83a69fabae..60f60dc1fd575 100644
--- a/RecoVertex/PixelVertexFinding/test/BuildFile.xml
+++ b/RecoVertex/PixelVertexFinding/test/BuildFile.xml
@@ -2,49 +2,12 @@
 <use name="root"/>
 <use name="FWCore/Framework"/>
 <use name="FWCore/ParameterSet"/>
-<use name="HeterogeneousCore/CUDAUtilities"/>
 <use name="MagneticField/Engine"/>
 <use name="MagneticField/Records"/>
 <use name="RecoVertex/KalmanVertexFit"/>
 <use name="SimDataFormats/Track"/>
 <use name="TrackingTools/TransientTrack"/>
 
-<iftool name="cuda-gcc-support">
-
-  <bin file="gpuVertexFinder_t.cu" name="gpuVertexFinderOneKernel_t">
-    <use name="cuda"/>
-    <flags CUDA_FLAGS="-g -DGPU_DEBUG -DONE_KERNEL"/>
-    <flags CXXFLAGS="-g"/>
-  </bin>
-
-  <bin file="gpuVertexFinder_t.cu" name="gpuVertexFinderByDensity_t">
-    <use name="cuda"/>
-    <flags CUDA_FLAGS="-g -DGPU_DEBUG"/>
-    <flags CXXFLAGS="-g"/>
-  </bin>
-
-  <bin file="gpuVertexFinder_t.cu" name="gpuVertexFinderDBSCAN_t">
-    <use name="cuda"/>
-    <flags CUDA_FLAGS="-g -DGPU_DEBUG -DUSE_DBSCAN"/>
-    <flags CXXFLAGS="-g"/>
-  </bin>
-
-  <bin file="gpuVertexFinder_t.cu" name="gpuVertexFinderIterative_t">
-    <use name="cuda"/>
-    <flags CUDA_FLAGS="-g -DGPU_DEBUG -DUSE_ITERATIVE"/>
-    <flags CXXFLAGS="-g"/>
-  </bin>
-
-</iftool>
-
-<bin file="cpuVertexFinder_t.cpp" name="cpuVertexFinderByDensity_t">
-  <flags CXXFLAGS="-g  -DGPU_DEBUG"/>
-</bin>
-
-<bin file="cpuVertexFinder_t.cpp" name="cpuVertexFinderIterative_t">
-  <flags CXXFLAGS="-g -DGPU_DEBUG -DUSE_ITERATIVE"/>
-</bin>
-
 <bin file="alpaka/VertexFinder_t.cc alpaka/VertexFinder_t.dev.cc" name="deviceVertexFinderOneKernel_t">
   <use name="alpaka"/>
   <!-- work around for cms-sw/cmssw#45179 -->
diff --git a/RecoVertex/PixelVertexFinding/test/VertexFinder_t.h b/RecoVertex/PixelVertexFinding/test/VertexFinder_t.h
deleted file mode 100644
index c2e165ddb1e98..0000000000000
--- a/RecoVertex/PixelVertexFinding/test/VertexFinder_t.h
+++ /dev/null
@@ -1,359 +0,0 @@
-#include <cmath>
-#include <cstdint>
-#include <iostream>
-#include <random>
-#include <vector>
-
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/launch.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/allocate_device.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h"
-// PixelTrackUtilities only included in order to compile SoALayout with Eigen columns
-#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h"
-#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h"
-
-#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h"
-#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h"
-#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h"
-#ifdef USE_DBSCAN
-#include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h"
-#define CLUSTERIZE gpuVertexFinder::clusterTracksDBSCAN
-#elif USE_ITERATIVE
-#include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h"
-#define CLUSTERIZE gpuVertexFinder::clusterTracksIterative
-#else
-#include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h"
-#define CLUSTERIZE gpuVertexFinder::clusterTracksByDensityKernel
-#endif
-#include "RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h"
-#include "RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h"
-#include "RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h"
-
-#ifdef ONE_KERNEL
-#ifdef __CUDACC__
-__global__ void vertexFinderOneKernel(gpuVertexFinder::VtxSoAView pdata,
-                                      gpuVertexFinder::WsSoAView pws,
-                                      int minT,      // min number of neighbours to be "seed"
-                                      float eps,     // max absolute distance to cluster
-                                      float errmax,  // max error to be "seed"
-                                      float chi2max  // max normalized distance to cluster,
-) {
-  gpuVertexFinder::clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max);
-  __syncthreads();
-  gpuVertexFinder::fitVertices(pdata, pws, 50.);
-  __syncthreads();
-  gpuVertexFinder::splitVertices(pdata, pws, 9.f);
-  __syncthreads();
-  gpuVertexFinder::fitVertices(pdata, pws, 5000.);
-  __syncthreads();
-  gpuVertexFinder::sortByPt2(pdata, pws);
-}
-#endif
-#endif
-
-struct Event {
-  std::vector<float> zvert;
-  std::vector<uint16_t> itrack;
-  std::vector<float> ztrack;
-  std::vector<float> eztrack;
-  std::vector<float> pttrack;
-  std::vector<uint16_t> ivert;
-};
-
-struct ClusterGenerator {
-  explicit ClusterGenerator(float nvert, float ntrack)
-      : rgen(-13., 13), errgen(0.005, 0.025), clusGen(nvert), trackGen(ntrack), gauss(0., 1.), ptGen(1.) {}
-
-  void operator()(Event& ev) {
-    int nclus = clusGen(reng);
-    ev.zvert.resize(nclus);
-    ev.itrack.resize(nclus);
-    for (auto& z : ev.zvert) {
-      z = 3.5f * gauss(reng);
-    }
-
-    ev.ztrack.clear();
-    ev.eztrack.clear();
-    ev.ivert.clear();
-    ev.pttrack.clear();
-    for (int iv = 0; iv < nclus; ++iv) {
-      auto nt = trackGen(reng);
-      ev.itrack[iv] = nt;
-      for (int it = 0; it < nt; ++it) {
-        auto err = errgen(reng);  // reality is not flat....
-        ev.ztrack.push_back(ev.zvert[iv] + err * gauss(reng));
-        ev.eztrack.push_back(err * err);
-        ev.ivert.push_back(iv);
-        ev.pttrack.push_back((iv == 5 ? 1.f : 0.5f) + ptGen(reng));
-        ev.pttrack.back() *= ev.pttrack.back();
-      }
-    }
-    // add noise
-    auto nt = 2 * trackGen(reng);
-    for (int it = 0; it < nt; ++it) {
-      auto err = 0.03f;
-      ev.ztrack.push_back(rgen(reng));
-      ev.eztrack.push_back(err * err);
-      ev.ivert.push_back(9999);
-      ev.pttrack.push_back(0.5f + ptGen(reng));
-      ev.pttrack.back() *= ev.pttrack.back();
-    }
-  }
-
-  std::mt19937 reng;
-  std::uniform_real_distribution<float> rgen;
-  std::uniform_real_distribution<float> errgen;
-  std::poisson_distribution<int> clusGen;
-  std::poisson_distribution<int> trackGen;
-  std::normal_distribution<float> gauss;
-  std::exponential_distribution<float> ptGen;
-};
-
-__global__ void print(gpuVertexFinder::VtxSoAView pdata, gpuVertexFinder::WsSoAView pws) {
-  auto& __restrict__ ws = pws;
-  printf("nt,nv %d %d,%d\n", ws.ntrks(), pdata.nvFinal(), ws.nvIntermediate());
-}
-
-int main() {
-#ifdef __CUDACC__
-  cudaStream_t stream;
-  cms::cudatest::requireDevices();
-  cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
-
-  ZVertexSoADevice onGPU_d(stream);
-  gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoADevice ws_d(stream);
-#else
-
-  ZVertexSoAHost onGPU_d;
-  gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAHost ws_d;
-#endif
-
-  Event ev;
-
-  float eps = 0.1f;
-  std::array<float, 3> par{{eps, 0.01f, 9.0f}};
-  for (int nav = 30; nav < 80; nav += 20) {
-    ClusterGenerator gen(nav, 10);
-
-    for (int i = 8; i < 20; ++i) {
-      auto kk = i / 4;  // M param
-
-      gen(ev);
-
-#ifdef __CUDACC__
-      gpuVertexFinder::init<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view());
-#else
-      gpuVertexFinder::init(onGPU_d.view(), ws_d.view());
-#endif
-
-      std::cout << "v,t size " << ev.zvert.size() << ' ' << ev.ztrack.size() << std::endl;
-      auto nt = ev.ztrack.size();
-#ifdef __CUDACC__
-      cudaCheck(cudaMemcpy(&ws_d.view().ntrks(), &nt, sizeof(uint32_t), cudaMemcpyHostToDevice));
-      cudaCheck(cudaMemcpy(
-          ws_d.view().zt().data(), ev.ztrack.data(), sizeof(float) * ev.ztrack.size(), cudaMemcpyHostToDevice));
-      cudaCheck(cudaMemcpy(
-          ws_d.view().ezt2().data(), ev.eztrack.data(), sizeof(float) * ev.eztrack.size(), cudaMemcpyHostToDevice));
-      cudaCheck(cudaMemcpy(
-          ws_d.view().ptt2().data(), ev.pttrack.data(), sizeof(float) * ev.eztrack.size(), cudaMemcpyHostToDevice));
-#else
-      ::memcpy(&ws_d.view().ntrks(), &nt, sizeof(uint32_t));
-      ::memcpy(ws_d.view().zt().data(), ev.ztrack.data(), sizeof(float) * ev.ztrack.size());
-      ::memcpy(ws_d.view().ezt2().data(), ev.eztrack.data(), sizeof(float) * ev.eztrack.size());
-      ::memcpy(ws_d.view().ptt2().data(), ev.pttrack.data(), sizeof(float) * ev.eztrack.size());
-#endif
-
-      std::cout << "M eps, pset " << kk << ' ' << eps << ' ' << (i % 4) << std::endl;
-
-      if ((i % 4) == 0)
-        par = {{eps, 0.02f, 12.0f}};
-      if ((i % 4) == 1)
-        par = {{eps, 0.02f, 9.0f}};
-      if ((i % 4) == 2)
-        par = {{eps, 0.01f, 9.0f}};
-      if ((i % 4) == 3)
-        par = {{0.7f * eps, 0.01f, 9.0f}};
-
-      uint32_t nv = 0;
-#ifdef __CUDACC__
-      print<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view());
-      cudaCheck(cudaGetLastError());
-      cudaDeviceSynchronize();
-
-#ifdef ONE_KERNEL
-      cms::cuda::launch(vertexFinderOneKernel, {1, 512 + 256}, onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]);
-#else
-      cms::cuda::launch(CLUSTERIZE, {1, 512 + 256}, onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]);
-#endif
-      print<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view());
-
-      cudaCheck(cudaGetLastError());
-      cudaDeviceSynchronize();
-
-      cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 50.f);
-      cudaCheck(cudaGetLastError());
-      cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost));
-
-#else
-      print(onGPU_d.view(), ws_d.view());
-      CLUSTERIZE(onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]);
-      print(onGPU_d.view(), ws_d.view());
-      gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 50.f);
-      nv = onGPU_d.view().nvFinal();
-#endif
-
-      if (nv == 0) {
-        std::cout << "NO VERTICES???" << std::endl;
-        continue;
-      }
-
-      float* zv = nullptr;
-      float* wv = nullptr;
-      float* ptv2 = nullptr;
-      int32_t* nn = nullptr;
-      uint16_t* ind = nullptr;
-
-      // keep chi2 separated...
-      float chi2[2 * nv];  // make space for splitting...
-
-#ifdef __CUDACC__
-      float hzv[2 * nv];
-      float hwv[2 * nv];
-      float hptv2[2 * nv];
-      int32_t hnn[2 * nv];
-      uint16_t hind[2 * nv];
-
-      zv = hzv;
-      wv = hwv;
-      ptv2 = hptv2;
-      nn = hnn;
-      ind = hind;
-#else
-      zv = onGPU_d.view().zv().data();
-      wv = onGPU_d.view().wv().data();
-      ptv2 = onGPU_d.view().ptv2().data();
-      nn = onGPU_d.view().ndof().data();
-      ind = onGPU_d.view().sortInd().data();
-#endif
-
-#ifdef __CUDACC__
-      cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof().data(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost));
-      cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2().data(), nv * sizeof(float), cudaMemcpyDeviceToHost));
-#else
-      memcpy(chi2, onGPU_d.view().chi2().data(), nv * sizeof(float));
-#endif
-
-      for (auto j = 0U; j < nv; ++j)
-        if (nn[j] > 0)
-          chi2[j] /= float(nn[j]);
-      {
-        auto mx = std::minmax_element(chi2, chi2 + nv);
-        std::cout << "after fit nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
-      }
-
-#ifdef __CUDACC__
-      cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 50.f);
-      cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost));
-      cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof().data(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost));
-      cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2().data(), nv * sizeof(float), cudaMemcpyDeviceToHost));
-#else
-      gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 50.f);
-      nv = onGPU_d.view().nvFinal();
-      memcpy(chi2, onGPU_d.view().chi2().data(), nv * sizeof(float));
-#endif
-
-      for (auto j = 0U; j < nv; ++j)
-        if (nn[j] > 0)
-          chi2[j] /= float(nn[j]);
-      {
-        auto mx = std::minmax_element(chi2, chi2 + nv);
-        std::cout << "before splitting nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
-      }
-
-#ifdef __CUDACC__
-      // one vertex per block!!!
-      cms::cuda::launch(gpuVertexFinder::splitVerticesKernel, {1024, 64}, onGPU_d.view(), ws_d.view(), 9.f);
-      cudaCheck(cudaMemcpy(&nv, &ws_d.view().nvIntermediate(), sizeof(uint32_t), cudaMemcpyDeviceToHost));
-#else
-      gpuVertexFinder::splitVertices(onGPU_d.view(), ws_d.view(), 9.f);
-      nv = ws_d.view().nvIntermediate();
-#endif
-      std::cout << "after split " << nv << std::endl;
-
-#ifdef __CUDACC__
-      cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 5000.f);
-      cudaCheck(cudaGetLastError());
-
-      cms::cuda::launch(gpuVertexFinder::sortByPt2Kernel, {1, 256}, onGPU_d.view(), ws_d.view());
-      cudaCheck(cudaGetLastError());
-      cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost));
-#else
-      gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 5000.f);
-      gpuVertexFinder::sortByPt2(onGPU_d.view(), ws_d.view());
-      nv = onGPU_d.view().nvFinal();
-      memcpy(chi2, onGPU_d.view().chi2().data(), nv * sizeof(float));
-#endif
-
-      if (nv == 0) {
-        std::cout << "NO VERTICES???" << std::endl;
-        continue;
-      }
-
-#ifdef __CUDACC__
-      cudaCheck(cudaMemcpy(zv, onGPU_d.view().zv().data(), nv * sizeof(float), cudaMemcpyDeviceToHost));
-      cudaCheck(cudaMemcpy(wv, onGPU_d.view().wv().data(), nv * sizeof(float), cudaMemcpyDeviceToHost));
-      cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2().data(), nv * sizeof(float), cudaMemcpyDeviceToHost));
-      cudaCheck(cudaMemcpy(ptv2, onGPU_d.view().ptv2().data(), nv * sizeof(float), cudaMemcpyDeviceToHost));
-      cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof().data(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost));
-      cudaCheck(cudaMemcpy(ind, onGPU_d.view().sortInd().data(), nv * sizeof(uint16_t), cudaMemcpyDeviceToHost));
-#endif
-      for (auto j = 0U; j < nv; ++j)
-        if (nn[j] > 0)
-          chi2[j] /= float(nn[j]);
-      {
-        auto mx = std::minmax_element(chi2, chi2 + nv);
-        std::cout << "nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
-      }
-
-      {
-        auto mx = std::minmax_element(wv, wv + nv);
-        std::cout << "min max error " << 1. / std::sqrt(*mx.first) << ' ' << 1. / std::sqrt(*mx.second) << std::endl;
-      }
-
-      {
-        auto mx = std::minmax_element(ptv2, ptv2 + nv);
-        std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl;
-        std::cout << "min max ptv2 " << ptv2[ind[0]] << ' ' << ptv2[ind[nv - 1]] << " at " << ind[0] << ' '
-                  << ind[nv - 1] << std::endl;
-      }
-
-      float dd[nv];
-      for (auto kv = 0U; kv < nv; ++kv) {
-        auto zr = zv[kv];
-        auto md = 500.0f;
-        for (auto zs : ev.ztrack) {
-          auto d = std::abs(zr - zs);
-          md = std::min(d, md);
-        }
-        dd[kv] = md;
-      }
-      if (i == 6) {
-        for (auto d : dd)
-          std::cout << d << ' ';
-        std::cout << std::endl;
-      }
-      auto mx = std::minmax_element(dd, dd + nv);
-      float rms = 0;
-      for (auto d : dd)
-        rms += d * d;
-      rms = std::sqrt(rms) / (nv - 1);
-      std::cout << "min max rms " << *mx.first << ' ' << *mx.second << ' ' << rms << std::endl;
-
-    }  // loop on events
-  }  // lopp on ave vert
-
-  return 0;
-}
diff --git a/RecoVertex/PixelVertexFinding/test/cpuVertexFinder_t.cpp b/RecoVertex/PixelVertexFinding/test/cpuVertexFinder_t.cpp
deleted file mode 100644
index a7906fe0d03f5..0000000000000
--- a/RecoVertex/PixelVertexFinding/test/cpuVertexFinder_t.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "VertexFinder_t.h"
diff --git a/RecoVertex/PixelVertexFinding/test/gpuVertexFinder_t.cu b/RecoVertex/PixelVertexFinding/test/gpuVertexFinder_t.cu
deleted file mode 100644
index a7906fe0d03f5..0000000000000
--- a/RecoVertex/PixelVertexFinding/test/gpuVertexFinder_t.cu
+++ /dev/null
@@ -1 +0,0 @@
-#include "VertexFinder_t.h"