diff --git a/CUDADataFormats/Track/BuildFile.xml b/CUDADataFormats/Track/BuildFile.xml deleted file mode 100644 index cf07e3b540f24..0000000000000 --- a/CUDADataFormats/Track/BuildFile.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - diff --git a/CUDADataFormats/Track/README.md b/CUDADataFormats/Track/README.md deleted file mode 100644 index 8f66d9e4c4467..0000000000000 --- a/CUDADataFormats/Track/README.md +++ /dev/null @@ -1,50 +0,0 @@ -# Track CUDA Data Formats - -`CUDADataFormat`s meant to be used on Host (CPU) or Device (CUDA GPU) for -storing information about `Track`s created during the Pixel-local Reconstruction -chain. It stores data in an SoA manner. It combines the data contained in the -deprecated `TrackSoAHeterogeneousT` and `TrajectoryStateSoAT` classes. - -The host format is inheriting from `CUDADataFormats/Common/interface/PortableHostCollection.h`, -while the device format is inheriting from `CUDADataFormats/Common/interface/PortableDeviceCollection.h` - -Both formats use the same SoA Layout (`TrackSoAHeterogeneousLayout`) which is generated -via the `GENERATE_SOA_LAYOUT` macro in the `PixelTrackUtilities.h` file. - -## Notes - --`hitIndices` and `detIndices`, instances of `HitContainer`, have been added into the -layout as `SOA_SCALAR`s, meaning that they manage their own data independently from the SoA -`Layout`. This could be improved in the future, if `HitContainer` (aka a `OneToManyAssoc` of fixed size) -is replaced, but there don't seem to be any conflicts in including it in the `Layout` like this. -- Host and Device classes should **not** be created via inheritance, as they're done here, -but via composition. See [this discussion](https://github.com/cms-sw/cmssw/pull/40465#discussion_r1066039309). - -## TrackSoAHeterogeneousHost - -The version of the data format to be used for storing `Track` data on the CPU. -Instances of this class are to be used for: - -- Having a place to copy data to host from device, via `cudaMemcpy`, or -- Running host-side algorithms using data stored in an SoA manner. - -## TrackSoAHeterogeneousDevice - -The version of the data format to be used for storing `Track` data on the GPU. - -Instances of `TrackSoAHeterogeneousDevice` are to be created on host and be -used on device only. To do so, the instance's `view()` method is to be called -to pass a `View` to any kernel launched. Accessing data from the `view()` is not -possible on the host side. - -## Utilities - -`PixelTrackUtilities.h` contains a collection of methods which were originally -defined as class methods inside either `TrackSoAHeterogeneousT` and `TrajectoryStateSoAT` -which have been adapted to operate on `View` instances, so that they are callable -from within `__global__` kernels, on both CPU and CPU. - -## Use case - -See `test/TrackSoAHeterogeneous_test.cpp` for a simple example of instantiation, -processing and copying from device to host. diff --git a/CUDADataFormats/Track/interface/PixelTrackUtilities.h b/CUDADataFormats/Track/interface/PixelTrackUtilities.h deleted file mode 100644 index 6d7ea258be8d2..0000000000000 --- a/CUDADataFormats/Track/interface/PixelTrackUtilities.h +++ /dev/null @@ -1,243 +0,0 @@ -#ifndef CUDADataFormats_Track_PixelTrackUtilities_h -#define CUDADataFormats_Track_PixelTrackUtilities_h - -#include -#include -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "DataFormats/SoATemplate/interface/SoALayout.h" - -namespace pixelTrack { - - enum class Quality : uint8_t { bad = 0, edup, dup, loose, strict, tight, highPurity, notQuality }; - constexpr uint32_t qualitySize{uint8_t(Quality::notQuality)}; - const std::string qualityName[qualitySize]{"bad", "edup", "dup", "loose", "strict", "tight", "highPurity"}; - inline Quality qualityByName(std::string const &name) { - auto qp = std::find(qualityName, qualityName + qualitySize, name) - qualityName; - return static_cast(qp); - } - -} // namespace pixelTrack - -template -struct TrackSoA { - static constexpr int32_t S = TrackerTraits::maxNumberOfTuples; - static constexpr int32_t H = TrackerTraits::avgHitsPerTrack; - // Aliases in order to not confuse the GENERATE_SOA_LAYOUT - // macro with weird colons and angled brackets. - using Vector5f = Eigen::Matrix; - using Vector15f = Eigen::Matrix; - using Quality = pixelTrack::Quality; - - using hindex_type = uint32_t; - - using HitContainer = cms::cuda::OneToManyAssoc; - - GENERATE_SOA_LAYOUT(TrackSoALayout, - SOA_COLUMN(Quality, quality), - SOA_COLUMN(float, chi2), - SOA_COLUMN(int8_t, nLayers), - SOA_COLUMN(float, eta), - SOA_COLUMN(float, pt), - SOA_EIGEN_COLUMN(Vector5f, state), - SOA_EIGEN_COLUMN(Vector15f, covariance), - SOA_SCALAR(int, nTracks), - SOA_SCALAR(HitContainer, hitIndices), - SOA_SCALAR(HitContainer, detIndices)) -}; - -// Methods that operate on View and ConstView of the TrackSoA, and cannot be class methods. - -template -struct TracksUtilities { - using TrackSoAView = typename TrackSoA::template TrackSoALayout<>::View; - using TrackSoAConstView = typename TrackSoA::template TrackSoALayout<>::ConstView; - using hindex_type = typename TrackSoA::hindex_type; - - // State at the Beam spot - // phi,tip,1/pt,cotan(theta),zip - static __host__ __device__ inline float charge(const TrackSoAConstView &tracks, int32_t i) { - return std::copysign(1.f, tracks[i].state()(2)); - } - - static constexpr __host__ __device__ inline float phi(const TrackSoAConstView &tracks, int32_t i) { - return tracks[i].state()(0); - } - - static constexpr __host__ __device__ inline float tip(const TrackSoAConstView &tracks, int32_t i) { - return tracks[i].state()(1); - } - - static constexpr __host__ __device__ inline float zip(const TrackSoAConstView &tracks, int32_t i) { - return tracks[i].state()(4); - } - - static constexpr __host__ __device__ inline bool isTriplet(const TrackSoAConstView &tracks, int i) { - return tracks[i].nLayers() == 3; - } - - template - static constexpr __host__ __device__ inline void copyFromCircle( - TrackSoAView &tracks, V3 const &cp, M3 const &ccov, V2 const &lp, M2 const &lcov, float b, int32_t i) { - tracks[i].state() << cp.template cast(), lp.template cast(); - - tracks[i].state()(2) = tracks[i].state()(2) * b; - auto cov = tracks[i].covariance(); - cov(0) = ccov(0, 0); - cov(1) = ccov(0, 1); - cov(2) = b * float(ccov(0, 2)); - cov(4) = cov(3) = 0; - cov(5) = ccov(1, 1); - cov(6) = b * float(ccov(1, 2)); - cov(8) = cov(7) = 0; - cov(9) = b * b * float(ccov(2, 2)); - cov(11) = cov(10) = 0; - cov(12) = lcov(0, 0); - cov(13) = lcov(0, 1); - cov(14) = lcov(1, 1); - } - - template - static constexpr __host__ __device__ inline void copyFromDense(TrackSoAView &tracks, - V5 const &v, - M5 const &cov, - int32_t i) { - tracks[i].state() = v.template cast(); - for (int j = 0, ind = 0; j < 5; ++j) - for (auto k = j; k < 5; ++k) - tracks[i].covariance()(ind++) = cov(j, k); - } - - template - static constexpr __host__ __device__ inline void copyToDense(const TrackSoAConstView &tracks, - V5 &v, - M5 &cov, - int32_t i) { - v = tracks[i].state().template cast(); - for (int j = 0, ind = 0; j < 5; ++j) { - cov(j, j) = tracks[i].covariance()(ind++); - for (auto k = j + 1; k < 5; ++k) - cov(k, j) = cov(j, k) = tracks[i].covariance()(ind++); - } - } - - static constexpr __host__ __device__ inline int computeNumberOfLayers(const TrackSoAConstView &tracks, int32_t i) { - auto pdet = tracks.detIndices().begin(i); - int nl = 1; - auto ol = pixelTopology::getLayer(*pdet); - for (; pdet < tracks.detIndices().end(i); ++pdet) { - auto il = pixelTopology::getLayer(*pdet); - if (il != ol) - ++nl; - ol = il; - } - return nl; - } - - static constexpr __host__ __device__ inline int nHits(const TrackSoAConstView &tracks, int i) { - return tracks.detIndices().size(i); - } -}; - -namespace pixelTrack { - - template - struct QualityCutsT {}; - - template - struct QualityCutsT> { - using TrackSoAView = typename TrackSoA::template TrackSoALayout<>::View; - using TrackSoAConstView = typename TrackSoA::template TrackSoALayout<>::ConstView; - using tracksHelper = TracksUtilities; - // chi2 cut = chi2Scale * (chi2Coeff[0] + pT/GeV * (chi2Coeff[1] + pT/GeV * (chi2Coeff[2] + pT/GeV * chi2Coeff[3]))) - float chi2Coeff[4]; - float chi2MaxPt; // GeV - float chi2Scale; - - struct Region { - float maxTip; // cm - float minPt; // GeV - float maxZip; // cm - }; - - Region triplet; - Region quadruplet; - - __device__ __forceinline__ bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const { - // impose "region cuts" based on the fit results (phi, Tip, pt, cotan(theta)), Zip) - // default cuts: - // - for triplets: |Tip| < 0.3 cm, pT > 0.5 GeV, |Zip| < 12.0 cm - // - for quadruplets: |Tip| < 0.5 cm, pT > 0.3 GeV, |Zip| < 12.0 cm - // (see CAHitNtupletGeneratorGPU.cc) - auto const ®ion = (nHits > 3) ? quadruplet : triplet; - return (std::abs(tracksHelper::tip(tracks, it)) < region.maxTip) and (tracks.pt(it) > region.minPt) and - (std::abs(tracksHelper::zip(tracks, it)) < region.maxZip); - } - - __device__ __forceinline__ bool strictCut(const TrackSoAConstView &tracks, int it) const { - auto roughLog = [](float x) { - // max diff [0.5,12] at 1.25 0.16143 - // average diff 0.0662998 - union IF { - uint32_t i; - float f; - }; - IF z; - z.f = x; - uint32_t lsb = 1 < 21; - z.i += lsb; - z.i >>= 21; - auto f = z.i & 3; - int ex = int(z.i >> 2) - 127; - - // log2(1+0.25*f) - // averaged over bins - const float frac[4] = {0.160497f, 0.452172f, 0.694562f, 0.901964f}; - return float(ex) + frac[f]; - }; - - float pt = std::min(tracks.pt(it), chi2MaxPt); - float chi2Cut = chi2Scale * (chi2Coeff[0] + roughLog(pt) * chi2Coeff[1]); - if (tracks.chi2(it) >= chi2Cut) { -#ifdef NTUPLE_FIT_DEBUG - printf("Bad chi2 %d pt %f eta %f chi2 %f\n", it, tracks.pt(it), tracks.eta(it), tracks.chi2(it)); -#endif - return true; - } - return false; - } - }; - - template - struct QualityCutsT> { - using TrackSoAView = typename TrackSoA::template TrackSoALayout<>::View; - using TrackSoAConstView = typename TrackSoA::template TrackSoALayout<>::ConstView; - using tracksHelper = TracksUtilities; - - float maxChi2; - float minPt; - float maxTip; - float maxZip; - - __device__ __forceinline__ bool isHP(const TrackSoAConstView &tracks, int nHits, int it) const { - return (std::abs(tracksHelper::tip(tracks, it)) < maxTip) and (tracks.pt(it) > minPt) and - (std::abs(tracksHelper::zip(tracks, it)) < maxZip); - } - __device__ __forceinline__ bool strictCut(const TrackSoAConstView &tracks, int it) const { - return tracks.chi2(it) >= maxChi2; - } - }; - -} // namespace pixelTrack - -template -using TrackLayout = typename TrackSoA::template TrackSoALayout<>; -template -using TrackSoAView = typename TrackSoA::template TrackSoALayout<>::View; -template -using TrackSoAConstView = typename TrackSoA::template TrackSoALayout<>::ConstView; - -template struct TracksUtilities; -template struct TracksUtilities; - -#endif diff --git a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h deleted file mode 100644 index 04d286a767ab0..0000000000000 --- a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef CUDADataFormats_Track_TrackHeterogeneousDevice_H -#define CUDADataFormats_Track_TrackHeterogeneousDevice_H - -#include - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" - -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -// TODO: The class is created via inheritance of the PortableDeviceCollection. -// This is generally discouraged, and should be done via composition. -// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -template -class TrackSoAHeterogeneousDevice : public cms::cuda::PortableDeviceCollection> { -public: - using cms::cuda::PortableDeviceCollection>::view; - using cms::cuda::PortableDeviceCollection>::const_view; - using cms::cuda::PortableDeviceCollection>::buffer; - using cms::cuda::PortableDeviceCollection>::bufferSize; - - TrackSoAHeterogeneousDevice() = default; // cms::cuda::Product needs this - - // Constructor which specifies the SoA size - explicit TrackSoAHeterogeneousDevice(cudaStream_t stream) - : cms::cuda::PortableDeviceCollection>(TrackerTraits::maxNumberOfTuples, stream) {} -}; - -namespace pixelTrack { - - using TrackSoADevicePhase1 = TrackSoAHeterogeneousDevice; - using TrackSoADevicePhase2 = TrackSoAHeterogeneousDevice; - using TrackSoADeviceHIonPhase1 = TrackSoAHeterogeneousDevice; - -} // namespace pixelTrack - -#endif // CUDADataFormats_Track_TrackHeterogeneousT_H diff --git a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h deleted file mode 100644 index 39e83491e1769..0000000000000 --- a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef CUDADataFormats_Track_TrackHeterogeneousHost_H -#define CUDADataFormats_Track_TrackHeterogeneousHost_H - -#include - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Common/interface/PortableHostCollection.h" - -// TODO: The class is created via inheritance of the PortableHostCollection. -// This is generally discouraged, and should be done via composition. -// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -template -class TrackSoAHeterogeneousHost : public cms::cuda::PortableHostCollection> { -public: - static constexpr int32_t S = TrackerTraits::maxNumberOfTuples; //TODO: this could be made configurable at runtime - explicit TrackSoAHeterogeneousHost() : cms::cuda::PortableHostCollection>(S) {} - - using cms::cuda::PortableHostCollection>::view; - using cms::cuda::PortableHostCollection>::const_view; - using cms::cuda::PortableHostCollection>::buffer; - using cms::cuda::PortableHostCollection>::bufferSize; - - // Constructor which specifies the SoA size - explicit TrackSoAHeterogeneousHost(cudaStream_t stream) - : cms::cuda::PortableHostCollection>(S, stream) {} -}; - -namespace pixelTrack { - - using TrackSoAHostPhase1 = TrackSoAHeterogeneousHost; - using TrackSoAHostPhase2 = TrackSoAHeterogeneousHost; - using TrackSoAHostHIonPhase1 = TrackSoAHeterogeneousHost; -} // namespace pixelTrack - -#endif // CUDADataFormats_Track_TrackHeterogeneousT_H diff --git a/CUDADataFormats/Track/test/BuildFile.xml b/CUDADataFormats/Track/test/BuildFile.xml deleted file mode 100644 index 32256c87ed577..0000000000000 --- a/CUDADataFormats/Track/test/BuildFile.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - diff --git a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cpp b/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cpp deleted file mode 100644 index dafa75e2e18d7..0000000000000 --- a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/** - Simple test for the pixelTrack::TrackSoA data structure - which inherits from PortableDeviceCollection. - - Creates an instance of the class (automatically allocates - memory on device), passes the view of the SoA data to - the CUDA kernels which: - - Fill the SoA with data. - - Verify that the data written is correct. - - Then, the SoA data are copied back to Host, where - a temporary host-side view (tmp_view) is created using - the same Layout to access the data on host and print it. - */ - -#include -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -namespace testTrackSoA { - - template - void runKernels(TrackSoAView &tracks_view, cudaStream_t stream); -} - -int main() { - cms::cudatest::requireDevices(); - - cudaStream_t stream; - cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); - - // Inner scope to deallocate memory before destroying the stream - { - // Instantiate tracks on device. PortableDeviceCollection allocates - // SoA on device automatically. - TrackSoAHeterogeneousDevice tracks_d(stream); - testTrackSoA::runKernels(tracks_d.view(), stream); - - // Instantate tracks on host. This is where the data will be - // copied to from device. - TrackSoAHeterogeneousHost tracks_h(stream); - - cudaCheck(cudaMemcpyAsync( - tracks_h.buffer().get(), tracks_d.const_buffer().get(), tracks_d.bufferSize(), cudaMemcpyDeviceToHost, stream)); - cudaCheck(cudaStreamSynchronize(stream)); - - // Print results - std::cout << "pt" - << "\t" - << "eta" - << "\t" - << "chi2" - << "\t" - << "quality" - << "\t" - << "nLayers" - << "\t" - << "hitIndices off" << std::endl; - - for (int i = 0; i < 10; ++i) { - std::cout << tracks_h.view()[i].pt() << "\t" << tracks_h.view()[i].eta() << "\t" << tracks_h.view()[i].chi2() - << "\t" << (int)tracks_h.view()[i].quality() << "\t" << (int)tracks_h.view()[i].nLayers() << "\t" - << tracks_h.view().hitIndices().off[i] << std::endl; - } - } - cudaCheck(cudaStreamDestroy(stream)); - - return 0; -} diff --git a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cu b/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cu deleted file mode 100644 index 8e8595eb43e94..0000000000000 --- a/CUDADataFormats/Track/test/TrackSoAHeterogeneous_test.cu +++ /dev/null @@ -1,63 +0,0 @@ -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "HeterogeneousCore/CUDAUtilities/interface/OneToManyAssoc.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -namespace testTrackSoA { - - // Kernel which fills the TrackSoAView with data - // to test writing to it - template - __global__ void fill(TrackSoAView tracks_view) { - int i = threadIdx.x; - if (i == 0) { - tracks_view.nTracks() = 420; - } - - for (int j = i; j < tracks_view.metadata().size(); j += blockDim.x) { - tracks_view[j].pt() = (float)j; - tracks_view[j].eta() = (float)j; - tracks_view[j].chi2() = (float)j; - tracks_view[j].quality() = (pixelTrack::Quality)(j % 256); - tracks_view[j].nLayers() = j % 128; - tracks_view.hitIndices().off[j] = j; - } - } - - // Kernel which reads from the TrackSoAView to verify - // that it was written correctly from the fill kernel - template - __global__ void verify(TrackSoAConstView tracks_view) { - int i = threadIdx.x; - - if (i == 0) { - printf("SoA size: % d, block dims: % d\n", tracks_view.metadata().size(), blockDim.x); - assert(tracks_view.nTracks() == 420); - } - for (int j = i; j < tracks_view.metadata().size(); j += blockDim.x) { - assert(abs(tracks_view[j].pt() - (float)j) < .0001); - assert(abs(tracks_view[j].eta() - (float)j) < .0001); - assert(abs(tracks_view[j].chi2() - (float)j) < .0001); - assert(tracks_view[j].quality() == (pixelTrack::Quality)(j % 256)); - assert(tracks_view[j].nLayers() == j % 128); - assert(tracks_view.hitIndices().off[j] == j); - } - } - - // Host function which invokes the two kernels above - template - void runKernels(TrackSoAView& tracks_view, cudaStream_t stream) { - fill<<<1, 1024, 0, stream>>>(tracks_view); - cudaCheck(cudaGetLastError()); - cudaCheck(cudaDeviceSynchronize()); - - verify<<<1, 1024, 0, stream>>>(tracks_view); - cudaCheck(cudaGetLastError()); - cudaCheck(cudaDeviceSynchronize()); - } - - template void runKernels(TrackSoAView& tracks_view, - cudaStream_t stream); - template void runKernels(TrackSoAView& tracks_view, - cudaStream_t stream); - -} // namespace testTrackSoA diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp deleted file mode 100644 index d6ff539a642b0..0000000000000 --- a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "TrajectoryStateSOA_t.h" diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu deleted file mode 100644 index d6ff539a642b0..0000000000000 --- a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu +++ /dev/null @@ -1 +0,0 @@ -#include "TrajectoryStateSOA_t.h" diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h deleted file mode 100644 index 6ba0eaa5c986e..0000000000000 --- a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h +++ /dev/null @@ -1,85 +0,0 @@ -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" - -using Vector5d = Eigen::Matrix; -using Matrix5d = Eigen::Matrix; -using helper = TracksUtilities; - -__host__ __device__ Matrix5d loadCov(Vector5d const& e) { - Matrix5d cov; - for (int i = 0; i < 5; ++i) - cov(i, i) = e(i) * e(i); - for (int i = 0; i < 5; ++i) { - for (int j = 0; j < i; ++j) { - double v = 0.3 * std::sqrt(cov(i, i) * cov(j, j)); // this makes the matrix pos defined - cov(i, j) = (i + j) % 2 ? -0.4 * v : 0.1 * v; - cov(j, i) = cov(i, j); - } - } - return cov; -} - -template -__global__ void testTSSoA(TrackSoAView ts) { - Vector5d par0; - par0 << 0.2, 0.1, 3.5, 0.8, 0.1; - Vector5d e0; - e0 << 0.01, 0.01, 0.035, -0.03, -0.01; - auto cov0 = loadCov(e0); - - int first = threadIdx.x + blockIdx.x * blockDim.x; - - for (int i = first; i < ts.metadata().size(); i += blockDim.x * gridDim.x) { - helper::copyFromDense(ts, par0, cov0, i); - Vector5d par1; - Matrix5d cov1; - helper::copyToDense(ts, par1, cov1, i); - Vector5d delV = par1 - par0; - Matrix5d delM = cov1 - cov0; - for (int j = 0; j < 5; ++j) { - assert(std::abs(delV(j)) < 1.e-5); - for (auto k = j; k < 5; ++k) { - assert(cov0(k, j) == cov0(j, k)); - assert(cov1(k, j) == cov1(j, k)); - assert(std::abs(delM(k, j)) < 1.e-5); - } - } - } -} - -#ifdef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#endif - -int main() { -#ifdef __CUDACC__ - cms::cudatest::requireDevices(); - cudaStream_t stream; - cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); -#endif - -#ifdef __CUDACC__ - // Since we are going to copy data from ts_d to ts_h, we - // need to initialize the Host collection with a stream. - TrackSoAHeterogeneousHost ts_h(stream); - TrackSoAHeterogeneousDevice ts_d(stream); -#else - // If CUDA is not available, Host collection must not be initialized - // with a stream. - TrackSoAHeterogeneousHost ts_h; -#endif - -#ifdef __CUDACC__ - testTSSoA<<<1, 64, 0, stream>>>(ts_d.view()); - cudaCheck(cudaGetLastError()); - cudaCheck(cudaMemcpyAsync( - ts_h.buffer().get(), ts_d.const_buffer().get(), ts_d.bufferSize(), cudaMemcpyDeviceToHost, stream)); - cudaCheck(cudaGetLastError()); - cudaCheck(cudaStreamSynchronize(stream)); -#else - testTSSoA(ts_h.view()); -#endif -} diff --git a/CUDADataFormats/Vertex/BuildFile.xml b/CUDADataFormats/Vertex/BuildFile.xml deleted file mode 100644 index c6b918ec4b12b..0000000000000 --- a/CUDADataFormats/Vertex/BuildFile.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - diff --git a/CUDADataFormats/Vertex/README.md b/CUDADataFormats/Vertex/README.md deleted file mode 100644 index 3e495d15f776e..0000000000000 --- a/CUDADataFormats/Vertex/README.md +++ /dev/null @@ -1,45 +0,0 @@ -# Vertex CUDA Data Formats - -`CUDADataFormat`s meant to be used on Host (CPU) or Device (CUDA GPU) for -storing information about vertices created during the Pixel-local Reconstruction -chain. It stores data in an SoA manner. It contains the data that was previously -contained in the deprecated `ZVertexSoA` class. - -The host format is inheriting from `CUDADataFormats/Common/interface/PortableHostCollection.h`, -while the device format is inheriting from `CUDADataFormats/Common/interface/PortableDeviceCollection.h` - -Both formats use the same SoA Layout (`ZVertexSoAHeterogeneousLayout`) which is generated -via the `GENERATE_SOA_LAYOUT` macro in the `ZVertexUtilities.h` file. - -## Notes - -- Initially, `ZVertexSoA` had distinct array sizes for each attribute (e.g. `zv` was `MAXVTX` elements -long, `ndof` was `MAXTRACKS` elements long). All columns are now of uniform `MAXTRACKS` size, -meaning that there will be some wasted space (appx. 190kB). -- Host and Device classes should **not** be created via inheritance, as they're done here, -but via composition. See [this discussion](https://github.com/cms-sw/cmssw/pull/40465#discussion_r1066039309). - -## ZVertexHeterogeneousHost - -The version of the data format to be used for storing vertex data on the CPU. -Instances of this class are to be used for: - -- Having a place to copy data to host from device, via `cudaMemcpy`, or -- Running host-side algorithms using data stored in an SoA manner. - -## ZVertexHeterogeneousDevice - -The version of the data format to be used for storing vertex data on the GPU. - -Instances of `ZVertexHeterogeneousDevice` are to be created on host and be -used on device only. To do so, the instance's `view()` method is to be called -to pass a `View` to any kernel launched. Accessing data from the `view()` is not -possible on the host side. - -## Utilities - -Apart from `ZVertexSoAHeterogeneousLayout`, `ZVertexUtilities.h` also contains -a collection of methods which were originally -defined as class methods inside the `ZVertexSoA` class -which have been adapted to operate on `View` instances, so that they are callable -from within `__global__` kernels, on both CPU and CPU. diff --git a/CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h b/CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h deleted file mode 100644 index 417a960951fb1..0000000000000 --- a/CUDADataFormats/Vertex/interface/ZVertexHeterogeneous.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef CUDADataFormatsVertexZVertexHeterogeneous_H -#define CUDADataFormatsVertexZVertexHeterogeneous_H - -#include "CUDADataFormats/Vertex/interface/ZVertexSoA.h" -#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h" - -using ZVertexHeterogeneous = HeterogeneousSoA; -#ifndef __CUDACC__ -#include "CUDADataFormats/Common/interface/Product.h" -using ZVertexCUDAProduct = cms::cuda::Product; -#endif - -#endif diff --git a/CUDADataFormats/Vertex/interface/ZVertexSoA.h b/CUDADataFormats/Vertex/interface/ZVertexSoA.h deleted file mode 100644 index 95106050f3d7a..0000000000000 --- a/CUDADataFormats/Vertex/interface/ZVertexSoA.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef CUDADataFormats_Vertex_ZVertexSoA_h -#define CUDADataFormats_Vertex_ZVertexSoA_h - -#include -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" - -// SOA for vertices -// These vertices are clusterized and fitted only along the beam line (z) -// to obtain their global coordinate the beam spot position shall be added (eventually correcting for the beam angle as well) -struct ZVertexSoA { - static constexpr uint32_t MAXTRACKS = 128 * 1024; - static constexpr uint32_t MAXVTX = 1024; - - int16_t idv[MAXTRACKS]; // vertex index for each associated (original) track (-1 == not associate) - float zv[MAXVTX]; // output z-posistion of found vertices - float wv[MAXVTX]; // output weight (1/error^2) on the above - float chi2[MAXVTX]; // vertices chi2 - float ptv2[MAXVTX]; // vertices pt^2 - int32_t ndof[MAXTRACKS]; // vertices number of dof (reused as workspace for the number of nearest neighbours FIXME) - uint16_t sortInd[MAXVTX]; // sorted index (by pt2) ascending - uint32_t nvFinal; // the number of vertices - - __host__ __device__ void init() { nvFinal = 0; } -}; - -#endif // CUDADataFormats_Vertex_ZVertexSoA_h diff --git a/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h b/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h deleted file mode 100644 index ae662d7fd5f9a..0000000000000 --- a/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef CUDADataFormats_Vertex_ZVertexHeterogeneousDevice_H -#define CUDADataFormats_Vertex_ZVertexHeterogeneousDevice_H - -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" - -// TODO: The class is created via inheritance of the PortableDeviceCollection. -// This is generally discouraged, and should be done via composition. -// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -template -class ZVertexSoAHeterogeneousDevice : public cms::cuda::PortableDeviceCollection> { -public: - ZVertexSoAHeterogeneousDevice() = default; // cms::cuda::Product needs this - - // Constructor which specifies the SoA size - explicit ZVertexSoAHeterogeneousDevice(cudaStream_t stream) - : PortableDeviceCollection>(S, stream) {} -}; - -using ZVertexSoADevice = ZVertexSoAHeterogeneousDevice; - -#endif // CUDADataFormats_Vertex_ZVertexHeterogeneousDevice_H diff --git a/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h b/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h deleted file mode 100644 index 6b62d615e1d11..0000000000000 --- a/CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef CUDADataFormats_Vertex_ZVertexHeterogeneousHost_H -#define CUDADataFormats_Vertex_ZVertexHeterogeneousHost_H - -#include - -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "CUDADataFormats/Common/interface/PortableHostCollection.h" - -// TODO: The class is created via inheritance of the PortableHostCollection. -// This is generally discouraged, and should be done via composition. -// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -template -class ZVertexSoAHeterogeneousHost : public cms::cuda::PortableHostCollection> { -public: - explicit ZVertexSoAHeterogeneousHost() : cms::cuda::PortableHostCollection>(S) {} - - // Constructor which specifies the SoA size and CUDA stream - explicit ZVertexSoAHeterogeneousHost(cudaStream_t stream) - : PortableHostCollection>(S, stream) {} -}; - -using ZVertexSoAHost = ZVertexSoAHeterogeneousHost; - -#endif // CUDADataFormats_Vertex_ZVertexHeterogeneousHost_H diff --git a/CUDADataFormats/Vertex/interface/ZVertexUtilities.h b/CUDADataFormats/Vertex/interface/ZVertexUtilities.h deleted file mode 100644 index 2403652377971..0000000000000 --- a/CUDADataFormats/Vertex/interface/ZVertexUtilities.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef CUDADataFormats_Vertex_ZVertexUtilities_h -#define CUDADataFormats_Vertex_ZVertexUtilities_h - -#include -#include "DataFormats/SoATemplate/interface/SoALayout.h" - -GENERATE_SOA_LAYOUT(ZVertexSoAHeterogeneousLayout, - SOA_COLUMN(int16_t, idv), - SOA_COLUMN(float, zv), - SOA_COLUMN(float, wv), - SOA_COLUMN(float, chi2), - SOA_COLUMN(float, ptv2), - SOA_COLUMN(int32_t, ndof), - SOA_COLUMN(uint16_t, sortInd), - SOA_SCALAR(uint32_t, nvFinal)) - -// Previous ZVertexSoA class methods. -// They operate on View and ConstView of the ZVertexSoA. -namespace zVertex { - // Common types for both Host and Device code - using ZVertexSoALayout = ZVertexSoAHeterogeneousLayout<>; - using ZVertexSoAView = ZVertexSoAHeterogeneousLayout<>::View; - using ZVertexSoAConstView = ZVertexSoAHeterogeneousLayout<>::ConstView; - - namespace utilities { - - static constexpr uint32_t MAXTRACKS = 128 * 1024; - static constexpr uint32_t MAXVTX = 1024; - - __host__ __device__ inline void init(ZVertexSoAView &vertices) { vertices.nvFinal() = 0; } - - } // namespace utilities -} // namespace zVertex - -#endif diff --git a/RecoTauTag/HLTProducers/BuildFile.xml b/RecoTauTag/HLTProducers/BuildFile.xml index 79cdf947dd105..865bdb508e891 100644 --- a/RecoTauTag/HLTProducers/BuildFile.xml +++ b/RecoTauTag/HLTProducers/BuildFile.xml @@ -1,20 +1,18 @@ - - - + - - - + + + - - - + + + + + + - - - - + diff --git a/RecoTauTag/HLTProducers/src/L2TauTagNNProducer.cc b/RecoTauTag/HLTProducers/src/L2TauTagNNProducer.cc deleted file mode 100644 index 051766a05606b..0000000000000 --- a/RecoTauTag/HLTProducers/src/L2TauTagNNProducer.cc +++ /dev/null @@ -1,822 +0,0 @@ -/* - * \class L2TauTagProducer - * - * L2Tau identification using Convolutional NN. - * - * \author Valeria D'Amante, Università di Siena and INFN Pisa - * Konstantin Androsov, EPFL and ETHZ -*/ -#include -#include -#include - -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/Frameworkfwd.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "DataFormats/Math/interface/deltaR.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "FWCore/Utilities/interface/isFinite.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "PhysicsTools/TensorFlow/interface/TensorFlow.h" -#include "Geometry/CaloGeometry/interface/CaloCellGeometry.h" -#include "Geometry/CaloGeometry/interface/CaloGeometry.h" -#include "Geometry/CaloTopology/interface/HcalTopology.h" -#include "Geometry/Records/interface/CaloGeometryRecord.h" -#include "DataFormats/CaloRecHit/interface/CaloRecHit.h" -#include "DataFormats/EcalRecHit/interface/EcalRecHit.h" -#include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h" -#include "DataFormats/EcalDetId/interface/EcalDetIdCollections.h" -#include "DataFormats/HcalDetId/interface/HcalDetId.h" -#include "DataFormats/HcalRecHit/interface/HBHERecHit.h" -#include "DataFormats/HcalRecHit/interface/HcalRecHitDefs.h" -#include "DataFormats/HcalRecHit/interface/HFRecHit.h" -#include "DataFormats/HcalRecHit/interface/HORecHit.h" -#include "DataFormats/HLTReco/interface/TriggerTypeDefs.h" -#include "DataFormats/HLTReco/interface/TriggerFilterObjectWithRefs.h" -#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h" -#include "RecoTracker/PixelTrackFitting/interface/FitUtils.h" -#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h" -#include "DataFormats/TrackReco/interface/HitPattern.h" -#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h" -#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" -#include "DataFormats/GeometrySurface/interface/Plane.h" -#include "DataFormats/BeamSpot/interface/BeamSpot.h" -#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" - -namespace L2TauTagNNv1 { - constexpr int nCellEta = 5; - constexpr int nCellPhi = 5; - constexpr int nVars = 31; - constexpr float dR_max = 0.5; - enum class NNInputs { - nVertices = 0, - l1Tau_pt, - l1Tau_eta, - l1Tau_hwIso, - EcalEnergySum, - EcalSize, - EcalEnergyStdDev, - EcalDeltaEta, - EcalDeltaPhi, - EcalChi2, - EcalEnergySumForPositiveChi2, - EcalSizeForPositiveChi2, - HcalEnergySum, - HcalSize, - HcalEnergyStdDev, - HcalDeltaEta, - HcalDeltaPhi, - HcalChi2, - HcalEnergySumForPositiveChi2, - HcalSizeForPositiveChi2, - PatatrackPtSum, - PatatrackSize, - PatatrackSizeWithVertex, - PatatrackPtSumWithVertex, - PatatrackChargeSum, - PatatrackDeltaEta, - PatatrackDeltaPhi, - PatatrackChi2OverNdof, - PatatrackNdof, - PatatrackDxy, - PatatrackDz - }; - - const std::map varNameMap = { - {NNInputs::nVertices, "nVertices"}, - {NNInputs::l1Tau_pt, "l1Tau_pt"}, - {NNInputs::l1Tau_eta, "l1Tau_eta"}, - {NNInputs::l1Tau_hwIso, "l1Tau_hwIso"}, - {NNInputs::EcalEnergySum, "EcalEnergySum"}, - {NNInputs::EcalSize, "EcalSize"}, - {NNInputs::EcalEnergyStdDev, "EcalEnergyStdDev"}, - {NNInputs::EcalDeltaEta, "EcalDeltaEta"}, - {NNInputs::EcalDeltaPhi, "EcalDeltaPhi"}, - {NNInputs::EcalChi2, "EcalChi2"}, - {NNInputs::EcalEnergySumForPositiveChi2, "EcalEnergySumForPositiveChi2"}, - {NNInputs::EcalSizeForPositiveChi2, "EcalSizeForPositiveChi2"}, - {NNInputs::HcalEnergySum, "HcalEnergySum"}, - {NNInputs::HcalSize, "HcalSize"}, - {NNInputs::HcalEnergyStdDev, "HcalEnergyStdDev"}, - {NNInputs::HcalDeltaEta, "HcalDeltaEta"}, - {NNInputs::HcalDeltaPhi, "HcalDeltaPhi"}, - {NNInputs::HcalChi2, "HcalChi2"}, - {NNInputs::HcalEnergySumForPositiveChi2, "HcalEnergySumForPositiveChi2"}, - {NNInputs::HcalSizeForPositiveChi2, "HcalSizeForPositiveChi2"}, - {NNInputs::PatatrackPtSum, "PatatrackPtSum"}, - {NNInputs::PatatrackSize, "PatatrackSize"}, - {NNInputs::PatatrackSizeWithVertex, "PatatrackSizeWithVertex"}, - {NNInputs::PatatrackPtSumWithVertex, "PatatrackPtSumWithVertex"}, - {NNInputs::PatatrackChargeSum, "PatatrackChargeSum"}, - {NNInputs::PatatrackDeltaEta, "PatatrackDeltaEta"}, - {NNInputs::PatatrackDeltaPhi, "PatatrackDeltaPhi"}, - {NNInputs::PatatrackChi2OverNdof, "PatatrackChi2OverNdof"}, - {NNInputs::PatatrackNdof, "PatatrackNdof"}, - {NNInputs::PatatrackDxy, "PatatrackDxy"}, - {NNInputs::PatatrackDz, "PatatrackDz"}}; -} // namespace L2TauTagNNv1 -namespace { - inline float& getCellImpl( - tensorflow::Tensor& cellGridMatrix, int tau_idx, int phi_idx, int eta_idx, L2TauTagNNv1::NNInputs NNInput_idx) { - return cellGridMatrix.tensor()(tau_idx, phi_idx, eta_idx, static_cast(NNInput_idx)); - } -} // namespace -struct normDictElement { - float mean; - float std; - float min; - float max; -}; - -struct L2TauNNProducerCacheData { - L2TauNNProducerCacheData() : graphDef(nullptr), session(nullptr) {} - tensorflow::GraphDef* graphDef; - tensorflow::Session* session; - std::vector normVec; -}; - -class L2TauNNProducer : public edm::stream::EDProducer> { -public: - using TrackSoAHost = pixelTrack::TrackSoAHostPhase1; - - struct caloRecHitCollections { - const HBHERecHitCollection* hbhe; - const HORecHitCollection* ho; - const EcalRecHitCollection* eb; - const EcalRecHitCollection* ee; - const CaloGeometry* geometry; - }; - - struct InputDescTau { - std::string CollectionName; - edm::EDGetTokenT inputToken_; - }; - - static constexpr float dR2_max = L2TauTagNNv1::dR_max * L2TauTagNNv1::dR_max; - static constexpr float dEta_width = 2 * L2TauTagNNv1::dR_max / static_cast(L2TauTagNNv1::nCellEta); - static constexpr float dPhi_width = 2 * L2TauTagNNv1::dR_max / static_cast(L2TauTagNNv1::nCellPhi); - - explicit L2TauNNProducer(const edm::ParameterSet&, const L2TauNNProducerCacheData*); - static void fillDescriptions(edm::ConfigurationDescriptions&); - static std::unique_ptr initializeGlobalCache(const edm::ParameterSet&); - static void globalEndJob(L2TauNNProducerCacheData*); - -private: - void checknan(tensorflow::Tensor& tensor, int debugLevel); - void standardizeTensor(tensorflow::Tensor& tensor); - std::vector getTauScore(const tensorflow::Tensor& cellGridMatrix); - void produce(edm::Event& event, const edm::EventSetup& eventsetup) override; - void fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector& allTaus); - void fillCaloRecHits(tensorflow::Tensor& cellGridMatrix, - const std::vector& allTaus, - const caloRecHitCollections& caloRecHits); - void fillPatatracks(tensorflow::Tensor& cellGridMatrix, - const std::vector& allTaus, - const TrackSoAHost& patatracks_tsoa, - const ZVertexSoAHost& patavtx_soa, - const reco::BeamSpot& beamspot, - const MagneticField* magfi); - void selectGoodTracksAndVertices(const ZVertexSoAHost& patavtx_soa, - const TrackSoAHost& patatracks_tsoa, - std::vector& trkGood, - std::vector& vtxGood); - - std::pair impactParameter(int it, - const TrackSoAHost& patatracks_tsoa, - float patatrackPhi, - const reco::BeamSpot& beamspot, - const MagneticField* magfi); - template - std::tuple getEtaPhiIndices(const VPos& position, const LVec& tau_p4); - template - std::tuple getEtaPhiIndices(float eta, float phi, const LVec& tau_p4); - -private: - const int debugLevel_; - const edm::EDGetTokenT tauTriggerToken_; - std::vector L1TauDesc_; - const edm::EDGetTokenT hbheToken_; - const edm::EDGetTokenT hoToken_; - const edm::EDGetTokenT ebToken_; - const edm::EDGetTokenT eeToken_; - const edm::ESGetToken geometryToken_; - const edm::ESGetToken bFieldToken_; - const edm::EDGetTokenT pataVerticesToken_; - const edm::EDGetTokenT pataTracksToken_; - const edm::EDGetTokenT beamSpotToken_; - const unsigned int maxVtx_; - const float fractionSumPt2_; - const float minSumPt2_; - const float trackPtMin_; - const float trackPtMax_; - const float trackChi2Max_; - std::string inputTensorName_; - std::string outputTensorName_; - const L2TauNNProducerCacheData* L2cacheData_; -}; - -std::unique_ptr L2TauNNProducer::initializeGlobalCache(const edm::ParameterSet& cfg) { - std::unique_ptr cacheData = std::make_unique(); - cacheData->normVec.reserve(L2TauTagNNv1::nVars); - - auto const graphPath = edm::FileInPath(cfg.getParameter("graphPath")).fullPath(); - - cacheData->graphDef = tensorflow::loadGraphDef(graphPath); - cacheData->session = tensorflow::createSession(cacheData->graphDef); - - boost::property_tree::ptree loadPtreeRoot; - auto const normalizationDict = edm::FileInPath(cfg.getParameter("normalizationDict")).fullPath(); - boost::property_tree::read_json(normalizationDict, loadPtreeRoot); - for (const auto& [key, val] : L2TauTagNNv1::varNameMap) { - boost::property_tree::ptree var = loadPtreeRoot.get_child(val); - normDictElement current_element; - current_element.mean = var.get_child("mean").get_value(); - current_element.std = var.get_child("std").get_value(); - current_element.min = var.get_child("min").get_value(); - current_element.max = var.get_child("max").get_value(); - cacheData->normVec.push_back(current_element); - } - return cacheData; -} -void L2TauNNProducer::globalEndJob(L2TauNNProducerCacheData* cacheData) { - if (cacheData->graphDef != nullptr) { - delete cacheData->graphDef; - } - tensorflow::closeSession(cacheData->session); -} -void L2TauNNProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("debugLevel", 0)->setComment("set debug level for printing out info"); - edm::ParameterSetDescription l1TausPset; - l1TausPset.add("L1CollectionName", "DoubleTau")->setComment("Name of collections"); - l1TausPset.add("L1TauTrigger", edm::InputTag("hltL1sDoubleTauBigOR")) - ->setComment("Which trigger should the L1 Taus collection pass"); - edm::ParameterSet l1TausPSetDefault; - l1TausPSetDefault.addParameter("L1CollectionName", "DoubleTau"); - l1TausPSetDefault.addParameter("L1TauTrigger", edm::InputTag("hltL1sDoubleTauBigOR")); - desc.addVPSet("L1Taus", l1TausPset, {l1TausPSetDefault}); - desc.add("hbheInput", edm::InputTag("hltHbhereco"))->setComment("HBHE recHit collection"); - desc.add("hoInput", edm::InputTag("hltHoreco"))->setComment("HO recHit Collection"); - desc.add("ebInput", edm::InputTag("hltEcalRecHit:EcalRecHitsEB"))->setComment("EB recHit Collection"); - desc.add("eeInput", edm::InputTag("hltEcalRecHit:EcalRecHitsEE"))->setComment("EE recHit Collection"); - desc.add("pataVertices", edm::InputTag("hltPixelVerticesSoA")) - ->setComment("patatrack vertices collection"); - desc.add("pataTracks", edm::InputTag("hltPixelTracksSoA"))->setComment("patatrack collection"); - desc.add("BeamSpot", edm::InputTag("hltOnlineBeamSpot"))->setComment("BeamSpot Collection"); - desc.add("maxVtx", 100)->setComment("max output collection size (number of accepted vertices)"); - desc.add("fractionSumPt2", 0.3)->setComment("threshold on sumPt2 fraction of the leading vertex"); - desc.add("minSumPt2", 0.)->setComment("min sumPt2"); - desc.add("track_pt_min", 1.0)->setComment("min track p_T"); - desc.add("track_pt_max", 10.0)->setComment("max track p_T"); - desc.add("track_chi2_max", 99999.)->setComment("max track chi2"); - desc.add("graphPath", "RecoTauTag/TrainingFiles/data/L2TauNNTag/L2TauTag_Run3v1.pb") - ->setComment("path to the saved CNN"); - desc.add("normalizationDict", "RecoTauTag/TrainingFiles/data/L2TauNNTag/NormalizationDict.json") - ->setComment("path to the dictionary for variable standardization"); - descriptions.addWithDefaultLabel(desc); -} - -L2TauNNProducer::L2TauNNProducer(const edm::ParameterSet& cfg, const L2TauNNProducerCacheData* cacheData) - : debugLevel_(cfg.getParameter("debugLevel")), - hbheToken_(consumes(cfg.getParameter("hbheInput"))), - hoToken_(consumes(cfg.getParameter("hoInput"))), - ebToken_(consumes(cfg.getParameter("ebInput"))), - eeToken_(consumes(cfg.getParameter("eeInput"))), - geometryToken_(esConsumes()), - bFieldToken_(esConsumes()), - pataVerticesToken_(consumes(cfg.getParameter("pataVertices"))), - pataTracksToken_(consumes(cfg.getParameter("pataTracks"))), - beamSpotToken_(consumes(cfg.getParameter("BeamSpot"))), - maxVtx_(cfg.getParameter("maxVtx")), - fractionSumPt2_(cfg.getParameter("fractionSumPt2")), - minSumPt2_(cfg.getParameter("minSumPt2")), - trackPtMin_(cfg.getParameter("track_pt_min")), - trackPtMax_(cfg.getParameter("track_pt_max")), - trackChi2Max_(cfg.getParameter("track_chi2_max")) { - if (cacheData->graphDef == nullptr) { - throw cms::Exception("InvalidCacheData") << "Invalid Cache Data."; - } - inputTensorName_ = cacheData->graphDef->node(0).name(); - outputTensorName_ = cacheData->graphDef->node(cacheData->graphDef->node_size() - 1).name(); - L2cacheData_ = cacheData; - std::vector L1TauCollections = cfg.getParameter>("L1Taus"); - L1TauDesc_.reserve(L1TauCollections.size()); - for (const auto& l1TauInput : L1TauCollections) { - InputDescTau toInsert; - toInsert.CollectionName = l1TauInput.getParameter("L1CollectionName"); - toInsert.inputToken_ = - consumes(l1TauInput.getParameter("L1TauTrigger")); - L1TauDesc_.push_back(toInsert); - } - for (const auto& desc : L1TauDesc_) - produces>(desc.CollectionName); -} - -void L2TauNNProducer::checknan(tensorflow::Tensor& tensor, int debugLevel) { - using NNInputs = L2TauTagNNv1::NNInputs; - std::vector tensor_shape(tensor.shape().dims()); - for (int d = 0; d < tensor.shape().dims(); d++) { - tensor_shape.at(d) = tensor.shape().dim_size(d); - } - if (tensor_shape.size() != 4) { - throw cms::Exception("InvalidTensor") << "Tensor shape does not have 4 dimensions!"; - } - for (int tau_idx = 0; tau_idx < tensor_shape.at(0); tau_idx++) { - for (int phi_idx = 0; phi_idx < tensor_shape.at(1); phi_idx++) { - for (int eta_idx = 0; eta_idx < tensor_shape.at(2); eta_idx++) { - for (int var_idx = 0; var_idx < tensor_shape.at(3); var_idx++) { - auto getCell = [&](NNInputs input) -> float& { - return getCellImpl(tensor, tau_idx, phi_idx, eta_idx, input); - }; - auto nonstd_var = getCell(static_cast(var_idx)); - if (edm::isNotFinite(nonstd_var)) { - edm::LogWarning("InputVar") << "var is nan \nvar name= " - << L2TauTagNNv1::varNameMap.at(static_cast(var_idx)) - << "\t var_idx = " << var_idx << "\t eta_idx = " << eta_idx - << "\t phi_idx = " << phi_idx << "\t tau_idx = " << tau_idx; - if (debugLevel > 2) { - edm::LogWarning("InputVar") << "other vars in same cell \n"; - if (var_idx + 1 < tensor_shape.at(3)) - edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 1)) - << "\t = " << getCell(static_cast(var_idx + 1)); - if (var_idx + 2 < tensor_shape.at(3)) - edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 2)) - << "\t = " << getCell(static_cast(var_idx + 2)); - if (var_idx + 3 < tensor_shape.at(3)) - edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 3)) - << "\t = " << getCell(static_cast(var_idx + 3)); - if (var_idx + 4 < tensor_shape.at(3)) - edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast(var_idx + 4)) - << "\t = " << getCell(static_cast(var_idx + 4)); - } - } - } - } - } - } -} - -void L2TauNNProducer::standardizeTensor(tensorflow::Tensor& tensor) { - using NNInputs = L2TauTagNNv1::NNInputs; - std::vector tensor_shape(tensor.shape().dims()); - for (int d = 0; d < tensor.shape().dims(); d++) { - tensor_shape.at(d) = tensor.shape().dim_size(d); - } - if (tensor_shape.size() != 4) { - throw cms::Exception("InvalidTensor") << "Tensor shape does not have 4 dimensions!"; - } - for (int tau_idx = 0; tau_idx < tensor_shape.at(0); tau_idx++) { - for (int phi_idx = 0; phi_idx < tensor_shape.at(1); phi_idx++) { - for (int eta_idx = 0; eta_idx < tensor_shape.at(2); eta_idx++) { - for (int var_idx = 0; var_idx < tensor_shape.at(3); var_idx++) { - auto getCell = [&](NNInputs input) -> float& { - return getCellImpl(tensor, tau_idx, phi_idx, eta_idx, input); - }; - float mean = L2cacheData_->normVec.at(var_idx).mean; - float std = L2cacheData_->normVec.at(var_idx).std; - float min = L2cacheData_->normVec.at(var_idx).min; - float max = L2cacheData_->normVec.at(var_idx).max; - float nonstd_var = getCell(static_cast(var_idx)); - float std_var = static_cast((nonstd_var - mean) / std); - if (std_var > max) { - std_var = static_cast(max); - } else if (std_var < min) { - std_var = static_cast(min); - } - getCell(static_cast(var_idx)) = std_var; - } - } - } - } -} - -void L2TauNNProducer::fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector& allTaus) { - using NNInputs = L2TauTagNNv1::NNInputs; - - const int nTaus = allTaus.size(); - for (int tau_idx = 0; tau_idx < nTaus; tau_idx++) { - for (int eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) { - for (int phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) { - auto getCell = [&](NNInputs input) -> float& { - return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input); - }; - getCell(NNInputs::l1Tau_pt) = allTaus[tau_idx]->pt(); - getCell(NNInputs::l1Tau_eta) = allTaus[tau_idx]->eta(); - getCell(NNInputs::l1Tau_hwIso) = allTaus[tau_idx]->hwIso(); - } - } - } -} - -template -std::tuple L2TauNNProducer::getEtaPhiIndices(float eta, float phi, const LVec& tau_p4) { - const float deta = eta - tau_p4.eta(); - const float dphi = reco::deltaPhi(phi, tau_p4.phi()); - const int eta_idx = static_cast(floor((deta + L2TauTagNNv1::dR_max) / dEta_width)); - const int phi_idx = static_cast(floor((dphi + L2TauTagNNv1::dR_max) / dPhi_width)); - return std::make_tuple(deta, dphi, eta_idx, phi_idx); -} - -template -std::tuple L2TauNNProducer::getEtaPhiIndices(const VPos& position, const LVec& tau_p4) { - return getEtaPhiIndices(position.eta(), position.phi(), tau_p4); -} - -void L2TauNNProducer::fillCaloRecHits(tensorflow::Tensor& cellGridMatrix, - const std::vector& allTaus, - const caloRecHitCollections& caloRecHits) { - using NNInputs = L2TauTagNNv1::NNInputs; - - const int nTaus = allTaus.size(); - float deta, dphi; - int eta_idx = 0; - int phi_idx = 0; - int tau_idx = 0; - - auto getCell = [&](NNInputs input) -> float& { - return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input); - }; - for (tau_idx = 0; tau_idx < nTaus; tau_idx++) { - // calorechit_EE - for (const auto& caloRecHit_ee : *caloRecHits.ee) { - if (caloRecHit_ee.energy() <= 0) - continue; - const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_ee.id())->getPosition(); - const float eeCalEn = caloRecHit_ee.energy(); - const float eeCalChi2 = caloRecHit_ee.chi2(); - if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { - std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); - getCell(NNInputs::EcalEnergySum) += eeCalEn; - getCell(NNInputs::EcalSize) += 1.; - getCell(NNInputs::EcalEnergyStdDev) += eeCalEn * eeCalEn; - getCell(NNInputs::EcalDeltaEta) += deta * eeCalEn; - getCell(NNInputs::EcalDeltaPhi) += dphi * eeCalEn; - if (eeCalChi2 >= 0) { - getCell(NNInputs::EcalChi2) += eeCalChi2 * eeCalEn; - getCell(NNInputs::EcalEnergySumForPositiveChi2) += eeCalEn; - getCell(NNInputs::EcalSizeForPositiveChi2) += 1.; - } - } - } - - // calorechit_EB - for (const auto& caloRecHit_eb : *caloRecHits.eb) { - if (caloRecHit_eb.energy() <= 0) - continue; - const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_eb.id())->getPosition(); - const float ebCalEn = caloRecHit_eb.energy(); - const float ebCalChi2 = caloRecHit_eb.chi2(); - if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { - std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); - getCell(NNInputs::EcalEnergySum) += ebCalEn; - getCell(NNInputs::EcalSize) += 1.; - getCell(NNInputs::EcalEnergyStdDev) += ebCalEn * ebCalEn; - getCell(NNInputs::EcalDeltaEta) += deta * ebCalEn; - getCell(NNInputs::EcalDeltaPhi) += dphi * ebCalEn; - if (ebCalChi2 >= 0) { - getCell(NNInputs::EcalChi2) += ebCalChi2 * ebCalEn; - getCell(NNInputs::EcalEnergySumForPositiveChi2) += ebCalEn; - getCell(NNInputs::EcalSizeForPositiveChi2) += 1.; - } - } - } - - // calorechit_HBHE - for (const auto& caloRecHit_hbhe : *caloRecHits.hbhe) { - if (caloRecHit_hbhe.energy() <= 0) - continue; - const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_hbhe.id())->getPosition(); - const float hbheCalEn = caloRecHit_hbhe.energy(); - const float hbheCalChi2 = caloRecHit_hbhe.chi2(); - if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { - std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); - getCell(NNInputs::HcalEnergySum) += hbheCalEn; - getCell(NNInputs::HcalEnergyStdDev) += hbheCalEn * hbheCalEn; - getCell(NNInputs::HcalSize) += 1.; - getCell(NNInputs::HcalDeltaEta) += deta * hbheCalEn; - getCell(NNInputs::HcalDeltaPhi) += dphi * hbheCalEn; - if (hbheCalChi2 >= 0) { - getCell(NNInputs::HcalChi2) += hbheCalChi2 * hbheCalEn; - getCell(NNInputs::HcalEnergySumForPositiveChi2) += hbheCalEn; - getCell(NNInputs::HcalSizeForPositiveChi2) += 1.; - } - } - } - - // calorechit_HO - for (const auto& caloRecHit_ho : *caloRecHits.ho) { - if (caloRecHit_ho.energy() <= 0) - continue; - const auto& position = caloRecHits.geometry->getGeometry(caloRecHit_ho.id())->getPosition(); - const float hoCalEn = caloRecHit_ho.energy(); - if (reco::deltaR2(position, allTaus[tau_idx]->polarP4()) < dR2_max) { - std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(position, allTaus[tau_idx]->polarP4()); - getCell(NNInputs::HcalEnergySum) += hoCalEn; - getCell(NNInputs::HcalEnergyStdDev) += hoCalEn * hoCalEn; - getCell(NNInputs::HcalSize) += 1.; - getCell(NNInputs::HcalDeltaEta) += deta * hoCalEn; - getCell(NNInputs::HcalDeltaPhi) += dphi * hoCalEn; - } - } - - // normalize to sum and define stdDev - for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) { - for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) { - /* normalize eCal vars*/ - if (getCell(NNInputs::EcalEnergySum) > 0.) { - getCell(NNInputs::EcalDeltaEta) /= getCell(NNInputs::EcalEnergySum); - getCell(NNInputs::EcalDeltaPhi) /= getCell(NNInputs::EcalEnergySum); - } - if (getCell(NNInputs::EcalEnergySumForPositiveChi2) > 0.) { - getCell(NNInputs::EcalChi2) /= getCell(NNInputs::EcalEnergySumForPositiveChi2); - } - if (getCell(NNInputs::EcalSize) > 1.) { - // (stdDev - (enSum*enSum)/size) / (size-1) - getCell(NNInputs::EcalEnergyStdDev) = - (getCell(NNInputs::EcalEnergyStdDev) - - (getCell(NNInputs::EcalEnergySum) * getCell(NNInputs::EcalEnergySum)) / getCell(NNInputs::EcalSize)) / - (getCell(NNInputs::EcalSize) - 1); - } else { - getCell(NNInputs::EcalEnergyStdDev) = 0.; - } - /* normalize hCal Vars */ - if (getCell(NNInputs::HcalEnergySum) > 0.) { - getCell(NNInputs::HcalDeltaEta) /= getCell(NNInputs::HcalEnergySum); - getCell(NNInputs::HcalDeltaPhi) /= getCell(NNInputs::HcalEnergySum); - } - if (getCell(NNInputs::HcalEnergySumForPositiveChi2) > 0.) { - getCell(NNInputs::HcalChi2) /= getCell(NNInputs::HcalEnergySumForPositiveChi2); - } - if (getCell(NNInputs::HcalSize) > 1.) { - // (stdDev - (enSum*enSum)/size) / (size-1) - getCell(NNInputs::HcalEnergyStdDev) = - (getCell(NNInputs::HcalEnergyStdDev) - - (getCell(NNInputs::HcalEnergySum) * getCell(NNInputs::HcalEnergySum)) / getCell(NNInputs::HcalSize)) / - (getCell(NNInputs::HcalSize) - 1); - } else { - getCell(NNInputs::HcalEnergyStdDev) = 0.; - } - } - } - } -} - -void L2TauNNProducer::selectGoodTracksAndVertices(const ZVertexSoAHost& patavtx_soa, - const TrackSoAHost& patatracks_tsoa, - std::vector& trkGood, - std::vector& vtxGood) { - using patatrackHelpers = TracksUtilities; - const auto maxTracks = patatracks_tsoa.view().metadata().size(); - const int nv = patavtx_soa.view().nvFinal(); - trkGood.clear(); - trkGood.reserve(maxTracks); - vtxGood.clear(); - vtxGood.reserve(nv); - auto const quality = patatracks_tsoa.view().quality(); - - // No need to sort either as the algorithms is just using the max (not even the location, just the max value of pt2sum). - std::vector pTSquaredSum(nv, 0); - std::vector nTrkAssociated(nv, 0); - - for (int32_t trk_idx = 0; trk_idx < maxTracks; ++trk_idx) { - auto nHits = patatrackHelpers::nHits(patatracks_tsoa.view(), trk_idx); - if (nHits == 0) { - break; - } - int vtx_ass_to_track = patavtx_soa.view()[trk_idx].idv(); - if (vtx_ass_to_track >= 0 && vtx_ass_to_track < nv) { - auto patatrackPt = patatracks_tsoa.view()[trk_idx].pt(); - ++nTrkAssociated[vtx_ass_to_track]; - if (patatrackPt >= trackPtMin_ && patatracks_tsoa.const_view()[trk_idx].chi2() <= trackChi2Max_) { - patatrackPt = std::min(patatrackPt, trackPtMax_); - pTSquaredSum[vtx_ass_to_track] += patatrackPt * patatrackPt; - } - } - if (nHits > 0 and quality[trk_idx] >= pixelTrack::Quality::loose) { - trkGood.push_back(trk_idx); - } - } - if (nv > 0) { - const auto minFOM_fromFrac = (*std::max_element(pTSquaredSum.begin(), pTSquaredSum.end())) * fractionSumPt2_; - for (int j = nv - 1; j >= 0 && vtxGood.size() < maxVtx_; --j) { - auto vtx_idx = patavtx_soa.view()[j].sortInd(); - assert(vtx_idx < nv); - if (nTrkAssociated[vtx_idx] >= 2 && pTSquaredSum[vtx_idx] >= minFOM_fromFrac && - pTSquaredSum[vtx_idx] > minSumPt2_) { - vtxGood.push_back(vtx_idx); - } - } - } -} - -std::pair L2TauNNProducer::impactParameter(int it, - const TrackSoAHost& patatracks_tsoa, - float patatrackPhi, - const reco::BeamSpot& beamspot, - const MagneticField* magfi) { - /* dxy and dz */ - riemannFit::Vector5d ipar, opar; - riemannFit::Matrix5d icov, ocov; - TracksUtilities::copyToDense(patatracks_tsoa.view(), ipar, icov, it); - riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); - LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); - float sp = std::sin(patatrackPhi); - float cp = std::cos(patatrackPhi); - Surface::RotationType Rotation(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0); - GlobalPoint BeamSpotPoint(beamspot.x0(), beamspot.y0(), beamspot.z0()); - Plane impPointPlane(BeamSpotPoint, Rotation); - GlobalTrajectoryParameters gp( - impPointPlane.toGlobal(lpar.position()), impPointPlane.toGlobal(lpar.momentum()), lpar.charge(), magfi); - GlobalPoint vv = gp.position(); - math::XYZPoint pos(vv.x(), vv.y(), vv.z()); - GlobalVector pp = gp.momentum(); - math::XYZVector mom(pp.x(), pp.y(), pp.z()); - auto lambda = M_PI_2 - pp.theta(); - auto phi = pp.phi(); - float patatrackDxy = -vv.x() * std::sin(phi) + vv.y() * std::cos(phi); - float patatrackDz = - (vv.z() * std::cos(lambda) - (vv.x() * std::cos(phi) + vv.y() * std::sin(phi)) * std::sin(lambda)) / - std::cos(lambda); - return std::make_pair(patatrackDxy, patatrackDz); -} - -void L2TauNNProducer::fillPatatracks(tensorflow::Tensor& cellGridMatrix, - const std::vector& allTaus, - const TrackSoAHost& patatracks_tsoa, - const ZVertexSoAHost& patavtx_soa, - const reco::BeamSpot& beamspot, - const MagneticField* magfi) { - using NNInputs = L2TauTagNNv1::NNInputs; - using patatrackHelpers = TracksUtilities; - float deta, dphi; - int eta_idx = 0; - int phi_idx = 0; - int tau_idx = 0; - - auto getCell = [&](NNInputs input) -> float& { - return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input); - }; - - std::vector trkGood; - std::vector vtxGood; - - selectGoodTracksAndVertices(patavtx_soa, patatracks_tsoa, trkGood, vtxGood); - - const int nTaus = allTaus.size(); - for (tau_idx = 0; tau_idx < nTaus; tau_idx++) { - const float tauEta = allTaus[tau_idx]->eta(); - const float tauPhi = allTaus[tau_idx]->phi(); - - for (const auto it : trkGood) { - const float patatrackPt = patatracks_tsoa.const_view()[it].pt(); - if (patatrackPt <= 0) - continue; - const float patatrackPhi = patatrackHelpers::phi(patatracks_tsoa.const_view(), it); - const float patatrackEta = patatracks_tsoa.const_view()[it].eta(); - const float patatrackCharge = patatrackHelpers::charge(patatracks_tsoa.const_view(), it); - const float patatrackChi2OverNdof = patatracks_tsoa.view()[it].chi2(); - const auto nHits = patatrackHelpers::nHits(patatracks_tsoa.const_view(), it); - if (nHits <= 0) - continue; - const int patatrackNdof = 2 * std::min(6, nHits) - 5; - - const int vtx_idx_assTrk = patavtx_soa.view()[it].idv(); - if (reco::deltaR2(patatrackEta, patatrackPhi, tauEta, tauPhi) < dR2_max) { - std::tie(deta, dphi, eta_idx, phi_idx) = - getEtaPhiIndices(patatrackEta, patatrackPhi, allTaus[tau_idx]->polarP4()); - getCell(NNInputs::PatatrackPtSum) += patatrackPt; - getCell(NNInputs::PatatrackSize) += 1.; - getCell(NNInputs::PatatrackChargeSum) += patatrackCharge; - getCell(NNInputs::PatatrackDeltaEta) += deta * patatrackPt; - getCell(NNInputs::PatatrackDeltaPhi) += dphi * patatrackPt; - getCell(NNInputs::PatatrackChi2OverNdof) += patatrackChi2OverNdof * patatrackPt; - getCell(NNInputs::PatatrackNdof) += patatrackNdof * patatrackPt; - std::pair impactParameters = impactParameter(it, patatracks_tsoa, patatrackPhi, beamspot, magfi); - getCell(NNInputs::PatatrackDxy) += impactParameters.first * patatrackPt; - getCell(NNInputs::PatatrackDz) += impactParameters.second * patatrackPt; - if ((std::find(vtxGood.begin(), vtxGood.end(), vtx_idx_assTrk) != vtxGood.end())) { - getCell(NNInputs::PatatrackPtSumWithVertex) += patatrackPt; - getCell(NNInputs::PatatrackSizeWithVertex) += 1.; - } - } - } - - // normalize to sum and define stdDev - for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) { - for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) { - getCell(NNInputs::nVertices) = vtxGood.size(); - if (getCell(NNInputs::PatatrackPtSum) > 0.) { - getCell(NNInputs::PatatrackDeltaEta) /= getCell(NNInputs::PatatrackPtSum); - getCell(NNInputs::PatatrackDeltaPhi) /= getCell(NNInputs::PatatrackPtSum); - getCell(NNInputs::PatatrackChi2OverNdof) /= getCell(NNInputs::PatatrackPtSum); - getCell(NNInputs::PatatrackNdof) /= getCell(NNInputs::PatatrackPtSum); - getCell(NNInputs::PatatrackDxy) /= getCell(NNInputs::PatatrackPtSum); - getCell(NNInputs::PatatrackDz) /= getCell(NNInputs::PatatrackPtSum); - } - } - } - } -} - -std::vector L2TauNNProducer::getTauScore(const tensorflow::Tensor& cellGridMatrix) { - const int nTau = cellGridMatrix.shape().dim_size(0); - std::vector pred_vector(nTau); - if (nTau > 0) { - // Only run the inference if there are taus to process - std::vector pred_tensor; - tensorflow::run(L2cacheData_->session, {{inputTensorName_, cellGridMatrix}}, {outputTensorName_}, &pred_tensor); - for (int tau_idx = 0; tau_idx < nTau; ++tau_idx) { - pred_vector[tau_idx] = pred_tensor[0].matrix()(tau_idx, 0); - } - } - return pred_vector; -} - -void L2TauNNProducer::produce(edm::Event& event, const edm::EventSetup& eventsetup) { - std::vector> TauCollectionMap(L1TauDesc_.size()); - l1t::TauVectorRef allTaus; - - for (size_t inp_idx = 0; inp_idx < L1TauDesc_.size(); inp_idx++) { - l1t::TauVectorRef l1Taus; - auto const& l1TriggeredTaus = event.get(L1TauDesc_[inp_idx].inputToken_); - l1TriggeredTaus.getObjects(trigger::TriggerL1Tau, l1Taus); - TauCollectionMap.at(inp_idx).resize(l1Taus.size()); - - for (size_t l1_idx = 0; l1_idx < l1Taus.size(); l1_idx++) { - size_t tau_idx; - const auto iter = std::find(allTaus.begin(), allTaus.end(), l1Taus[l1_idx]); - if (iter != allTaus.end()) { - tau_idx = std::distance(allTaus.begin(), iter); - } else { - allTaus.push_back(l1Taus[l1_idx]); - tau_idx = allTaus.size() - 1; - } - TauCollectionMap.at(inp_idx).at(l1_idx) = tau_idx; - } - } - const auto ebCal = event.getHandle(ebToken_); - const auto eeCal = event.getHandle(eeToken_); - const auto hbhe = event.getHandle(hbheToken_); - const auto ho = event.getHandle(hoToken_); - auto const& patatracks_SoA = event.get(pataTracksToken_); - auto const& vertices_SoA = event.get(pataVerticesToken_); - const auto bsHandle = event.getHandle(beamSpotToken_); - - auto const fieldESH = eventsetup.getHandle(bFieldToken_); - auto const geometry = eventsetup.getHandle(geometryToken_); - - caloRecHitCollections caloRecHits; - caloRecHits.hbhe = &*hbhe; - caloRecHits.ho = &*ho; - caloRecHits.eb = &*ebCal; - caloRecHits.ee = &*eeCal; - caloRecHits.geometry = &*geometry; - - const int nTaus = allTaus.size(); - tensorflow::Tensor cellGridMatrix(tensorflow::DT_FLOAT, - {nTaus, L2TauTagNNv1::nCellEta, L2TauTagNNv1::nCellPhi, L2TauTagNNv1::nVars}); - const int n_inputs = nTaus * L2TauTagNNv1::nCellEta * L2TauTagNNv1::nCellPhi * L2TauTagNNv1::nVars; - for (int input_idx = 0; input_idx < n_inputs; ++input_idx) { - cellGridMatrix.flat()(input_idx) = 0; - } - fillL1TauVars(cellGridMatrix, allTaus); - - fillCaloRecHits(cellGridMatrix, allTaus, caloRecHits); - - fillPatatracks(cellGridMatrix, allTaus, patatracks_SoA, vertices_SoA, *bsHandle, fieldESH.product()); - - standardizeTensor(cellGridMatrix); - - if (debugLevel_ > 0) { - checknan(cellGridMatrix, debugLevel_); - } - - std::vector tau_score = getTauScore(cellGridMatrix); - - for (size_t inp_idx = 0; inp_idx < L1TauDesc_.size(); inp_idx++) { - const size_t nTau = TauCollectionMap[inp_idx].size(); - auto tau_tags = std::make_unique>(nTau); - for (size_t tau_pos = 0; tau_pos < nTau; ++tau_pos) { - const auto tau_idx = TauCollectionMap[inp_idx][tau_pos]; - if (debugLevel_ > 0) { - edm::LogInfo("DebugInfo") << event.id().event() << " \t " << (allTaus[tau_idx])->pt() << " \t " - << tau_score.at(tau_idx) << std::endl; - } - (*tau_tags)[tau_pos] = tau_score.at(tau_idx); - } - event.put(std::move(tau_tags), L1TauDesc_[inp_idx].CollectionName); - } -} -//define this as a plug-in -#include "FWCore/Framework/interface/MakerMacros.h" -DEFINE_FWK_MODULE(L2TauNNProducer); diff --git a/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml b/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml index 6c8c102293651..eae412ec800cc 100644 --- a/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml +++ b/RecoTracker/PixelTrackFitting/plugins/BuildFile.xml @@ -1,10 +1,7 @@ - - - diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpCUDA.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpCUDA.cc deleted file mode 100644 index 6bff9a7c42292..0000000000000 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpCUDA.cc +++ /dev/null @@ -1,104 +0,0 @@ -#include -#include // needed here by soa layout - -#include "CUDADataFormats/Common/interface/Product.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDAnalyzer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "FWCore/Utilities/interface/RunningAverage.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h" - -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" - -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" - -template -class PixelTrackDumpCUDAT : public edm::global::EDAnalyzer<> { -public: - using TrackSoAHost = TrackSoAHeterogeneousHost; - using TrackSoADevice = TrackSoAHeterogeneousDevice; - - using VertexSoAHost = ZVertexSoAHost; - using VertexSoADevice = ZVertexSoADevice; - - explicit PixelTrackDumpCUDAT(const edm::ParameterSet& iConfig); - ~PixelTrackDumpCUDAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void analyze(edm::StreamID streamID, edm::Event const& iEvent, const edm::EventSetup& iSetup) const override; - const bool m_onGPU; - edm::EDGetTokenT> tokenGPUTrack_; - edm::EDGetTokenT> tokenGPUVertex_; - edm::EDGetTokenT tokenSoATrack_; - edm::EDGetTokenT tokenSoAVertex_; -}; - -template -PixelTrackDumpCUDAT::PixelTrackDumpCUDAT(const edm::ParameterSet& iConfig) - : m_onGPU(iConfig.getParameter("onGPU")) { - if (m_onGPU) { - tokenGPUTrack_ = consumes(iConfig.getParameter("pixelTrackSrc")); - tokenGPUVertex_ = consumes(iConfig.getParameter("pixelVertexSrc")); - } else { - tokenSoATrack_ = consumes(iConfig.getParameter("pixelTrackSrc")); - tokenSoAVertex_ = consumes(iConfig.getParameter("pixelVertexSrc")); - } -} - -template -void PixelTrackDumpCUDAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - desc.add("onGPU", true); - desc.add("pixelTrackSrc", edm::InputTag("pixelTracksCUDA")); - desc.add("pixelVertexSrc", edm::InputTag("pixelVerticesCUDA")); - descriptions.addWithDefaultLabel(desc); -} - -template -void PixelTrackDumpCUDAT::analyze(edm::StreamID streamID, - edm::Event const& iEvent, - const edm::EventSetup& iSetup) const { - if (m_onGPU) { - auto const& hTracks = iEvent.get(tokenGPUTrack_); - cms::cuda::ScopedContextProduce ctx{hTracks}; - - auto const& tracks = ctx.get(hTracks); - auto const* tsoa = &tracks; - assert(tsoa->buffer()); - - auto const& vertices = ctx.get(iEvent.get(tokenGPUVertex_)); - auto const* vsoa = &vertices; - assert(vsoa->buffer()); - - } else { - auto const& tsoa = iEvent.get(tokenSoATrack_); - assert(tsoa.buffer()); - - auto const& vsoa = iEvent.get(tokenSoAVertex_); - assert(vsoa.buffer()); - } -} - -using PixelTrackDumpCUDAPhase1 = PixelTrackDumpCUDAT; -DEFINE_FWK_MODULE(PixelTrackDumpCUDAPhase1); - -using PixelTrackDumpCUDAPhase2 = PixelTrackDumpCUDAT; -DEFINE_FWK_MODULE(PixelTrackDumpCUDAPhase2); - -using PixelTrackDumpCUDAHIonPhase1 = PixelTrackDumpCUDAT; -DEFINE_FWK_MODULE(PixelTrackDumpCUDAHIonPhase1); diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoA.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoA.cc deleted file mode 100644 index a7dd1b3ddf209..0000000000000 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoA.cc +++ /dev/null @@ -1,270 +0,0 @@ -#include -#include -//#include -#include -#include -#include -#include -#include - -#include "CUDADataFormats/Common/interface/HostProduct.h" -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "DataFormats/BeamSpot/interface/BeamSpot.h" -#include "DataFormats/Common/interface/OrphanHandle.h" -#include "DataFormats/GeometrySurface/interface/Plane.h" -#include "DataFormats/TrackReco/interface/Track.h" -#include "DataFormats/TrackReco/interface/TrackExtra.h" -#include "DataFormats/TrackReco/interface/TrackFwd.h" -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" -#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "Geometry/Records/interface/TrackerTopologyRcd.h" -#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" -#include "RecoTracker/PixelTrackFitting/interface/FitUtils.h" -#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h" -#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h" -#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h" - -#include "storeTracks.h" - -/** - * This class creates "legacy" reco::Track - * objects from the output of SoA CA. - */ -template -class PixelTrackProducerFromSoAT : public edm::global::EDProducer<> { - using TrackSoAHost = TrackSoAHeterogeneousHost; - using TracksHelpers = TracksUtilities; - using HMSstorage = HostProduct; - using IndToEdm = std::vector; - -public: - explicit PixelTrackProducerFromSoAT(const edm::ParameterSet &iConfig); - ~PixelTrackProducerFromSoAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions &descriptions); - -private: - void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override; - - // Event Data tokens - const edm::EDGetTokenT tBeamSpot_; - const edm::EDGetTokenT tokenTrack_; - const edm::EDGetTokenT cpuHits_; - const edm::EDGetTokenT hmsToken_; - // Event Setup tokens - const edm::ESGetToken idealMagneticFieldToken_; - const edm::ESGetToken ttTopoToken_; - - int32_t const minNumberOfHits_; - pixelTrack::Quality const minQuality_; -}; - -template -PixelTrackProducerFromSoAT::PixelTrackProducerFromSoAT(const edm::ParameterSet &iConfig) - : tBeamSpot_(consumes(iConfig.getParameter("beamSpot"))), - tokenTrack_(consumes(iConfig.getParameter("trackSrc"))), - cpuHits_(consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), - hmsToken_(consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), - idealMagneticFieldToken_(esConsumes()), - ttTopoToken_(esConsumes()), - minNumberOfHits_(iConfig.getParameter("minNumberOfHits")), - minQuality_(pixelTrack::qualityByName(iConfig.getParameter("minQuality"))) { - if (minQuality_ == pixelTrack::Quality::notQuality) { - throw cms::Exception("PixelTrackConfiguration") - << iConfig.getParameter("minQuality") + " is not a pixelTrack::Quality"; - } - if (minQuality_ < pixelTrack::Quality::dup) { - throw cms::Exception("PixelTrackConfiguration") - << iConfig.getParameter("minQuality") + " not supported"; - } - produces(); - produces(); - // TrackCollection refers to TrackingRechit and TrackExtra - // collections, need to declare its production after them to work - // around a rare race condition in framework scheduling - produces(); - produces(); -} - -template -void PixelTrackProducerFromSoAT::fillDescriptions(edm::ConfigurationDescriptions &descriptions) { - edm::ParameterSetDescription desc; - desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); - desc.add("trackSrc", edm::InputTag("pixelTracksSoA")); - desc.add("pixelRecHitLegacySrc", edm::InputTag("siPixelRecHitsPreSplittingLegacy")); - desc.add("minNumberOfHits", 0); - desc.add("minQuality", "loose"); - descriptions.addWithDefaultLabel(desc); -} - -template -void PixelTrackProducerFromSoAT::produce(edm::StreamID streamID, - edm::Event &iEvent, - const edm::EventSetup &iSetup) const { - // enum class Quality : uint8_t { bad = 0, edup, dup, loose, strict, tight, highPurity }; - reco::TrackBase::TrackQuality recoQuality[] = {reco::TrackBase::undefQuality, - reco::TrackBase::undefQuality, - reco::TrackBase::discarded, - reco::TrackBase::loose, - reco::TrackBase::tight, - reco::TrackBase::tight, - reco::TrackBase::highPurity}; - assert(reco::TrackBase::highPurity == recoQuality[int(pixelTrack::Quality::highPurity)]); - - // std::cout << "Converting gpu helix in reco tracks" << std::endl; - - auto indToEdmP = std::make_unique(); - auto &indToEdm = *indToEdmP; - - auto const &idealField = iSetup.getData(idealMagneticFieldToken_); - - pixeltrackfitting::TracksWithRecHits tracks; - - auto const &httopo = iSetup.getData(ttTopoToken_); - - const auto &bsh = iEvent.get(tBeamSpot_); - GlobalPoint bs(bsh.x0(), bsh.y0(), bsh.z0()); - - auto const &rechits = iEvent.get(cpuHits_); - std::vector hitmap; - auto const &rcs = rechits.data(); - auto const nhits = rcs.size(); - - hitmap.resize(nhits, nullptr); - - auto const *hitsModuleStart = iEvent.get(hmsToken_).get(); - - for (auto const &hit : rcs) { - auto const &thit = static_cast(hit); - auto const detI = thit.det()->index(); - auto const &clus = thit.firstClusterRef(); - assert(clus.isPixel()); - auto const idx = hitsModuleStart[detI] + clus.pixelCluster().originalId(); - if (idx >= hitmap.size()) - hitmap.resize(idx + 256, nullptr); // only in case of hit overflow in one module - - assert(nullptr == hitmap[idx]); - hitmap[idx] = &hit; - } - - std::vector hits; - hits.reserve(5); - - auto const &tsoa = iEvent.get(tokenTrack_); - auto const quality = tsoa.view().quality(); - auto const &hitIndices = tsoa.view().hitIndices(); - auto nTracks = tsoa.view().nTracks(); - - tracks.reserve(nTracks); - - int32_t nt = 0; - - // sort index by pt - std::vector sortIdxs(nTracks); - std::iota(sortIdxs.begin(), sortIdxs.end(), 0); - // sort good-quality tracks by pt, keep bad-quality tracks in the bottom - std::sort(sortIdxs.begin(), sortIdxs.end(), [&](int32_t const i1, int32_t const i2) { - if (quality[i1] >= minQuality_ && quality[i2] >= minQuality_) - return tsoa.view()[i1].pt() > tsoa.view()[i2].pt(); - else - return quality[i1] > quality[i2]; - }); - - // store the index of the SoA: indToEdm[index_SoAtrack] -> index_edmTrack (if it exists) - indToEdm.resize(sortIdxs.size(), -1); - for (const auto &it : sortIdxs) { - auto nHits = TracksHelpers::nHits(tsoa.view(), it); - assert(nHits >= 3); - auto q = quality[it]; - - if (q < minQuality_) - continue; - if (nHits < minNumberOfHits_) // move to nLayers? - continue; - indToEdm[it] = nt; - ++nt; - - hits.resize(nHits); - auto b = hitIndices.begin(it); - for (int iHit = 0; iHit < nHits; ++iHit) - hits[iHit] = hitmap[*(b + iHit)]; - - // mind: this values are respect the beamspot! - float chi2 = tsoa.view()[it].chi2(); - float phi = TracksHelpers::phi(tsoa.view(), it); - - riemannFit::Vector5d ipar, opar; - riemannFit::Matrix5d icov, ocov; - TracksHelpers::template copyToDense(tsoa.view(), ipar, icov, it); - riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); - - LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); - AlgebraicSymMatrix55 m; - for (int i = 0; i < 5; ++i) - for (int j = i; j < 5; ++j) - m(i, j) = ocov(i, j); - - float sp = std::sin(phi); - float cp = std::cos(phi); - Surface::RotationType rot(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0); - - Plane impPointPlane(bs, rot); - GlobalTrajectoryParameters gp( - impPointPlane.toGlobal(lpar.position()), impPointPlane.toGlobal(lpar.momentum()), lpar.charge(), &idealField); - JacobianLocalToCurvilinear jl2c(impPointPlane, lpar, idealField); - - AlgebraicSymMatrix55 mo = ROOT::Math::Similarity(jl2c.jacobian(), m); - - int ndof = 2 * hits.size() - 5; - chi2 = chi2 * ndof; - GlobalPoint vv = gp.position(); - math::XYZPoint pos(vv.x(), vv.y(), vv.z()); - GlobalVector pp = gp.momentum(); - math::XYZVector mom(pp.x(), pp.y(), pp.z()); - - auto track = std::make_unique(chi2, ndof, pos, mom, gp.charge(), CurvilinearTrajectoryError(mo)); - - // bad and edup not supported as fit not present or not reliable - auto tkq = recoQuality[int(q)]; - track->setQuality(tkq); - // loose,tight and HP are inclusive - if (reco::TrackBase::highPurity == tkq) { - track->setQuality(reco::TrackBase::tight); - track->setQuality(reco::TrackBase::loose); - } else if (reco::TrackBase::tight == tkq) { - track->setQuality(reco::TrackBase::loose); - } - track->setQuality(tkq); - // filter??? - tracks.emplace_back(track.release(), hits); - } - // std::cout << "processed " << nt << " good tuples " << tracks.size() << "out of " << indToEdm.size() << std::endl; - - // store tracks - storeTracks(iEvent, tracks, httopo); - iEvent.put(std::move(indToEdmP)); -} - -using PixelTrackProducerFromSoAPhase1 = PixelTrackProducerFromSoAT; -DEFINE_FWK_MODULE(PixelTrackProducerFromSoAPhase1); - -using PixelTrackProducerFromSoAPhase2 = PixelTrackProducerFromSoAT; -DEFINE_FWK_MODULE(PixelTrackProducerFromSoAPhase2); - -using PixelTrackProducerFromSoAHIonPhase1 = PixelTrackProducerFromSoAT; -DEFINE_FWK_MODULE(PixelTrackProducerFromSoAHIonPhase1); diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc deleted file mode 100644 index fc2c76ff00155..0000000000000 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc +++ /dev/null @@ -1,113 +0,0 @@ -#include -#include // needed here by soa layout - -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/Common/interface/HostProduct.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" - -// Switch on to enable checks and printout for found tracks -// #define PIXEL_DEBUG_PRODUCE - -template -class PixelTrackSoAFromCUDAT : public edm::stream::EDProducer { - using TrackSoAHost = TrackSoAHeterogeneousHost; - using TrackSoADevice = TrackSoAHeterogeneousDevice; - -public: - explicit PixelTrackSoAFromCUDAT(const edm::ParameterSet& iConfig); - ~PixelTrackSoAFromCUDAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void acquire(edm::Event const& iEvent, - edm::EventSetup const& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; - void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override; - - edm::EDGetTokenT> tokenCUDA_; - edm::EDPutTokenT tokenSOA_; - - TrackSoAHost tracks_h_; -}; - -template -PixelTrackSoAFromCUDAT::PixelTrackSoAFromCUDAT(const edm::ParameterSet& iConfig) - : tokenCUDA_(consumes(iConfig.getParameter("src"))), tokenSOA_(produces()) {} - -template -void PixelTrackSoAFromCUDAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - desc.add("src", edm::InputTag("pixelTracksCUDA")); - descriptions.addWithDefaultLabel(desc); -} - -template -void PixelTrackSoAFromCUDAT::acquire(edm::Event const& iEvent, - edm::EventSetup const& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - cms::cuda::Product const& inputDataWrapped = iEvent.get(tokenCUDA_); - cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; - auto const& tracks_d = ctx.get(inputDataWrapped); // Tracks on device - tracks_h_ = TrackSoAHost(ctx.stream()); // Create an instance of Tracks on Host, using the stream - cudaCheck(cudaMemcpyAsync(tracks_h_.buffer().get(), - tracks_d.const_buffer().get(), - tracks_d.bufferSize(), - cudaMemcpyDeviceToHost, - ctx.stream())); // Copy data from Device to Host -} - -template -void PixelTrackSoAFromCUDAT::produce(edm::Event& iEvent, edm::EventSetup const& iSetup) { - auto maxTracks = tracks_h_.view().metadata().size(); - auto nTracks = tracks_h_.view().nTracks(); - - assert(nTracks < maxTracks); - if (nTracks == maxTracks - 1) { - edm::LogWarning("PixelTracks") << "Unsorted reconstructed pixel tracks truncated to " << maxTracks - 1 - << " candidates"; - } - -#ifdef PIXEL_DEBUG_PRODUCE - std::cout << "size of SoA " << sizeof(tsoa) << " stride " << maxTracks << std::endl; - std::cout << "found " << nTracks << " tracks in cpu SoA at " << &tsoa << std::endl; - - int32_t nt = 0; - for (int32_t it = 0; it < maxTracks; ++it) { - auto nHits = TracksUtilities::nHits(tracks_h_.view(), it); - assert(nHits == int(tracks_h_.view().hitIndices().size(it))); - if (nHits == 0) - break; // this is a guard: maybe we need to move to nTracks... - nt++; - } - assert(nTracks == nt); -#endif - - // DO NOT make a copy (actually TWO....) - iEvent.emplace(tokenSOA_, std::move(tracks_h_)); - assert(!tracks_h_.buffer()); -} - -using PixelTrackSoAFromCUDAPhase1 = PixelTrackSoAFromCUDAT; -DEFINE_FWK_MODULE(PixelTrackSoAFromCUDAPhase1); - -using PixelTrackSoAFromCUDAPhase2 = PixelTrackSoAFromCUDAT; -DEFINE_FWK_MODULE(PixelTrackSoAFromCUDAPhase2); - -using PixelTrackSoAFromCUDAHIonPhase1 = PixelTrackSoAFromCUDAT; -DEFINE_FWK_MODULE(PixelTrackSoAFromCUDAHIonPhase1); diff --git a/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml b/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml index 5999fef50b346..ada2991b1cdb2 100644 --- a/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml +++ b/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml @@ -1,4 +1,3 @@ - diff --git a/RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc b/RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc deleted file mode 100644 index 8122ccbac3130..0000000000000 --- a/RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc +++ /dev/null @@ -1,179 +0,0 @@ -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "DataFormats/BeamSpot/interface/BeamSpot.h" -#include "DataFormats/GeometrySurface/interface/Plane.h" -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" -#include "DataFormats/TrackingRecHit/interface/InvalidTrackingRecHit.h" -#include "DataFormats/TrajectorySeed/interface/TrajectorySeedCollection.h" -#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "Geometry/CommonDetUnit/interface/GeomDet.h" -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" -#include "Geometry/Records/interface/TrackerTopologyRcd.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" -#include "RecoTracker/PixelTrackFitting/interface/FitUtils.h" -#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h" -#include "TrackingTools/MaterialEffects/interface/PropagatorWithMaterial.h" -#include "TrackingTools/Records/interface/TrackingComponentsRecord.h" -#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h" -#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h" -#include "TrackingTools/TrajectoryState/interface/TrajectoryStateTransform.h" - -/* - produces seeds directly from cuda produced tuples -*/ -template -class SeedProducerFromSoAT : public edm::global::EDProducer<> { -public: - explicit SeedProducerFromSoAT(const edm::ParameterSet& iConfig); - ~SeedProducerFromSoAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; - - // Event data tokens - const edm::EDGetTokenT tBeamSpot_; - const edm::EDGetTokenT> tokenTrack_; - // Event setup tokens - const edm::ESGetToken idealMagneticFieldToken_; - const edm::ESGetToken trackerDigiGeometryToken_; - const edm::ESGetToken trackerPropagatorToken_; - int32_t minNumberOfHits_; -}; - -template -SeedProducerFromSoAT::SeedProducerFromSoAT(const edm::ParameterSet& iConfig) - : tBeamSpot_(consumes(iConfig.getParameter("beamSpot"))), - tokenTrack_(consumes(iConfig.getParameter("src"))), - idealMagneticFieldToken_(esConsumes()), - trackerDigiGeometryToken_(esConsumes()), - trackerPropagatorToken_(esConsumes(edm::ESInputTag("PropagatorWithMaterial"))), - minNumberOfHits_(iConfig.getParameter("minNumberOfHits")) - -{ - produces(); -} - -template -void SeedProducerFromSoAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); - desc.add("src", edm::InputTag("pixelTrackSoA")); - desc.add("minNumberOfHits", 0); - - descriptions.addWithDefaultLabel(desc); -} - -template -void SeedProducerFromSoAT::produce(edm::StreamID streamID, - edm::Event& iEvent, - const edm::EventSetup& iSetup) const { - // std::cout << "Converting gpu helix to trajectory seed" << std::endl; - auto result = std::make_unique(); - - using trackHelper = TracksUtilities; - - auto const& fieldESH = iSetup.getHandle(idealMagneticFieldToken_); - auto const& tracker = iSetup.getHandle(trackerDigiGeometryToken_); - auto const& dus = tracker->detUnits(); - - auto const& propagatorHandle = iSetup.getHandle(trackerPropagatorToken_); - const Propagator* propagator = &(*propagatorHandle); - - const auto& bsh = iEvent.get(tBeamSpot_); - // std::cout << "beamspot " << bsh.x0() << ' ' << bsh.y0() << ' ' << bsh.z0() << std::endl; - GlobalPoint bs(bsh.x0(), bsh.y0(), bsh.z0()); - - auto const& tsoa = iEvent.get(tokenTrack_); - - auto const quality = tsoa.view().quality(); - auto const& detIndices = tsoa.view().detIndices(); - auto maxTracks = tsoa.view().metadata().size(); - - for (int32_t it = 0; it < maxTracks; ++it) { - auto nHits = trackHelper::nHits(tsoa.view(), it); - if (nHits == 0) - break; // this is a guard: maybe we need to move to nTracks... - - auto q = quality[it]; - if (q != pixelTrack::Quality::loose) - continue; // FIXME - if (nHits < minNumberOfHits_) - continue; - - // fill hits with invalid just to hold the detId - auto b = detIndices.begin(it); - edm::OwnVector hits; - for (int iHit = 0; iHit < nHits; ++iHit) { - auto const* det = dus[*(b + iHit)]; - // FIXME at some point get a proper type ... - hits.push_back(new InvalidTrackingRecHit(*det, TrackingRecHit::bad)); - } - - // mind: this values are respect the beamspot! - - float phi = trackHelper::nHits(tsoa.view(), it); - - riemannFit::Vector5d ipar, opar; - riemannFit::Matrix5d icov, ocov; - trackHelper::copyToDense(tsoa.view(), ipar, icov, it); - riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); - - LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); - AlgebraicSymMatrix55 m; - for (int i = 0; i < 5; ++i) - for (int j = i; j < 5; ++j) - m(i, j) = ocov(i, j); - - float sp = std::sin(phi); - float cp = std::cos(phi); - Surface::RotationType rot(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0); - - Plane impPointPlane(bs, rot); - GlobalTrajectoryParameters gp(impPointPlane.toGlobal(lpar.position()), - impPointPlane.toGlobal(lpar.momentum()), - lpar.charge(), - fieldESH.product()); - - JacobianLocalToCurvilinear jl2c(impPointPlane, lpar, *fieldESH.product()); - - AlgebraicSymMatrix55 mo = ROOT::Math::Similarity(jl2c.jacobian(), m); - - FreeTrajectoryState fts(gp, CurvilinearTrajectoryError(mo)); - - auto const& lastHit = hits.back(); - - TrajectoryStateOnSurface outerState = propagator->propagate(fts, *lastHit.surface()); - - if (!outerState.isValid()) { - edm::LogError("SeedFromGPU") << " was trying to create a seed from:\n" - << fts << "\n propagating to: " << lastHit.geographicalId().rawId(); - continue; - } - - auto const& pTraj = trajectoryStateTransform::persistentState(outerState, lastHit.geographicalId().rawId()); - - result->emplace_back(pTraj, hits, alongMomentum); - } - - iEvent.put(std::move(result)); -} - -using SeedProducerFromSoAPhase1 = SeedProducerFromSoAT; -DEFINE_FWK_MODULE(SeedProducerFromSoAPhase1); - -using SeedProducerFromSoAPhase2 = SeedProducerFromSoAT; -DEFINE_FWK_MODULE(SeedProducerFromSoAPhase2); diff --git a/RecoVertex/PixelVertexFinding/plugins/BuildFile.xml b/RecoVertex/PixelVertexFinding/plugins/BuildFile.xml index 85603d97c5f3b..ff50cc1bdbaee 100644 --- a/RecoVertex/PixelVertexFinding/plugins/BuildFile.xml +++ b/RecoVertex/PixelVertexFinding/plugins/BuildFile.xml @@ -20,19 +20,8 @@ - - - - - - - - - - - - - + + diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerCUDA.cc b/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerCUDA.cc deleted file mode 100644 index a1f4101252319..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerCUDA.cc +++ /dev/null @@ -1,166 +0,0 @@ -#include - -#include "CUDADataFormats/Common/interface/Product.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/RunningAverage.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" - -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" - -#include "gpuVertexFinder.h" - -#undef PIXVERTEX_DEBUG_PRODUCE - -template -class PixelVertexProducerCUDAT : public edm::global::EDProducer<> { - using TracksSoADevice = TrackSoAHeterogeneousDevice; - using TracksSoAHost = TrackSoAHeterogeneousHost; - using GPUAlgo = gpuVertexFinder::Producer; - -public: - explicit PixelVertexProducerCUDAT(const edm::ParameterSet& iConfig); - ~PixelVertexProducerCUDAT() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void produceOnGPU(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const; - void produceOnCPU(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const; - void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; - - bool onGPU_; - - edm::EDGetTokenT> tokenGPUTrack_; - edm::EDPutTokenT> tokenGPUVertex_; - edm::EDGetTokenT tokenCPUTrack_; - edm::EDPutTokenT tokenCPUVertex_; - - const GPUAlgo gpuAlgo_; - - // Tracking cuts before sending tracks to vertex algo - const float ptMin_; - const float ptMax_; -}; - -template -PixelVertexProducerCUDAT::PixelVertexProducerCUDAT(const edm::ParameterSet& conf) - : onGPU_(conf.getParameter("onGPU")), - gpuAlgo_(conf.getParameter("oneKernel"), - conf.getParameter("useDensity"), - conf.getParameter("useDBSCAN"), - conf.getParameter("useIterative"), - conf.getParameter("doSplitting"), - conf.getParameter("minT"), - conf.getParameter("eps"), - conf.getParameter("errmax"), - conf.getParameter("chi2max")), - ptMin_(conf.getParameter("PtMin")), // 0.5 GeV - ptMax_(conf.getParameter("PtMax")) // 75. GeV -{ - if (onGPU_) { - tokenGPUTrack_ = consumes(conf.getParameter("pixelTrackSrc")); - tokenGPUVertex_ = produces(); - } else { - tokenCPUTrack_ = consumes(conf.getParameter("pixelTrackSrc")); - tokenCPUVertex_ = produces(); - } -} - -template -void PixelVertexProducerCUDAT::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - // Only one of these three algos can be used at once. - // Maybe this should become a Plugin Factory - desc.add("onGPU", true); - desc.add("oneKernel", true); - desc.add("useDensity", true); - desc.add("useDBSCAN", false); - desc.add("useIterative", false); - desc.add("doSplitting", true); - - desc.add("minT", 2); // min number of neighbours to be "core" - desc.add("eps", 0.07); // max absolute distance to cluster - desc.add("errmax", 0.01); // max error to be "seed" - desc.add("chi2max", 9.); // max normalized distance to cluster - - desc.add("PtMin", 0.5); - desc.add("PtMax", 75.); - desc.add("pixelTrackSrc", edm::InputTag("pixelTracksCUDA")); - - descriptions.addWithDefaultLabel(desc); -} - -template -void PixelVertexProducerCUDAT::produceOnGPU(edm::StreamID streamID, - edm::Event& iEvent, - const edm::EventSetup& iSetup) const { - using TracksSoA = TrackSoAHeterogeneousDevice; - auto hTracks = iEvent.getHandle(tokenGPUTrack_); - - cms::cuda::ScopedContextProduce ctx{*hTracks}; - auto& tracks = ctx.get(*hTracks); - - ctx.emplace(iEvent, tokenGPUVertex_, gpuAlgo_.makeAsync(ctx.stream(), tracks.view(), ptMin_, ptMax_)); -} - -template -void PixelVertexProducerCUDAT::produceOnCPU(edm::StreamID streamID, - edm::Event& iEvent, - const edm::EventSetup& iSetup) const { - auto& tracks = iEvent.get(tokenCPUTrack_); - -#ifdef PIXVERTEX_DEBUG_PRODUCE - auto const& tsoa = *tracks; - auto maxTracks = tsoa.stride(); - std::cout << "size of SoA " << sizeof(tsoa) << " stride " << maxTracks << std::endl; - - int32_t nt = 0; - for (int32_t it = 0; it < maxTracks; ++it) { - auto nHits = TracksUtilities::nHits(tracks.view(), it); - assert(nHits == int(tracks.view().hitIndices().size(it))); - if (nHits == 0) - break; // this is a guard: maybe we need to move to nTracks... - nt++; - } - std::cout << "found " << nt << " tracks in cpu SoA for Vertexing at " << tracks << std::endl; -#endif // PIXVERTEX_DEBUG_PRODUCE - - iEvent.emplace(tokenCPUVertex_, gpuAlgo_.make(tracks.view(), ptMin_, ptMax_)); -} - -template -void PixelVertexProducerCUDAT::produce(edm::StreamID streamID, - edm::Event& iEvent, - const edm::EventSetup& iSetup) const { - if (onGPU_) { - produceOnGPU(streamID, iEvent, iSetup); - } else { - produceOnCPU(streamID, iEvent, iSetup); - } -} - -using PixelVertexProducerCUDAPhase1 = PixelVertexProducerCUDAT; -DEFINE_FWK_MODULE(PixelVertexProducerCUDAPhase1); - -using PixelVertexProducerCUDAPhase2 = PixelVertexProducerCUDAT; -DEFINE_FWK_MODULE(PixelVertexProducerCUDAPhase2); - -using PixelVertexProducerCUDAHIonPhase1 = PixelVertexProducerCUDAT; -DEFINE_FWK_MODULE(PixelVertexProducerCUDAHIonPhase1); diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoA.cc b/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoA.cc deleted file mode 100644 index 91de2bdb6992b..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoA.cc +++ /dev/null @@ -1,178 +0,0 @@ -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" -#include "DataFormats/BeamSpot/interface/BeamSpot.h" -#include "DataFormats/Common/interface/OrphanHandle.h" -#include "DataFormats/TrackReco/interface/Track.h" -#include "DataFormats/TrackReco/interface/TrackExtra.h" -#include "DataFormats/TrackReco/interface/TrackFwd.h" -#include "DataFormats/VertexReco/interface/Vertex.h" -#include "DataFormats/VertexReco/interface/VertexFwd.h" -#include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "Geometry/Records/interface/TrackerTopologyRcd.h" -#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" - -#undef PIXVERTEX_DEBUG_PRODUCE - -class PixelVertexProducerFromSoA : public edm::global::EDProducer<> { -public: - using IndToEdm = std::vector; - - explicit PixelVertexProducerFromSoA(const edm::ParameterSet &iConfig); - ~PixelVertexProducerFromSoA() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions &descriptions); - -private: - void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override; - - edm::EDGetTokenT tokenVertex_; - edm::EDGetTokenT tokenBeamSpot_; - edm::EDGetTokenT tokenTracks_; - edm::EDGetTokenT tokenIndToEdm_; -}; - -PixelVertexProducerFromSoA::PixelVertexProducerFromSoA(const edm::ParameterSet &conf) - : tokenVertex_(consumes(conf.getParameter("src"))), - tokenBeamSpot_(consumes(conf.getParameter("beamSpot"))), - tokenTracks_(consumes(conf.getParameter("TrackCollection"))), - tokenIndToEdm_(consumes(conf.getParameter("TrackCollection"))) { - produces(); -} - -void PixelVertexProducerFromSoA::fillDescriptions(edm::ConfigurationDescriptions &descriptions) { - edm::ParameterSetDescription desc; - - desc.add("TrackCollection", edm::InputTag("pixelTracks")); - desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); - desc.add("src", edm::InputTag("pixelVerticesSoA")); - - descriptions.add("pixelVertexFromSoA", desc); -} - -void PixelVertexProducerFromSoA::produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &) const { - auto vertexes = std::make_unique(); - - auto tracksHandle = iEvent.getHandle(tokenTracks_); - auto tracksSize = tracksHandle->size(); - auto const &indToEdm = iEvent.get(tokenIndToEdm_); - auto bsHandle = iEvent.getHandle(tokenBeamSpot_); - - float x0 = 0, y0 = 0, z0 = 0, dxdz = 0, dydz = 0; - std::vector itrk; - itrk.reserve(64); // avoid first relocations - if (!bsHandle.isValid()) { - edm::LogWarning("PixelVertexProducer") << "No beamspot found. returning vertexes with (0,0,Z) "; - } else { - const reco::BeamSpot &bs = *bsHandle; - x0 = bs.x0(); - y0 = bs.y0(); - z0 = bs.z0(); - dxdz = bs.dxdz(); - dydz = bs.dydz(); - } - - auto const &soa = iEvent.get(tokenVertex_); - - int nv = soa.view().nvFinal(); - -#ifdef PIXVERTEX_DEBUG_PRODUCE - std::cout << "converting " << nv << " vertices " - << " from " << indToEdm.size() << " tracks" << std::endl; -#endif // PIXVERTEX_DEBUG_PRODUCE - - std::set uind; // for verifing index consistency - for (int j = nv - 1; j >= 0; --j) { - auto i = soa.view()[j].sortInd(); // on gpu sorted in ascending order.... - assert(i < nv); - uind.insert(i); - assert(itrk.empty()); - auto z = soa.view()[i].zv(); - auto x = x0 + dxdz * z; - auto y = y0 + dydz * z; - z += z0; - reco::Vertex::Error err; - err(2, 2) = 1.f / soa.view()[i].wv(); - err(2, 2) *= 2.; // artifically inflate error - //Copy also the tracks (no intention to be efficient....) - for (auto k = 0U; k < indToEdm.size(); ++k) { - if (soa.view()[k].idv() == int16_t(i)) - itrk.push_back(k); - } - auto nt = itrk.size(); - if (nt == 0) { -#ifdef PIXVERTEX_DEBUG_PRODUCE - std::cout << "vertex " << i << " with no tracks..." << std::endl; -#endif // PIXVERTEX_DEBUG_PRODUCE - continue; - } - if (nt < 2) { - itrk.clear(); - continue; - } // remove outliers - (*vertexes).emplace_back(reco::Vertex::Point(x, y, z), err, soa.view()[i].chi2(), soa.view()[i].ndof(), nt); - auto &v = (*vertexes).back(); - v.reserve(itrk.size()); - for (auto it : itrk) { - assert(it < int(indToEdm.size())); - auto k = indToEdm[it]; - if (k > tracksSize) { - edm::LogWarning("PixelVertexProducer") << "oops track " << it << " does not exists on CPU " << k; - continue; - } - auto tk = reco::TrackRef(tracksHandle, k); - v.add(tk); - } - itrk.clear(); - } - - LogDebug("PixelVertexProducer") << ": Found " << vertexes->size() << " vertexes\n"; - for (unsigned int i = 0; i < vertexes->size(); ++i) { - LogDebug("PixelVertexProducer") << "Vertex number " << i << " has " << (*vertexes)[i].tracksSize() - << " tracks with a position of " << (*vertexes)[i].z() << " +- " - << std::sqrt((*vertexes)[i].covariance(2, 2)); - } - - // legacy logic.... - if (vertexes->empty() && bsHandle.isValid()) { - const reco::BeamSpot &bs = *bsHandle; - - GlobalError bse(bs.rotatedCovariance3D()); - if ((bse.cxx() <= 0.) || (bse.cyy() <= 0.) || (bse.czz() <= 0.)) { - AlgebraicSymMatrix33 we; - we(0, 0) = 10000; - we(1, 1) = 10000; - we(2, 2) = 10000; - vertexes->push_back(reco::Vertex(bs.position(), we, 0., 0., 0)); - - edm::LogInfo("PixelVertexProducer") << "No vertices found. Beamspot with invalid errors " << bse.matrix() - << "\nWill put Vertex derived from dummy-fake BeamSpot into Event.\n" - << (*vertexes)[0].x() << "\n" - << (*vertexes)[0].y() << "\n" - << (*vertexes)[0].z() << "\n"; - } else { - vertexes->push_back(reco::Vertex(bs.position(), bs.rotatedCovariance3D(), 0., 0., 0)); - - edm::LogInfo("PixelVertexProducer") << "No vertices found. Will put Vertex derived from BeamSpot into Event:\n" - << (*vertexes)[0].x() << "\n" - << (*vertexes)[0].y() << "\n" - << (*vertexes)[0].z() << "\n"; - } - } else if (vertexes->empty() && !bsHandle.isValid()) { - edm::LogWarning("PixelVertexProducer") << "No beamspot and no vertex found. No vertex returned."; - } - - iEvent.put(std::move(vertexes)); -} - -DEFINE_FWK_MODULE(PixelVertexProducerFromSoA); diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexSoAFromCUDA.cc b/RecoVertex/PixelVertexFinding/plugins/PixelVertexSoAFromCUDA.cc deleted file mode 100644 index b13b6c96f0bd3..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexSoAFromCUDA.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include - -#include "CUDADataFormats/Common/interface/Product.h" -#include "CUDADataFormats/Common/interface/HostProduct.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" -#include "DataFormats/Common/interface/Handle.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Utilities/interface/EDGetToken.h" -#include "FWCore/Utilities/interface/InputTag.h" -#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" - -class PixelVertexSoAFromCUDA : public edm::stream::EDProducer { -public: - explicit PixelVertexSoAFromCUDA(const edm::ParameterSet& iConfig); - ~PixelVertexSoAFromCUDA() override = default; - - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - -private: - void acquire(edm::Event const& iEvent, - edm::EventSetup const& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; - void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override; - - edm::EDGetTokenT> tokenCUDA_; - edm::EDPutTokenT tokenSOA_; - - ZVertexSoAHost zvertex_h; -}; - -PixelVertexSoAFromCUDA::PixelVertexSoAFromCUDA(const edm::ParameterSet& iConfig) - : tokenCUDA_(consumes>(iConfig.getParameter("src"))), - tokenSOA_(produces()) {} - -void PixelVertexSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - - desc.add("src", edm::InputTag("pixelVerticesCUDA")); - descriptions.add("pixelVerticesSoA", desc); -} - -void PixelVertexSoAFromCUDA::acquire(edm::Event const& iEvent, - edm::EventSetup const& iSetup, - edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - cms::cuda::Product const& inputDataWrapped = iEvent.get(tokenCUDA_); - cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; - auto const& zvertex_d = ctx.get(inputDataWrapped); // Tracks on device - zvertex_h = ZVertexSoAHost(ctx.stream()); // Create an instance of Tracks on Host, using the stream - cudaCheck(cudaMemcpyAsync(zvertex_h.buffer().get(), - zvertex_d.const_buffer().get(), - zvertex_d.bufferSize(), - cudaMemcpyDeviceToHost, - ctx.stream())); // Copy data from Device to Host -} - -void PixelVertexSoAFromCUDA::produce(edm::Event& iEvent, edm::EventSetup const& iSetup) { - // No copies.... - iEvent.emplace(tokenSOA_, std::move(zvertex_h)); -} - -DEFINE_FWK_MODULE(PixelVertexSoAFromCUDA); diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h b/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h deleted file mode 100644 index e7eadd9c61dda..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceSoADevice_h -#define RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceSoADevice_h - -#include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h" - -template -class PixelVertexWorkSpaceSoADevice : public cms::cuda::PortableDeviceCollection> { -public: - explicit PixelVertexWorkSpaceSoADevice() = default; - - // Constructor which specifies the SoA size and CUDA stream - explicit PixelVertexWorkSpaceSoADevice(cudaStream_t stream) - : PortableDeviceCollection>(S, stream) {} -}; - -namespace gpuVertexFinder { - namespace workSpace { - using PixelVertexWorkSpaceSoADevice = PixelVertexWorkSpaceSoADevice; - } -} // namespace gpuVertexFinder -#endif diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h b/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h deleted file mode 100644 index 9a4dc49b87f23..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceSoAHost_h -#define RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceSoAHost_h - -#include "CUDADataFormats/Common/interface/PortableHostCollection.h" -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h" - -template -class PixelVertexWorkSpaceSoAHost : public cms::cuda::PortableHostCollection> { -public: - explicit PixelVertexWorkSpaceSoAHost() : PortableHostCollection>(S) {} - - // Constructor which specifies the SoA size and CUDA stream - explicit PixelVertexWorkSpaceSoAHost(cudaStream_t stream) - : PortableHostCollection>(S, stream) {} -}; - -namespace gpuVertexFinder { - namespace workSpace { - using PixelVertexWorkSpaceSoAHost = PixelVertexWorkSpaceSoAHost; - } -} // namespace gpuVertexFinder -#endif diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h b/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h deleted file mode 100644 index cc0828f153330..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceUtilities_h -#define RecoVertex_PixelVertexFinding_plugins_PixelVertexWorkSpaceUtilities_h - -#include -#include "DataFormats/SoATemplate/interface/SoALayout.h" - -// Intermediate data used in the vertex reco algos -// For internal use only -GENERATE_SOA_LAYOUT(PixelVertexWSSoALayout, - SOA_COLUMN(uint16_t, itrk), // index of original track - SOA_COLUMN(float, zt), // input track z at bs - SOA_COLUMN(float, ezt2), // input error^2 on the above - SOA_COLUMN(float, ptt2), // input pt^2 on the above - SOA_COLUMN(uint8_t, izt), // interized z-position of input tracks - SOA_COLUMN(int32_t, iv), // vertex index for each associated track - SOA_SCALAR(uint32_t, ntrks), // number of "selected tracks" - SOA_SCALAR(uint32_t, nvIntermediate)) // the number of vertices after splitting pruning etc. - -// Methods that operate on View and ConstView of the WorkSpaceSoALayout. -namespace gpuVertexFinder { - namespace workSpace { - using PixelVertexWorkSpaceSoALayout = PixelVertexWSSoALayout<>; - using PixelVertexWorkSpaceSoAView = PixelVertexWSSoALayout<>::View; - using PixelVertexWorkSpaceSoAConstView = PixelVertexWSSoALayout<>::ConstView; - - namespace utilities { - __host__ __device__ inline void init(PixelVertexWorkSpaceSoAView &workspace_view) { - workspace_view.ntrks() = 0; - workspace_view.nvIntermediate() = 0; - } - } // namespace utilities - } // namespace workSpace -} // namespace gpuVertexFinder - -#endif diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h b/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h deleted file mode 100644 index e38c4bd495899..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h +++ /dev/null @@ -1,237 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksByDensity_h -#define RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksByDensity_h - -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -#include "gpuVertexFinder.h" - -namespace gpuVertexFinder { - - // this algo does not really scale as it works in a single block... - // enough for <10K tracks we have - // - // based on Rodrighez&Laio algo - // - __device__ __forceinline__ void clusterTracksByDensity(VtxSoAView& pdata, - WsSoAView& pws, - int minT, // min number of neighbours to be "seed" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster - ) { - using namespace gpuVertexFinder; - constexpr bool verbose = false; // in principle the compiler should optmize out if false - - if (verbose && 0 == threadIdx.x) - printf("params %d %f %f %f\n", minT, eps, errmax, chi2max); - - auto er2mx = errmax * errmax; - - auto& __restrict__ data = pdata; - auto& __restrict__ ws = pws; - auto nt = ws.ntrks(); - float const* __restrict__ zt = ws.zt().data(); - float const* __restrict__ ezt2 = ws.ezt2().data(); - - uint32_t& nvFinal = data.nvFinal(); - uint32_t& nvIntermediate = ws.nvIntermediate(); - - uint8_t* __restrict__ izt = ws.izt().data(); - int32_t* __restrict__ nn = data.ndof().data(); - int32_t* __restrict__ iv = ws.iv().data(); - - assert(zt); - assert(ezt2); - assert(izt); - assert(nn); - assert(iv); - - using Hist = cms::cuda::HistoContainer; - __shared__ Hist hist; - __shared__ typename Hist::Counter hws[32]; - for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) { - hist.off[j] = 0; - } - __syncthreads(); - - if (verbose && 0 == threadIdx.x) - printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt); - - assert((int)nt <= hist.capacity()); - - // fill hist (bin shall be wider than "eps") - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - assert(i < zVertex::utilities::MAXTRACKS); - int iz = int(zt[i] * 10.); // valid if eps<=0.1 - // iz = std::clamp(iz, INT8_MIN, INT8_MAX); // sorry c++17 only - iz = std::min(std::max(iz, INT8_MIN), INT8_MAX); - izt[i] = iz - INT8_MIN; - assert(iz - INT8_MIN >= 0); - assert(iz - INT8_MIN < 256); - hist.count(izt[i]); - iv[i] = i; - nn[i] = 0; - } - __syncthreads(); - if (threadIdx.x < 32) - hws[threadIdx.x] = 0; // used by prefix scan... - __syncthreads(); - hist.finalize(hws); - __syncthreads(); - assert(hist.size() == nt); - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - hist.fill(izt[i], uint16_t(i)); - } - __syncthreads(); - - // count neighbours - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (ezt2[i] > er2mx) - continue; - auto loop = [&](uint32_t j) { - if (i == j) - return; - auto dist = std::abs(zt[i] - zt[j]); - if (dist > eps) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; - nn[i]++; - }; - - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __syncthreads(); - - // find closest above me .... (we ignore the possibility of two j at same distance from i) - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - float mdist = eps; - auto loop = [&](uint32_t j) { - if (nn[j] < nn[i]) - return; - if (nn[j] == nn[i] && zt[j] >= zt[i]) - return; // if equal use natural order... - auto dist = std::abs(zt[i] - zt[j]); - if (dist > mdist) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; // (break natural order???) - mdist = dist; - iv[i] = j; // assign to cluster (better be unique??) - }; - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __syncthreads(); - -#ifdef GPU_DEBUG - // mini verification - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] != int(i)) - assert(iv[iv[i]] != int(i)); - } - __syncthreads(); -#endif - - // consolidate graph (percolate index of seed) - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - auto m = iv[i]; - while (m != iv[m]) - m = iv[m]; - iv[i] = m; - } - -#ifdef GPU_DEBUG - __syncthreads(); - // mini verification - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] != int(i)) - assert(iv[iv[i]] != int(i)); - } -#endif - -#ifdef GPU_DEBUG - // and verify that we did not spit any cluster... - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - auto minJ = i; - auto mdist = eps; - auto loop = [&](uint32_t j) { - if (nn[j] < nn[i]) - return; - if (nn[j] == nn[i] && zt[j] >= zt[i]) - return; // if equal use natural order... - auto dist = std::abs(zt[i] - zt[j]); - if (dist > mdist) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; - mdist = dist; - minJ = j; - }; - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - // should belong to the same cluster... - assert(iv[i] == iv[minJ]); - assert(nn[i] <= nn[iv[i]]); - } - __syncthreads(); -#endif - - __shared__ unsigned int foundClusters; - foundClusters = 0; - __syncthreads(); - - // find the number of different clusters, identified by a tracks with clus[i] == i and density larger than threshold; - // mark these tracks with a negative id. - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] == int(i)) { - if (nn[i] >= minT) { - auto old = atomicInc(&foundClusters, 0xffffffff); - iv[i] = -(old + 1); - } else { // noise - iv[i] = -9998; - } - } - } - __syncthreads(); - - assert(foundClusters < zVertex::utilities::MAXVTX); - - // propagate the negative id to all the tracks in the cluster. - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] >= 0) { - // mark each track in a cluster with the same id as the first one - iv[i] = iv[iv[i]]; - } - } - __syncthreads(); - - // adjust the cluster id to be a positive value starting from 0 - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - iv[i] = -iv[i] - 1; - } - - nvIntermediate = nvFinal = foundClusters; - - if (verbose && 0 == threadIdx.x) - printf("found %d proto vertices\n", foundClusters); - } - - __global__ void clusterTracksByDensityKernel(VtxSoAView pdata, - WsSoAView pws, - int minT, // min number of neighbours to be "seed" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster - ) { - clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max); - } - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksByDensity_h diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h b/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h deleted file mode 100644 index 588fabd3d0d77..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h +++ /dev/null @@ -1,243 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksDBSCAN_h -#define RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksDBSCAN_h - -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -#include "gpuVertexFinder.h" - -namespace gpuVertexFinder { - - // this algo does not really scale as it works in a single block... - // enough for <10K tracks we have - __global__ void clusterTracksDBSCAN(VtxSoAView pdata, - WsSoAView pws, - int minT, // min number of neighbours to be "core" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster - ) { - constexpr bool verbose = false; // in principle the compiler should optmize out if false - - if (verbose && 0 == threadIdx.x) - printf("params %d %f %f %f\n", minT, eps, errmax, chi2max); - - auto er2mx = errmax * errmax; - - auto& __restrict__ data = pdata; - auto& __restrict__ ws = pws; - auto nt = ws.ntrks(); - float const* __restrict__ zt = ws.zt().data(); - float const* __restrict__ ezt2 = ws.ezt2().data(); - - uint32_t& nvFinal = data.nvFinal(); - uint32_t& nvIntermediate = ws.nvIntermediate(); - - uint8_t* __restrict__ izt = ws.izt().data(); - int32_t* __restrict__ nn = data.ndof().data(); - int32_t* __restrict__ iv = ws.iv().data(); - - assert(zt); - assert(iv); - assert(nn); - assert(ezt2); - - using Hist = cms::cuda::HistoContainer; - __shared__ Hist hist; - __shared__ typename Hist::Counter hws[32]; - for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) { - hist.off[j] = 0; - } - __syncthreads(); - - if (verbose && 0 == threadIdx.x) - printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt); - - assert((int)nt <= hist.capacity()); - - // fill hist (bin shall be wider than "eps") - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - assert(i < zVertex::utilities::MAXTRACKS); - int iz = int(zt[i] * 10.); // valid if eps<=0.1 - iz = std::clamp(iz, INT8_MIN, INT8_MAX); - izt[i] = iz - INT8_MIN; - assert(iz - INT8_MIN >= 0); - assert(iz - INT8_MIN < 256); - hist.count(izt[i]); - iv[i] = i; - nn[i] = 0; - } - __syncthreads(); - if (threadIdx.x < 32) - hws[threadIdx.x] = 0; // used by prefix scan... - __syncthreads(); - hist.finalize(hws); - __syncthreads(); - assert(hist.size() == nt); - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - hist.fill(izt[i], uint16_t(i)); - } - __syncthreads(); - - // count neighbours - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (ezt2[i] > er2mx) - continue; - auto loop = [&](uint32_t j) { - if (i == j) - return; - auto dist = std::abs(zt[i] - zt[j]); - if (dist > eps) - return; - // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return; - nn[i]++; - }; - - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __syncthreads(); - - // find NN with smaller z... - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (nn[i] < minT) - continue; // DBSCAN core rule - float mz = zt[i]; - auto loop = [&](uint32_t j) { - if (zt[j] >= mz) - return; - if (nn[j] < minT) - return; // DBSCAN core rule - auto dist = std::abs(zt[i] - zt[j]); - if (dist > eps) - return; - // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return; - mz = zt[j]; - iv[i] = j; // assign to cluster (better be unique??) - }; - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __syncthreads(); - -#ifdef GPU_DEBUG - // mini verification - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] != int(i)) - assert(iv[iv[i]] != int(i)); - } - __syncthreads(); -#endif - - // consolidate graph (percolate index of seed) - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - auto m = iv[i]; - while (m != iv[m]) - m = iv[m]; - iv[i] = m; - } - - __syncthreads(); - -#ifdef GPU_DEBUG - // mini verification - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] != int(i)) - assert(iv[iv[i]] != int(i)); - } - __syncthreads(); -#endif - -#ifdef GPU_DEBUG - // and verify that we did not spit any cluster... - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (nn[i] < minT) - continue; // DBSCAN core rule - assert(zt[iv[i]] <= zt[i]); - auto loop = [&](uint32_t j) { - if (nn[j] < minT) - return; // DBSCAN core rule - auto dist = std::abs(zt[i] - zt[j]); - if (dist > eps) - return; - // if (dist*dist>chi2max*(ezt2[i]+ezt2[j])) return; - // they should belong to the same cluster, isn't it? - if (iv[i] != iv[j]) { - printf("ERROR %d %d %f %f %d\n", i, iv[i], zt[i], zt[iv[i]], iv[iv[i]]); - printf(" %d %d %f %f %d\n", j, iv[j], zt[j], zt[iv[j]], iv[iv[j]]); - ; - } - assert(iv[i] == iv[j]); - }; - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - __syncthreads(); -#endif - - // collect edges (assign to closest cluster of closest point??? here to closest point) - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule - if (nn[i] >= minT) - continue; // DBSCAN edge rule - float mdist = eps; - auto loop = [&](uint32_t j) { - if (nn[j] < minT) - return; // DBSCAN core rule - auto dist = std::abs(zt[i] - zt[j]); - if (dist > mdist) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; // needed? - mdist = dist; - iv[i] = iv[j]; // assign to cluster (better be unique??) - }; - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __shared__ unsigned int foundClusters; - foundClusters = 0; - __syncthreads(); - - // find the number of different clusters, identified by a tracks with clus[i] == i; - // mark these tracks with a negative id. - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] == int(i)) { - if (nn[i] >= minT) { - auto old = atomicInc(&foundClusters, 0xffffffff); - iv[i] = -(old + 1); - } else { // noise - iv[i] = -9998; - } - } - } - __syncthreads(); - - assert(foundClusters < zVertex::utilities::MAXVTX); - - // propagate the negative id to all the tracks in the cluster. - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] >= 0) { - // mark each track in a cluster with the same id as the first one - iv[i] = iv[iv[i]]; - } - } - __syncthreads(); - - // adjust the cluster id to be a positive value starting from 0 - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - iv[i] = -iv[i] - 1; - } - - nvIntermediate = nvFinal = foundClusters; - - if (verbose && 0 == threadIdx.x) - printf("found %d proto vertices\n", foundClusters); - } - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksDBSCAN_h diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h b/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h deleted file mode 100644 index 5a56e23eedeee..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h +++ /dev/null @@ -1,214 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksIterative_h -#define RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksIterative_h - -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -#include "gpuVertexFinder.h" - -namespace gpuVertexFinder { - - // this algo does not really scale as it works in a single block... - // enough for <10K tracks we have - __global__ void clusterTracksIterative(VtxSoAView pdata, - WsSoAView pws, - int minT, // min number of neighbours to be "core" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster - ) { - constexpr bool verbose = false; // in principle the compiler should optmize out if false - - if (verbose && 0 == threadIdx.x) - printf("params %d %f %f %f\n", minT, eps, errmax, chi2max); - - auto er2mx = errmax * errmax; - - auto& __restrict__ data = pdata; - auto& __restrict__ ws = pws; - auto nt = ws.ntrks(); - float const* __restrict__ zt = ws.zt().data(); - float const* __restrict__ ezt2 = ws.ezt2().data(); - - uint32_t& nvFinal = data.nvFinal(); - uint32_t& nvIntermediate = ws.nvIntermediate(); - - uint8_t* __restrict__ izt = ws.izt().data(); - int32_t* __restrict__ nn = data.ndof().data(); - int32_t* __restrict__ iv = ws.iv().data(); - - assert(zt); - assert(nn); - assert(iv); - assert(ezt2); - - using Hist = cms::cuda::HistoContainer; - __shared__ Hist hist; - __shared__ typename Hist::Counter hws[32]; - for (auto j = threadIdx.x; j < Hist::totbins(); j += blockDim.x) { - hist.off[j] = 0; - } - __syncthreads(); - - if (verbose && 0 == threadIdx.x) - printf("booked hist with %d bins, size %d for %d tracks\n", hist.nbins(), hist.capacity(), nt); - - assert((int)nt <= hist.capacity()); - - // fill hist (bin shall be wider than "eps") - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - assert(i < zVertex::utilities::MAXTRACKS); - int iz = int(zt[i] * 10.); // valid if eps<=0.1 - iz = std::clamp(iz, INT8_MIN, INT8_MAX); - izt[i] = iz - INT8_MIN; - assert(iz - INT8_MIN >= 0); - assert(iz - INT8_MIN < 256); - hist.count(izt[i]); - iv[i] = i; - nn[i] = 0; - } - __syncthreads(); - if (threadIdx.x < 32) - hws[threadIdx.x] = 0; // used by prefix scan... - __syncthreads(); - hist.finalize(hws); - __syncthreads(); - assert(hist.size() == nt); - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - hist.fill(izt[i], uint16_t(i)); - } - __syncthreads(); - - // count neighbours - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (ezt2[i] > er2mx) - continue; - auto loop = [&](uint32_t j) { - if (i == j) - return; - auto dist = std::abs(zt[i] - zt[j]); - if (dist > eps) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; - nn[i]++; - }; - - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __shared__ int nloops; - nloops = 0; - - __syncthreads(); - - // cluster seeds only - bool more = true; - while (__syncthreads_or(more)) { - if (1 == nloops % 2) { - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - auto m = iv[i]; - while (m != iv[m]) - m = iv[m]; - iv[i] = m; - } - } else { - more = false; - for (auto k = threadIdx.x; k < hist.size(); k += blockDim.x) { - auto p = hist.begin() + k; - auto i = (*p); - auto be = std::min(Hist::bin(izt[i]) + 1, int(hist.nbins() - 1)); - if (nn[i] < minT) - continue; // DBSCAN core rule - auto loop = [&](uint32_t j) { - assert(i != j); - if (nn[j] < minT) - return; // DBSCAN core rule - auto dist = std::abs(zt[i] - zt[j]); - if (dist > eps) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; - auto old = atomicMin(&iv[j], iv[i]); - if (old != iv[i]) { - // end the loop only if no changes were applied - more = true; - } - atomicMin(&iv[i], old); - }; - ++p; - for (; p < hist.end(be); ++p) - loop(*p); - } // for i - } - if (threadIdx.x == 0) - ++nloops; - } // while - - // collect edges (assign to closest cluster of closest point??? here to closest point) - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - // if (nn[i]==0 || nn[i]>=minT) continue; // DBSCAN edge rule - if (nn[i] >= minT) - continue; // DBSCAN edge rule - float mdist = eps; - auto loop = [&](int j) { - if (nn[j] < minT) - return; // DBSCAN core rule - auto dist = std::abs(zt[i] - zt[j]); - if (dist > mdist) - return; - if (dist * dist > chi2max * (ezt2[i] + ezt2[j])) - return; // needed? - mdist = dist; - iv[i] = iv[j]; // assign to cluster (better be unique??) - }; - cms::cuda::forEachInBins(hist, izt[i], 1, loop); - } - - __shared__ unsigned int foundClusters; - foundClusters = 0; - __syncthreads(); - - // find the number of different clusters, identified by a tracks with clus[i] == i; - // mark these tracks with a negative id. - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] == int(i)) { - if (nn[i] >= minT) { - auto old = atomicInc(&foundClusters, 0xffffffff); - iv[i] = -(old + 1); - } else { // noise - iv[i] = -9998; - } - } - } - __syncthreads(); - - assert(foundClusters < zVertex::utilities::MAXVTX); - - // propagate the negative id to all the tracks in the cluster. - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] >= 0) { - // mark each track in a cluster with the same id as the first one - iv[i] = iv[iv[i]]; - } - } - __syncthreads(); - - // adjust the cluster id to be a positive value starting from 0 - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - iv[i] = -iv[i] - 1; - } - - nvIntermediate = nvFinal = foundClusters; - - if (verbose && 0 == threadIdx.x) - printf("found %d proto vertices\n", foundClusters); - } - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuClusterTracksIterative_h diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h b/RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h deleted file mode 100644 index bf2652b8acd5f..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuFitVertices_h -#define RecoVertex_PixelVertexFinding_plugins_gpuFitVertices_h - -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -#include "gpuVertexFinder.h" - -namespace gpuVertexFinder { - - __device__ __forceinline__ void fitVertices(VtxSoAView& pdata, - WsSoAView& pws, - float chi2Max // for outlier rejection - ) { - constexpr bool verbose = false; // in principle the compiler should optmize out if false - - auto& __restrict__ data = pdata; - auto& __restrict__ ws = pws; - auto nt = ws.ntrks(); - float const* __restrict__ zt = ws.zt().data(); - float const* __restrict__ ezt2 = ws.ezt2().data(); - float* __restrict__ zv = data.zv().data(); - float* __restrict__ wv = data.wv().data(); - float* __restrict__ chi2 = data.chi2().data(); - uint32_t& nvFinal = data.nvFinal(); - uint32_t& nvIntermediate = ws.nvIntermediate(); - - int32_t* __restrict__ nn = data.ndof().data(); - int32_t* __restrict__ iv = ws.iv().data(); - - assert(nvFinal <= nvIntermediate); - nvFinal = nvIntermediate; - auto foundClusters = nvFinal; - - // zero - for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) { - zv[i] = 0; - wv[i] = 0; - chi2[i] = 0; - } - - // only for test - __shared__ int noise; - if (verbose && 0 == threadIdx.x) - noise = 0; - - __syncthreads(); - - // compute cluster location - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] > 9990) { - if (verbose) - atomicAdd(&noise, 1); - continue; - } - assert(iv[i] >= 0); - assert(iv[i] < int(foundClusters)); - auto w = 1.f / ezt2[i]; - atomicAdd_block(&zv[iv[i]], zt[i] * w); - atomicAdd_block(&wv[iv[i]], w); - } - - __syncthreads(); - // reuse nn - for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) { - assert(wv[i] > 0.f); - zv[i] /= wv[i]; - nn[i] = -1; // ndof - } - __syncthreads(); - - // compute chi2 - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] > 9990) - continue; - - auto c2 = zv[iv[i]] - zt[i]; - c2 *= c2 / ezt2[i]; - if (c2 > chi2Max) { - iv[i] = 9999; - continue; - } - atomicAdd_block(&chi2[iv[i]], c2); - atomicAdd_block(&nn[iv[i]], 1); - } - __syncthreads(); - for (auto i = threadIdx.x; i < foundClusters; i += blockDim.x) - if (nn[i] > 0) - wv[i] *= float(nn[i]) / chi2[i]; - - if (verbose && 0 == threadIdx.x) - printf("found %d proto clusters ", foundClusters); - if (verbose && 0 == threadIdx.x) - printf("and %d noise\n", noise); - } - - __global__ void fitVerticesKernel(VtxSoAView pdata, - WsSoAView pws, - float chi2Max // for outlier rejection - ) { - fitVertices(pdata, pws, chi2Max); - } - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuFitVertices_h diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h b/RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h deleted file mode 100644 index 4d9f2c42068d2..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuSortByPt2_h -#define RecoVertex_PixelVertexFinding_plugins_gpuSortByPt2_h - -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" -#ifdef __CUDA_ARCH__ -#include "HeterogeneousCore/CUDAUtilities/interface/radixSort.h" -#endif - -#include "gpuVertexFinder.h" - -namespace gpuVertexFinder { - - __device__ __forceinline__ void sortByPt2(VtxSoAView& pdata, WsSoAView& pws) { - auto& __restrict__ data = pdata; - auto& __restrict__ ws = pws; - auto nt = ws.ntrks(); - float const* __restrict__ ptt2 = ws.ptt2().data(); - uint32_t const& nvFinal = data.nvFinal(); - - int32_t const* __restrict__ iv = ws.iv().data(); - float* __restrict__ ptv2 = data.ptv2().data(); - uint16_t* __restrict__ sortInd = data.sortInd().data(); - - assert(ptv2); - assert(sortInd); - - if (nvFinal < 1) - return; - - // fill indexing - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - data[ws[i].itrk()].idv() = iv[i]; - } - - // can be done asynchronously at the end of previous event - for (auto i = threadIdx.x; i < nvFinal; i += blockDim.x) { - ptv2[i] = 0; - } - __syncthreads(); - - for (auto i = threadIdx.x; i < nt; i += blockDim.x) { - if (iv[i] > 9990) - continue; - atomicAdd_block(&ptv2[iv[i]], ptt2[i]); - } - __syncthreads(); - - if (1 == nvFinal) { - if (threadIdx.x == 0) - sortInd[0] = 0; - return; - } -#ifdef __CUDA_ARCH__ - __shared__ uint16_t sws[1024]; - // sort using only 16 bits - radixSort(ptv2, sortInd, sws, nvFinal); -#else - for (uint16_t i = 0; i < nvFinal; ++i) - sortInd[i] = i; - std::sort(sortInd, sortInd + nvFinal, [&](auto i, auto j) { return ptv2[i] < ptv2[j]; }); -#endif - } - - __global__ void sortByPt2Kernel(VtxSoAView pdata, WsSoAView pws) { sortByPt2(pdata, pws); } - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuSortByPt2_h diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h b/RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h deleted file mode 100644 index c5b14b8bdcdd0..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h +++ /dev/null @@ -1,141 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuSplitVertices_h -#define RecoVertex_PixelVertexFinding_plugins_gpuSplitVertices_h - -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" - -#include "gpuVertexFinder.h" - -namespace gpuVertexFinder { - - __device__ __forceinline__ void splitVertices(VtxSoAView& pdata, WsSoAView& pws, float maxChi2) { - constexpr bool verbose = false; // in principle the compiler should optmize out if false - - auto& __restrict__ data = pdata; - auto& __restrict__ ws = pws; - auto nt = ws.ntrks(); - float const* __restrict__ zt = ws.zt().data(); - float const* __restrict__ ezt2 = ws.ezt2().data(); - float* __restrict__ zv = data.zv().data(); - float* __restrict__ wv = data.wv().data(); - float const* __restrict__ chi2 = data.chi2().data(); - uint32_t& nvFinal = data.nvFinal(); - - int32_t const* __restrict__ nn = data.ndof().data(); - int32_t* __restrict__ iv = ws.iv().data(); - - assert(zt); - assert(wv); - assert(chi2); - assert(nn); - - // one vertex per block - for (auto kv = blockIdx.x; kv < nvFinal; kv += gridDim.x) { - if (nn[kv] < 4) - continue; - if (chi2[kv] < maxChi2 * float(nn[kv])) - continue; - - constexpr int MAXTK = 512; - assert(nn[kv] < MAXTK); - if (nn[kv] >= MAXTK) - continue; // too bad FIXME - __shared__ uint32_t it[MAXTK]; // track index - __shared__ float zz[MAXTK]; // z pos - __shared__ uint8_t newV[MAXTK]; // 0 or 1 - __shared__ float ww[MAXTK]; // z weight - - __shared__ uint32_t nq; // number of track for this vertex - nq = 0; - __syncthreads(); - - // copy to local - for (auto k = threadIdx.x; k < nt; k += blockDim.x) { - if (iv[k] == int(kv)) { - auto old = atomicInc(&nq, MAXTK); - zz[old] = zt[k] - zv[kv]; - newV[old] = zz[old] < 0 ? 0 : 1; - ww[old] = 1.f / ezt2[k]; - it[old] = k; - } - } - - __shared__ float znew[2], wnew[2]; // the new vertices - - __syncthreads(); - assert(int(nq) == nn[kv] + 1); - - int maxiter = 20; - // kt-min.... - bool more = true; - while (__syncthreads_or(more)) { - more = false; - if (0 == threadIdx.x) { - znew[0] = 0; - znew[1] = 0; - wnew[0] = 0; - wnew[1] = 0; - } - __syncthreads(); - for (auto k = threadIdx.x; k < nq; k += blockDim.x) { - auto i = newV[k]; - atomicAdd(&znew[i], zz[k] * ww[k]); - atomicAdd(&wnew[i], ww[k]); - } - __syncthreads(); - if (0 == threadIdx.x) { - znew[0] /= wnew[0]; - znew[1] /= wnew[1]; - } - __syncthreads(); - for (auto k = threadIdx.x; k < nq; k += blockDim.x) { - auto d0 = fabs(zz[k] - znew[0]); - auto d1 = fabs(zz[k] - znew[1]); - auto newer = d0 < d1 ? 0 : 1; - more |= newer != newV[k]; - newV[k] = newer; - } - --maxiter; - if (maxiter <= 0) - more = false; - } - - // avoid empty vertices - if (0 == wnew[0] || 0 == wnew[1]) - continue; - - // quality cut - auto dist2 = (znew[0] - znew[1]) * (znew[0] - znew[1]); - - auto chi2Dist = dist2 / (1.f / wnew[0] + 1.f / wnew[1]); - - if (verbose && 0 == threadIdx.x) - printf("inter %d %f %f\n", 20 - maxiter, chi2Dist, dist2 * wv[kv]); - - if (chi2Dist < 4) - continue; - - // get a new global vertex - __shared__ uint32_t igv; - if (0 == threadIdx.x) - igv = atomicAdd(&ws.nvIntermediate(), 1); - __syncthreads(); - for (auto k = threadIdx.x; k < nq; k += blockDim.x) { - if (1 == newV[k]) - iv[it[k]] = igv; - } - - } // loop on vertices - } - - __global__ void splitVerticesKernel(VtxSoAView pdata, WsSoAView pws, float maxChi2) { - splitVertices(pdata, pws, maxChi2); - } - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuSplitVertices_h diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cc b/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cc deleted file mode 100644 index dc638187977ac..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cc +++ /dev/null @@ -1,208 +0,0 @@ -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" - -#include "PixelVertexWorkSpaceUtilities.h" -#include "PixelVertexWorkSpaceSoAHost.h" -#include "PixelVertexWorkSpaceSoADevice.h" - -#include "gpuClusterTracksByDensity.h" -#include "gpuClusterTracksDBSCAN.h" -#include "gpuClusterTracksIterative.h" -#include "gpuFitVertices.h" -#include "gpuSortByPt2.h" -#include "gpuSplitVertices.h" - -#undef PIXVERTEX_DEBUG_PRODUCE - -namespace gpuVertexFinder { - - // reject outlier tracks that contribute more than this to the chi2 of the vertex fit - constexpr float maxChi2ForFirstFit = 50.f; - constexpr float maxChi2ForFinalFit = 5000.f; - - // split vertices with a chi2/NDoF greater than this - constexpr float maxChi2ForSplit = 9.f; - - template - __global__ void loadTracks( - TrackSoAConstView tracks_view, VtxSoAView soa, WsSoAView pws, float ptMin, float ptMax) { - auto const quality = tracks_view.quality(); - using helper = TracksUtilities; - auto first = blockIdx.x * blockDim.x + threadIdx.x; - for (int idx = first, nt = tracks_view.nTracks(); idx < nt; idx += gridDim.x * blockDim.x) { - auto nHits = helper::nHits(tracks_view, idx); - assert(nHits >= 3); - - // initialize soa... - soa[idx].idv() = -1; - - if (helper::isTriplet(tracks_view, idx)) - continue; // no triplets - if (quality[idx] < pixelTrack::Quality::highPurity) - continue; - - auto pt = tracks_view[idx].pt(); - - if (pt < ptMin) - continue; - - // clamp pt - pt = std::min(pt, ptMax); - - auto& data = pws; - auto it = atomicAdd(&data.ntrks(), 1); - data[it].itrk() = idx; - data[it].zt() = helper::zip(tracks_view, idx); - data[it].ezt2() = tracks_view[idx].covariance()(14); - data[it].ptt2() = pt * pt; - } - } - -// #define THREE_KERNELS -#ifndef THREE_KERNELS - __global__ void vertexFinderOneKernel(VtxSoAView pdata, - WsSoAView pws, - int minT, // min number of neighbours to be "seed" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster, - ) { - clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max); - __syncthreads(); - fitVertices(pdata, pws, maxChi2ForFirstFit); - __syncthreads(); - splitVertices(pdata, pws, maxChi2ForSplit); - __syncthreads(); - fitVertices(pdata, pws, maxChi2ForFinalFit); - __syncthreads(); - sortByPt2(pdata, pws); - } -#else - __global__ void vertexFinderKernel1(VtxSoAView pdata, - WsSoAView pws, - int minT, // min number of neighbours to be "seed" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster, - ) { - clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max); - __syncthreads(); - fitVertices(pdata, pws, maxChi2ForFirstFit); - } - - __global__ void vertexFinderKernel2(VtxSoAView pdata, WsSoAView pws) { - fitVertices(pdata, pws, maxChi2ForFinalFit); - __syncthreads(); - sortByPt2(pdata, pws); - } -#endif - - template -#ifdef __CUDACC__ - ZVertexSoADevice Producer::makeAsync(cudaStream_t stream, - const TrackSoAConstView& tracks_view, - float ptMin, - float ptMax) const { -#ifdef PIXVERTEX_DEBUG_PRODUCE - std::cout << "producing Vertices on GPU" << std::endl; -#endif // PIXVERTEX_DEBUG_PRODUCE - ZVertexSoADevice vertices(stream); -#else - ZVertexSoAHost Producer::make(const TrackSoAConstView& tracks_view, - float ptMin, - float ptMax) const { -#ifdef PIXVERTEX_DEBUG_PRODUCE - std::cout << "producing Vertices on CPU" << std::endl; -#endif // PIXVERTEX_DEBUG_PRODUCE - ZVertexSoAHost vertices; -#endif - auto soa = vertices.view(); - - assert(vertices.buffer()); - -#ifdef __CUDACC__ - auto ws_d = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoADevice(stream); -#else - auto ws_d = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAHost(); -#endif - -#ifdef __CUDACC__ - init<<<1, 1, 0, stream>>>(soa, ws_d.view()); - auto blockSize = 128; - auto numberOfBlocks = (tracks_view.metadata().size() + blockSize - 1) / blockSize; - loadTracks<<>>(tracks_view, soa, ws_d.view(), ptMin, ptMax); - cudaCheck(cudaGetLastError()); -#else - init(soa, ws_d.view()); - loadTracks(tracks_view, soa, ws_d.view(), ptMin, ptMax); -#endif - -#ifdef __CUDACC__ - // Running too many thread lead to problems when printf is enabled. - constexpr int maxThreadsForPrint = 1024 - 128; - constexpr int numBlocks = 1024; - constexpr int threadsPerBlock = 128; - - if (oneKernel_) { - // implemented only for density clustesrs -#ifndef THREE_KERNELS - vertexFinderOneKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max); -#else - vertexFinderKernel1<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max); - cudaCheck(cudaGetLastError()); - // one block per vertex... - splitVerticesKernel<<>>(soa, ws_d.view(), maxChi2ForSplit); - cudaCheck(cudaGetLastError()); - vertexFinderKernel2<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view()); -#endif - } else { // five kernels - if (useDensity_) { - clusterTracksByDensityKernel<<<1, maxThreadsForPrint, 0, stream>>>( - soa, ws_d.view(), minT, eps, errmax, chi2max); - } else if (useDBSCAN_) { - clusterTracksDBSCAN<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max); - } else if (useIterative_) { - clusterTracksIterative<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), minT, eps, errmax, chi2max); - } - cudaCheck(cudaGetLastError()); - fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFirstFit); - cudaCheck(cudaGetLastError()); - if (doSplitting_) { - // one block per vertex... - splitVerticesKernel<<>>(soa, ws_d.view(), maxChi2ForSplit); - cudaCheck(cudaGetLastError()); - fitVerticesKernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view(), maxChi2ForFinalFit); - cudaCheck(cudaGetLastError()); - } - sortByPt2Kernel<<<1, maxThreadsForPrint, 0, stream>>>(soa, ws_d.view()); - } - cudaCheck(cudaGetLastError()); -#else // __CUDACC__ - if (useDensity_) { - clusterTracksByDensity(soa, ws_d.view(), minT, eps, errmax, chi2max); - } else if (useDBSCAN_) { - clusterTracksDBSCAN(soa, ws_d.view(), minT, eps, errmax, chi2max); - } else if (useIterative_) { - clusterTracksIterative(soa, ws_d.view(), minT, eps, errmax, chi2max); - } -#ifdef PIXVERTEX_DEBUG_PRODUCE - std::cout << "found " << ws_d.view().nvIntermediate() << " vertices " << std::endl; -#endif // PIXVERTEX_DEBUG_PRODUCE - fitVertices(soa, ws_d.view(), maxChi2ForFirstFit); - // one block per vertex! - if (doSplitting_) { - splitVertices(soa, ws_d.view(), maxChi2ForSplit); - fitVertices(soa, ws_d.view(), maxChi2ForFinalFit); - } - sortByPt2(soa, ws_d.view()); -#endif - - return vertices; - } - - template class Producer; - template class Producer; - template class Producer; -} // namespace gpuVertexFinder diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cu b/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cu deleted file mode 100644 index 9674eac7d8784..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.cu +++ /dev/null @@ -1 +0,0 @@ -#include "gpuVertexFinder.cc" diff --git a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.h b/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.h deleted file mode 100644 index 640f137bf76f4..0000000000000 --- a/RecoVertex/PixelVertexFinding/plugins/gpuVertexFinder.h +++ /dev/null @@ -1,70 +0,0 @@ -#ifndef RecoVertex_PixelVertexFinding_plugins_gpuVertexFinder_h -#define RecoVertex_PixelVertexFinding_plugins_gpuVertexFinder_h - -#include -#include - -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "PixelVertexWorkSpaceUtilities.h" -#include "PixelVertexWorkSpaceSoAHost.h" -#include "PixelVertexWorkSpaceSoADevice.h" - -namespace gpuVertexFinder { - - using VtxSoAView = zVertex::ZVertexSoAView; - using WsSoAView = gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAView; - - __global__ void init(VtxSoAView pdata, WsSoAView pws) { - zVertex::utilities::init(pdata); - gpuVertexFinder::workSpace::utilities::init(pws); - } - - template - class Producer { - using TkSoAConstView = TrackSoAConstView; - - public: - Producer(bool oneKernel, - bool useDensity, - bool useDBSCAN, - bool useIterative, - bool doSplitting, - int iminT, // min number of neighbours to be "core" - float ieps, // max absolute distance to cluster - float ierrmax, // max error to be "seed" - float ichi2max // max normalized distance to cluster - ) - : oneKernel_(oneKernel && !(useDBSCAN || useIterative)), - useDensity_(useDensity), - useDBSCAN_(useDBSCAN), - useIterative_(useIterative), - doSplitting_(doSplitting), - minT(iminT), - eps(ieps), - errmax(ierrmax), - chi2max(ichi2max) {} - - ~Producer() = default; - - ZVertexSoADevice makeAsync(cudaStream_t stream, const TkSoAConstView &tracks_view, float ptMin, float ptMax) const; - ZVertexSoAHost make(const TkSoAConstView &tracks_view, float ptMin, float ptMax) const; - - private: - const bool oneKernel_; - const bool useDensity_; - const bool useDBSCAN_; - const bool useIterative_; - const bool doSplitting_; - - int minT; // min number of neighbours to be "core" - float eps; // max absolute distance to cluster - float errmax; // max error to be "seed" - float chi2max; // max normalized distance to cluster - }; - -} // namespace gpuVertexFinder - -#endif // RecoVertex_PixelVertexFinding_plugins_gpuVertexFinder_h diff --git a/RecoVertex/PixelVertexFinding/test/BuildFile.xml b/RecoVertex/PixelVertexFinding/test/BuildFile.xml index 57d83a69fabae..60f60dc1fd575 100644 --- a/RecoVertex/PixelVertexFinding/test/BuildFile.xml +++ b/RecoVertex/PixelVertexFinding/test/BuildFile.xml @@ -2,49 +2,12 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/RecoVertex/PixelVertexFinding/test/VertexFinder_t.h b/RecoVertex/PixelVertexFinding/test/VertexFinder_t.h deleted file mode 100644 index c2e165ddb1e98..0000000000000 --- a/RecoVertex/PixelVertexFinding/test/VertexFinder_t.h +++ /dev/null @@ -1,359 +0,0 @@ -#include -#include -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" -#include "HeterogeneousCore/CUDAUtilities/interface/launch.h" -#include "HeterogeneousCore/CUDAUtilities/interface/allocate_device.h" -#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h" -// PixelTrackUtilities only included in order to compile SoALayout with Eigen columns -#include "CUDADataFormats/Track/interface/PixelTrackUtilities.h" -#include "CUDADataFormats/Vertex/interface/ZVertexUtilities.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousHost.h" -#include "CUDADataFormats/Vertex/interface/ZVertexSoAHeterogeneousDevice.h" - -#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceUtilities.h" -#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoAHost.h" -#include "RecoVertex/PixelVertexFinding/plugins/PixelVertexWorkSpaceSoADevice.h" -#ifdef USE_DBSCAN -#include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksDBSCAN.h" -#define CLUSTERIZE gpuVertexFinder::clusterTracksDBSCAN -#elif USE_ITERATIVE -#include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksIterative.h" -#define CLUSTERIZE gpuVertexFinder::clusterTracksIterative -#else -#include "RecoVertex/PixelVertexFinding/plugins/gpuClusterTracksByDensity.h" -#define CLUSTERIZE gpuVertexFinder::clusterTracksByDensityKernel -#endif -#include "RecoVertex/PixelVertexFinding/plugins/gpuFitVertices.h" -#include "RecoVertex/PixelVertexFinding/plugins/gpuSortByPt2.h" -#include "RecoVertex/PixelVertexFinding/plugins/gpuSplitVertices.h" - -#ifdef ONE_KERNEL -#ifdef __CUDACC__ -__global__ void vertexFinderOneKernel(gpuVertexFinder::VtxSoAView pdata, - gpuVertexFinder::WsSoAView pws, - int minT, // min number of neighbours to be "seed" - float eps, // max absolute distance to cluster - float errmax, // max error to be "seed" - float chi2max // max normalized distance to cluster, -) { - gpuVertexFinder::clusterTracksByDensity(pdata, pws, minT, eps, errmax, chi2max); - __syncthreads(); - gpuVertexFinder::fitVertices(pdata, pws, 50.); - __syncthreads(); - gpuVertexFinder::splitVertices(pdata, pws, 9.f); - __syncthreads(); - gpuVertexFinder::fitVertices(pdata, pws, 5000.); - __syncthreads(); - gpuVertexFinder::sortByPt2(pdata, pws); -} -#endif -#endif - -struct Event { - std::vector zvert; - std::vector itrack; - std::vector ztrack; - std::vector eztrack; - std::vector pttrack; - std::vector ivert; -}; - -struct ClusterGenerator { - explicit ClusterGenerator(float nvert, float ntrack) - : rgen(-13., 13), errgen(0.005, 0.025), clusGen(nvert), trackGen(ntrack), gauss(0., 1.), ptGen(1.) {} - - void operator()(Event& ev) { - int nclus = clusGen(reng); - ev.zvert.resize(nclus); - ev.itrack.resize(nclus); - for (auto& z : ev.zvert) { - z = 3.5f * gauss(reng); - } - - ev.ztrack.clear(); - ev.eztrack.clear(); - ev.ivert.clear(); - ev.pttrack.clear(); - for (int iv = 0; iv < nclus; ++iv) { - auto nt = trackGen(reng); - ev.itrack[iv] = nt; - for (int it = 0; it < nt; ++it) { - auto err = errgen(reng); // reality is not flat.... - ev.ztrack.push_back(ev.zvert[iv] + err * gauss(reng)); - ev.eztrack.push_back(err * err); - ev.ivert.push_back(iv); - ev.pttrack.push_back((iv == 5 ? 1.f : 0.5f) + ptGen(reng)); - ev.pttrack.back() *= ev.pttrack.back(); - } - } - // add noise - auto nt = 2 * trackGen(reng); - for (int it = 0; it < nt; ++it) { - auto err = 0.03f; - ev.ztrack.push_back(rgen(reng)); - ev.eztrack.push_back(err * err); - ev.ivert.push_back(9999); - ev.pttrack.push_back(0.5f + ptGen(reng)); - ev.pttrack.back() *= ev.pttrack.back(); - } - } - - std::mt19937 reng; - std::uniform_real_distribution rgen; - std::uniform_real_distribution errgen; - std::poisson_distribution clusGen; - std::poisson_distribution trackGen; - std::normal_distribution gauss; - std::exponential_distribution ptGen; -}; - -__global__ void print(gpuVertexFinder::VtxSoAView pdata, gpuVertexFinder::WsSoAView pws) { - auto& __restrict__ ws = pws; - printf("nt,nv %d %d,%d\n", ws.ntrks(), pdata.nvFinal(), ws.nvIntermediate()); -} - -int main() { -#ifdef __CUDACC__ - cudaStream_t stream; - cms::cudatest::requireDevices(); - cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); - - ZVertexSoADevice onGPU_d(stream); - gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoADevice ws_d(stream); -#else - - ZVertexSoAHost onGPU_d; - gpuVertexFinder::workSpace::PixelVertexWorkSpaceSoAHost ws_d; -#endif - - Event ev; - - float eps = 0.1f; - std::array par{{eps, 0.01f, 9.0f}}; - for (int nav = 30; nav < 80; nav += 20) { - ClusterGenerator gen(nav, 10); - - for (int i = 8; i < 20; ++i) { - auto kk = i / 4; // M param - - gen(ev); - -#ifdef __CUDACC__ - gpuVertexFinder::init<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view()); -#else - gpuVertexFinder::init(onGPU_d.view(), ws_d.view()); -#endif - - std::cout << "v,t size " << ev.zvert.size() << ' ' << ev.ztrack.size() << std::endl; - auto nt = ev.ztrack.size(); -#ifdef __CUDACC__ - cudaCheck(cudaMemcpy(&ws_d.view().ntrks(), &nt, sizeof(uint32_t), cudaMemcpyHostToDevice)); - cudaCheck(cudaMemcpy( - ws_d.view().zt().data(), ev.ztrack.data(), sizeof(float) * ev.ztrack.size(), cudaMemcpyHostToDevice)); - cudaCheck(cudaMemcpy( - ws_d.view().ezt2().data(), ev.eztrack.data(), sizeof(float) * ev.eztrack.size(), cudaMemcpyHostToDevice)); - cudaCheck(cudaMemcpy( - ws_d.view().ptt2().data(), ev.pttrack.data(), sizeof(float) * ev.eztrack.size(), cudaMemcpyHostToDevice)); -#else - ::memcpy(&ws_d.view().ntrks(), &nt, sizeof(uint32_t)); - ::memcpy(ws_d.view().zt().data(), ev.ztrack.data(), sizeof(float) * ev.ztrack.size()); - ::memcpy(ws_d.view().ezt2().data(), ev.eztrack.data(), sizeof(float) * ev.eztrack.size()); - ::memcpy(ws_d.view().ptt2().data(), ev.pttrack.data(), sizeof(float) * ev.eztrack.size()); -#endif - - std::cout << "M eps, pset " << kk << ' ' << eps << ' ' << (i % 4) << std::endl; - - if ((i % 4) == 0) - par = {{eps, 0.02f, 12.0f}}; - if ((i % 4) == 1) - par = {{eps, 0.02f, 9.0f}}; - if ((i % 4) == 2) - par = {{eps, 0.01f, 9.0f}}; - if ((i % 4) == 3) - par = {{0.7f * eps, 0.01f, 9.0f}}; - - uint32_t nv = 0; -#ifdef __CUDACC__ - print<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view()); - cudaCheck(cudaGetLastError()); - cudaDeviceSynchronize(); - -#ifdef ONE_KERNEL - cms::cuda::launch(vertexFinderOneKernel, {1, 512 + 256}, onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]); -#else - cms::cuda::launch(CLUSTERIZE, {1, 512 + 256}, onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]); -#endif - print<<<1, 1, 0, stream>>>(onGPU_d.view(), ws_d.view()); - - cudaCheck(cudaGetLastError()); - cudaDeviceSynchronize(); - - cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 50.f); - cudaCheck(cudaGetLastError()); - cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost)); - -#else - print(onGPU_d.view(), ws_d.view()); - CLUSTERIZE(onGPU_d.view(), ws_d.view(), kk, par[0], par[1], par[2]); - print(onGPU_d.view(), ws_d.view()); - gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 50.f); - nv = onGPU_d.view().nvFinal(); -#endif - - if (nv == 0) { - std::cout << "NO VERTICES???" << std::endl; - continue; - } - - float* zv = nullptr; - float* wv = nullptr; - float* ptv2 = nullptr; - int32_t* nn = nullptr; - uint16_t* ind = nullptr; - - // keep chi2 separated... - float chi2[2 * nv]; // make space for splitting... - -#ifdef __CUDACC__ - float hzv[2 * nv]; - float hwv[2 * nv]; - float hptv2[2 * nv]; - int32_t hnn[2 * nv]; - uint16_t hind[2 * nv]; - - zv = hzv; - wv = hwv; - ptv2 = hptv2; - nn = hnn; - ind = hind; -#else - zv = onGPU_d.view().zv().data(); - wv = onGPU_d.view().wv().data(); - ptv2 = onGPU_d.view().ptv2().data(); - nn = onGPU_d.view().ndof().data(); - ind = onGPU_d.view().sortInd().data(); -#endif - -#ifdef __CUDACC__ - cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof().data(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2().data(), nv * sizeof(float), cudaMemcpyDeviceToHost)); -#else - memcpy(chi2, onGPU_d.view().chi2().data(), nv * sizeof(float)); -#endif - - for (auto j = 0U; j < nv; ++j) - if (nn[j] > 0) - chi2[j] /= float(nn[j]); - { - auto mx = std::minmax_element(chi2, chi2 + nv); - std::cout << "after fit nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl; - } - -#ifdef __CUDACC__ - cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 50.f); - cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof().data(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2().data(), nv * sizeof(float), cudaMemcpyDeviceToHost)); -#else - gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 50.f); - nv = onGPU_d.view().nvFinal(); - memcpy(chi2, onGPU_d.view().chi2().data(), nv * sizeof(float)); -#endif - - for (auto j = 0U; j < nv; ++j) - if (nn[j] > 0) - chi2[j] /= float(nn[j]); - { - auto mx = std::minmax_element(chi2, chi2 + nv); - std::cout << "before splitting nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl; - } - -#ifdef __CUDACC__ - // one vertex per block!!! - cms::cuda::launch(gpuVertexFinder::splitVerticesKernel, {1024, 64}, onGPU_d.view(), ws_d.view(), 9.f); - cudaCheck(cudaMemcpy(&nv, &ws_d.view().nvIntermediate(), sizeof(uint32_t), cudaMemcpyDeviceToHost)); -#else - gpuVertexFinder::splitVertices(onGPU_d.view(), ws_d.view(), 9.f); - nv = ws_d.view().nvIntermediate(); -#endif - std::cout << "after split " << nv << std::endl; - -#ifdef __CUDACC__ - cms::cuda::launch(gpuVertexFinder::fitVerticesKernel, {1, 1024 - 256}, onGPU_d.view(), ws_d.view(), 5000.f); - cudaCheck(cudaGetLastError()); - - cms::cuda::launch(gpuVertexFinder::sortByPt2Kernel, {1, 256}, onGPU_d.view(), ws_d.view()); - cudaCheck(cudaGetLastError()); - cudaCheck(cudaMemcpy(&nv, &onGPU_d.view().nvFinal(), sizeof(uint32_t), cudaMemcpyDeviceToHost)); -#else - gpuVertexFinder::fitVertices(onGPU_d.view(), ws_d.view(), 5000.f); - gpuVertexFinder::sortByPt2(onGPU_d.view(), ws_d.view()); - nv = onGPU_d.view().nvFinal(); - memcpy(chi2, onGPU_d.view().chi2().data(), nv * sizeof(float)); -#endif - - if (nv == 0) { - std::cout << "NO VERTICES???" << std::endl; - continue; - } - -#ifdef __CUDACC__ - cudaCheck(cudaMemcpy(zv, onGPU_d.view().zv().data(), nv * sizeof(float), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(wv, onGPU_d.view().wv().data(), nv * sizeof(float), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(chi2, onGPU_d.view().chi2().data(), nv * sizeof(float), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(ptv2, onGPU_d.view().ptv2().data(), nv * sizeof(float), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(nn, onGPU_d.view().ndof().data(), nv * sizeof(int32_t), cudaMemcpyDeviceToHost)); - cudaCheck(cudaMemcpy(ind, onGPU_d.view().sortInd().data(), nv * sizeof(uint16_t), cudaMemcpyDeviceToHost)); -#endif - for (auto j = 0U; j < nv; ++j) - if (nn[j] > 0) - chi2[j] /= float(nn[j]); - { - auto mx = std::minmax_element(chi2, chi2 + nv); - std::cout << "nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl; - } - - { - auto mx = std::minmax_element(wv, wv + nv); - std::cout << "min max error " << 1. / std::sqrt(*mx.first) << ' ' << 1. / std::sqrt(*mx.second) << std::endl; - } - - { - auto mx = std::minmax_element(ptv2, ptv2 + nv); - std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl; - std::cout << "min max ptv2 " << ptv2[ind[0]] << ' ' << ptv2[ind[nv - 1]] << " at " << ind[0] << ' ' - << ind[nv - 1] << std::endl; - } - - float dd[nv]; - for (auto kv = 0U; kv < nv; ++kv) { - auto zr = zv[kv]; - auto md = 500.0f; - for (auto zs : ev.ztrack) { - auto d = std::abs(zr - zs); - md = std::min(d, md); - } - dd[kv] = md; - } - if (i == 6) { - for (auto d : dd) - std::cout << d << ' '; - std::cout << std::endl; - } - auto mx = std::minmax_element(dd, dd + nv); - float rms = 0; - for (auto d : dd) - rms += d * d; - rms = std::sqrt(rms) / (nv - 1); - std::cout << "min max rms " << *mx.first << ' ' << *mx.second << ' ' << rms << std::endl; - - } // loop on events - } // lopp on ave vert - - return 0; -} diff --git a/RecoVertex/PixelVertexFinding/test/cpuVertexFinder_t.cpp b/RecoVertex/PixelVertexFinding/test/cpuVertexFinder_t.cpp deleted file mode 100644 index a7906fe0d03f5..0000000000000 --- a/RecoVertex/PixelVertexFinding/test/cpuVertexFinder_t.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "VertexFinder_t.h" diff --git a/RecoVertex/PixelVertexFinding/test/gpuVertexFinder_t.cu b/RecoVertex/PixelVertexFinding/test/gpuVertexFinder_t.cu deleted file mode 100644 index a7906fe0d03f5..0000000000000 --- a/RecoVertex/PixelVertexFinding/test/gpuVertexFinder_t.cu +++ /dev/null @@ -1 +0,0 @@ -#include "VertexFinder_t.h"