diff --git a/CUDADataFormats/Track/BuildFile.xml b/CUDADataFormats/Track/BuildFile.xml new file mode 100644 index 0000000000000..e3f9a0910bbd8 --- /dev/null +++ b/CUDADataFormats/Track/BuildFile.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h b/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h new file mode 100644 index 0000000000000..3ee5af80353dd --- /dev/null +++ b/CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h @@ -0,0 +1,9 @@ +#ifndef CUDADataFormats_Track_PixelTrackHeterogeneous_h +#define CUDADataFormats_Track_PixelTrackHeterogeneous_h + +#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h" +#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h" + +using PixelTrackHeterogeneous = HeterogeneousSoA; + +#endif // #ifndef CUDADataFormats_Track_PixelTrackHeterogeneous_h \ No newline at end of file diff --git a/CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h new file mode 100644 index 0000000000000..bd39f3c4d3bfe --- /dev/null +++ b/CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h @@ -0,0 +1,73 @@ +#ifndef CUDADataFormats_Track_TrackHeterogeneousT_H +#define CUDADataFormats_Track_TrackHeterogeneousT_H + +#include "CUDADataFormats/Track/interface/TrajectoryStateSoAT.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" + +#include "CUDADataFormats/Common/interface/HeterogeneousSoA.h" + +namespace pixelTrack { + enum class Quality : uint8_t { bad = 0, dup, loose, strict, tight, highPurity }; +} + +template +class TrackSoAHeterogeneousT { +public: + static constexpr int32_t stride() { return S; } + + using Quality = pixelTrack::Quality; + using hindex_type = uint32_t; + using HitContainer = cms::cuda::OneToManyAssoc; + + // Always check quality is at least loose! + // CUDA does not support enums in __lgc ... +private: + eigenSoA::ScalarSoA quality_; + +public: + constexpr Quality quality(int32_t i) const { return (Quality)(quality_(i)); } + constexpr Quality &quality(int32_t i) { return (Quality &)(quality_(i)); } + constexpr Quality const *qualityData() const { return (Quality const *)(quality_.data()); } + constexpr Quality *qualityData() { return (Quality *)(quality_.data()); } + + // this is chi2/ndof as not necessarely all hits are used in the fit + eigenSoA::ScalarSoA chi2; + + constexpr int nHits(int i) const { return detIndices.size(i); } + + // State at the Beam spot + // phi,tip,1/pt,cotan(theta),zip + TrajectoryStateSoAT stateAtBS; + eigenSoA::ScalarSoA eta; + eigenSoA::ScalarSoA pt; + constexpr float charge(int32_t i) const { return std::copysign(1.f, stateAtBS.state(i)(2)); } + constexpr float phi(int32_t i) const { return stateAtBS.state(i)(0); } + constexpr float tip(int32_t i) const { return stateAtBS.state(i)(1); } + constexpr float zip(int32_t i) const { return stateAtBS.state(i)(4); } + + // state at the detector of the outermost hit + // representation to be decided... + // not yet filled on GPU + // TrajectoryStateSoA stateAtOuterDet; + + HitContainer hitIndices; + HitContainer detIndices; +}; + +namespace pixelTrack { + +#ifdef GPU_SMALL_EVENTS + // kept for testing and debugging + constexpr uint32_t maxNumber() { return 2 * 1024; } +#else + // tested on MC events with 55-75 pileup events + constexpr uint32_t maxNumber() { return 32 * 1024; } +#endif + + using TrackSoA = TrackSoAHeterogeneousT; + using TrajectoryState = TrajectoryStateSoAT; + using HitContainer = TrackSoA::HitContainer; + +} // namespace pixelTrack + +#endif // CUDADataFormats_Track_TrackHeterogeneousT_H diff --git a/CUDADataFormats/Track/interface/TrajectoryStateSoAT.h b/CUDADataFormats/Track/interface/TrajectoryStateSoAT.h new file mode 100644 index 0000000000000..64fcd573a6991 --- /dev/null +++ b/CUDADataFormats/Track/interface/TrajectoryStateSoAT.h @@ -0,0 +1,59 @@ +#ifndef CUDADataFormats_Track_TrajectoryStateSOAT_H +#define CUDADataFormats_Track_TrajectoryStateSOAT_H + +#include +#include "HeterogeneousCore/CUDAUtilities/interface/eigenSoA.h" + +template +struct TrajectoryStateSoAT { + using Vector5f = Eigen::Matrix; + using Vector15f = Eigen::Matrix; + + using Vector5d = Eigen::Matrix; + using Matrix5d = Eigen::Matrix; + + static constexpr int32_t stride() { return S; } + + eigenSoA::MatrixSoA state; + eigenSoA::MatrixSoA covariance; + + template + __host__ __device__ inline void copyFromCircle( + V3 const& cp, M3 const& ccov, V2 const& lp, M2 const& lcov, float b, int32_t i) { + state(i) << cp.template cast(), lp.template cast(); + state(i)(2) *= b; + auto cov = covariance(i); + cov(0) = ccov(0, 0); + cov(1) = ccov(0, 1); + cov(2) = b * float(ccov(0, 2)); + cov(4) = cov(3) = 0; + cov(5) = ccov(1, 1); + cov(6) = b * float(ccov(1, 2)); + cov(8) = cov(7) = 0; + cov(9) = b * b * float(ccov(2, 2)); + cov(11) = cov(10) = 0; + cov(12) = lcov(0, 0); + cov(13) = lcov(0, 1); + cov(14) = lcov(1, 1); + } + + template + __host__ __device__ inline void copyFromDense(V5 const& v, M5 const& cov, int32_t i) { + state(i) = v.template cast(); + for (int j = 0, ind = 0; j < 5; ++j) + for (auto k = j; k < 5; ++k) + covariance(i)(ind++) = cov(j, k); + } + + template + __host__ __device__ inline void copyToDense(V5& v, M5& cov, int32_t i) const { + v = state(i).template cast(); + for (int j = 0, ind = 0; j < 5; ++j) { + cov(j, j) = covariance(i)(ind++); + for (auto k = j + 1; k < 5; ++k) + cov(k, j) = cov(j, k) = covariance(i)(ind++); + } + } +}; + +#endif // CUDADataFormats_Track_TrajectoryStateSOAT_H diff --git a/CUDADataFormats/Track/src/classes.h b/CUDADataFormats/Track/src/classes.h new file mode 100644 index 0000000000000..97c116f6c88d3 --- /dev/null +++ b/CUDADataFormats/Track/src/classes.h @@ -0,0 +1,9 @@ +#ifndef CUDADataFormats_Track_src_classes_h +#define CUDADataFormats_Track_src_classes_h + +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/Common/interface/HostProduct.h" +#include "CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h" +#include "DataFormats/Common/interface/Wrapper.h" + +#endif // CUDADataFormats_Track_src_classes_h diff --git a/CUDADataFormats/Track/src/classes_def.xml b/CUDADataFormats/Track/src/classes_def.xml new file mode 100644 index 0000000000000..9c80ae91baf29 --- /dev/null +++ b/CUDADataFormats/Track/src/classes_def.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/CUDADataFormats/Track/test/BuildFile.xml b/CUDADataFormats/Track/test/BuildFile.xml new file mode 100644 index 0000000000000..598b345d4709d --- /dev/null +++ b/CUDADataFormats/Track/test/BuildFile.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp new file mode 100644 index 0000000000000..d6ff539a642b0 --- /dev/null +++ b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cpp @@ -0,0 +1 @@ +#include "TrajectoryStateSOA_t.h" diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu new file mode 100644 index 0000000000000..d6ff539a642b0 --- /dev/null +++ b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.cu @@ -0,0 +1 @@ +#include "TrajectoryStateSOA_t.h" diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h new file mode 100644 index 0000000000000..97b88873c2613 --- /dev/null +++ b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h @@ -0,0 +1,75 @@ +#include "CUDADataFormats/Track/interface/TrajectoryStateSoAT.h" + +using Vector5d = Eigen::Matrix; +using Matrix5d = Eigen::Matrix; + +__host__ __device__ Matrix5d loadCov(Vector5d const& e) { + Matrix5d cov; + for (int i = 0; i < 5; ++i) + cov(i, i) = e(i) * e(i); + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < i; ++j) { + double v = 0.3 * std::sqrt(cov(i, i) * cov(j, j)); // this makes the matrix pos defined + cov(i, j) = (i + j) % 2 ? -0.4 * v : 0.1 * v; + cov(j, i) = cov(i, j); + } + } + return cov; +} + +using TS = TrajectoryStateSoAT<128>; + +__global__ void testTSSoA(TS* pts, int n) { + assert(n <= 128); + + Vector5d par0; + par0 << 0.2, 0.1, 3.5, 0.8, 0.1; + Vector5d e0; + e0 << 0.01, 0.01, 0.035, -0.03, -0.01; + auto cov0 = loadCov(e0); + + TS& ts = *pts; + + int first = threadIdx.x + blockIdx.x * blockDim.x; + + for (int i = first; i < n; i += blockDim.x * gridDim.x) { + ts.copyFromDense(par0, cov0, i); + Vector5d par1; + Matrix5d cov1; + ts.copyToDense(par1, cov1, i); + Vector5d delV = par1 - par0; + Matrix5d delM = cov1 - cov0; + for (int j = 0; j < 5; ++j) { + assert(std::abs(delV(j)) < 1.e-5); + for (auto k = j; k < 5; ++k) { + assert(cov0(k, j) == cov0(j, k)); + assert(cov1(k, j) == cov1(j, k)); + assert(std::abs(delM(k, j)) < 1.e-5); + } + } + } +} + +#ifdef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#endif + +int main() { +#ifdef __CUDACC__ + cms::cudatest::requireDevices(); +#endif + + TS ts; + +#ifdef __CUDACC__ + TS* ts_d; + cudaCheck(cudaMalloc(&ts_d, sizeof(TS))); + testTSSoA<<<1, 64>>>(ts_d, 128); + cudaCheck(cudaGetLastError()); + cudaCheck(cudaMemcpy(&ts, ts_d, sizeof(TS), cudaMemcpyDefault)); + cudaCheck(cudaDeviceSynchronize()); +#else + testTSSoA(&ts, 128); +#endif +} diff --git a/DQM/TrackingMonitorClient/python/pixelTrackingEffFromHitPattern_cff.py b/DQM/TrackingMonitorClient/python/pixelTrackingEffFromHitPattern_cff.py index 15ceaf93ed20a..cff85e56d94f7 100644 --- a/DQM/TrackingMonitorClient/python/pixelTrackingEffFromHitPattern_cff.py +++ b/DQM/TrackingMonitorClient/python/pixelTrackingEffFromHitPattern_cff.py @@ -21,7 +21,10 @@ def _layers(suffix, quant, histoPostfix): ] pixelTrackingEffFromHitPattern = DQMEDHarvester("DQMGenericClient", - subDirs = cms.untracked.vstring("Tracking/PixelTrackParameters/HitEffFromHitPattern*"), + subDirs = cms.untracked.vstring("Tracking/PixelTrackParameters/pixelTracks/HitEffFromHitPattern*", + "Tracking/PixelTrackParameters/dzPV0p1/HitEffFromHitPattern*", + "Tracking/PixelTrackParameters/pt_0to1/HitEffFromHitPattern*", + "Tracking/PixelTrackParameters/pt_1/HitEffFromHitPattern*"), efficiency = cms.vstring( _layers("PU", "GoodNumVertices", "") + _layers("BX", "BX", "VsBX") + diff --git a/DQM/TrackingMonitorSource/python/pixelTracksMonitoring_cff.py b/DQM/TrackingMonitorSource/python/pixelTracksMonitoring_cff.py index a075f671f05ce..d5deba78b46c8 100644 --- a/DQM/TrackingMonitorSource/python/pixelTracksMonitoring_cff.py +++ b/DQM/TrackingMonitorSource/python/pixelTracksMonitoring_cff.py @@ -1,23 +1,77 @@ import FWCore.ParameterSet.Config as cms import DQM.TrackingMonitor.TrackerCollisionTrackingMonitor_cfi -pixelTracksMonitoring = DQM.TrackingMonitor.TrackerCollisionTrackingMonitor_cfi.TrackerCollisionTrackMon.clone() -pixelTracksMonitoring.FolderName = 'Tracking/PixelTrackParameters' -pixelTracksMonitoring.TrackProducer = 'pixelTracks' -pixelTracksMonitoring.allTrackProducer = 'pixelTracks' -pixelTracksMonitoring.beamSpot = 'offlineBeamSpot' -pixelTracksMonitoring.primaryVertex = 'pixelVertices' -pixelTracksMonitoring.pvNDOF = 1 -pixelTracksMonitoring.doAllPlots = True -pixelTracksMonitoring.doLumiAnalysis = True -pixelTracksMonitoring.doProfilesVsLS = True -pixelTracksMonitoring.doDCAPlots = True -pixelTracksMonitoring.doProfilesVsLS = True -pixelTracksMonitoring.doPlotsVsGoodPVtx = True -pixelTracksMonitoring.doEffFromHitPatternVsPU = False -pixelTracksMonitoring.doEffFromHitPatternVsBX = False -pixelTracksMonitoring.doEffFromHitPatternVsLUMI = False -pixelTracksMonitoring.doPlotsVsGoodPVtx = True -pixelTracksMonitoring.doPlotsVsLUMI = True -pixelTracksMonitoring.doPlotsVsBX = True +pixelTracksMonitor = DQM.TrackingMonitor.TrackerCollisionTrackingMonitor_cfi.TrackerCollisionTrackMon.clone() +pixelTracksMonitor.FolderName = 'Tracking/PixelTrackParameters/pixelTracks' +pixelTracksMonitor.TrackProducer = 'pixelTracks' +pixelTracksMonitor.allTrackProducer = 'pixelTracks' +pixelTracksMonitor.beamSpot = 'offlineBeamSpot' +pixelTracksMonitor.primaryVertex = 'pixelVertices' +pixelTracksMonitor.pvNDOF = 1 +pixelTracksMonitor.doAllPlots = True +pixelTracksMonitor.doLumiAnalysis = True +pixelTracksMonitor.doProfilesVsLS = True +pixelTracksMonitor.doDCAPlots = True +pixelTracksMonitor.doProfilesVsLS = True +pixelTracksMonitor.doPlotsVsGoodPVtx = True +pixelTracksMonitor.doEffFromHitPatternVsPU = False +pixelTracksMonitor.doEffFromHitPatternVsBX = False +pixelTracksMonitor.doEffFromHitPatternVsLUMI = False +pixelTracksMonitor.doPlotsVsGoodPVtx = True +pixelTracksMonitor.doPlotsVsLUMI = True +pixelTracksMonitor.doPlotsVsBX = True +_trackSelector = cms.EDFilter('TrackSelector', + src = cms.InputTag('pixelTracks'), + cut = cms.string("") +) + +pixelTracksPt0to1 = _trackSelector.clone(cut = "pt >= 0 & pt < 1 ") +pixelTracksPt1 = _trackSelector.clone(cut = "pt >= 1 ") +from DQM.TrackingMonitorSource.TrackCollections2monitor_cff import highPurityPV0p1 as _highPurityPV0p1 +pixelTracksPV0p1 = _highPurityPV0p1.clone( + src = "pixelTracks", + quality = "", + vertexTag = "goodPixelVertices" +) + +pixelTracksMonitorPt0to1 = pixelTracksMonitor.clone( + TrackProducer = "pixelTracksPt0to1", + FolderName = "Tracking/PixelTrackParameters/pt_0to1" +) +pixelTracksMonitorPt1 = pixelTracksMonitor.clone( + TrackProducer = "pixelTracksPt1", + FolderName = "Tracking/PixelTrackParameters/pt_1" +) +pixelTracksMonitorPV0p1 = pixelTracksMonitor.clone( + TrackProducer = "pixelTracksPV0p1", + FolderName = "Tracking/PixelTrackParameters/dzPV0p1" +) + + +from CommonTools.ParticleFlow.goodOfflinePrimaryVertices_cfi import goodOfflinePrimaryVertices as _goodOfflinePrimaryVertices +goodPixelVertices = _goodOfflinePrimaryVertices.clone( + src = "pixelVertices", +) + +from DQM.TrackingMonitor.primaryVertexResolution_cfi import primaryVertexResolution as _primaryVertexResolution +pixelVertexResolution = _primaryVertexResolution.clone( + vertexSrc = "goodPixelVertices", + rootFolder = "OfflinePixelPV/Resolution", +) + +pixelTracksMonitoringTask = cms.Task( + goodPixelVertices, + pixelTracksPt0to1, + pixelTracksPt1, + pixelTracksPV0p1, +) + +pixelTracksMonitoring = cms.Sequence( + pixelTracksMonitor + + pixelTracksMonitorPt0to1 + + pixelTracksMonitorPt1 + + pixelTracksMonitorPV0p1 + + pixelVertexResolution, + pixelTracksMonitoringTask +) diff --git a/RecoPixelVertexing/Configuration/python/customizePixelTracksForProfiling.py b/RecoPixelVertexing/Configuration/python/customizePixelTracksForProfiling.py deleted file mode 100644 index 4713b64e5e48a..0000000000000 --- a/RecoPixelVertexing/Configuration/python/customizePixelTracksForProfiling.py +++ /dev/null @@ -1,15 +0,0 @@ -import FWCore.ParameterSet.Config as cms - -def customizePixelTracksForProfiling(process): - process.out = cms.OutputModule("AsciiOutputModule", - outputCommands = cms.untracked.vstring( - "keep *_pixelTracks_*_*", - ), - verbosity = cms.untracked.uint32(0), - ) - - process.outPath = cms.EndPath(process.out) - - process.schedule = cms.Schedule(process.raw2digi_step, process.reconstruction_step, process.outPath) - - return process diff --git a/RecoPixelVertexing/Configuration/python/customizePixelTracksSoAonCPU.py b/RecoPixelVertexing/Configuration/python/customizePixelTracksSoAonCPU.py new file mode 100644 index 0000000000000..24cc16e02b463 --- /dev/null +++ b/RecoPixelVertexing/Configuration/python/customizePixelTracksSoAonCPU.py @@ -0,0 +1,53 @@ +import FWCore.ParameterSet.Config as cms + +def customizePixelTracksSoAonCPU(process): + + process.CUDAService = cms.Service('CUDAService', + enabled = cms.untracked.bool(False) + ) + + # ensure the same results when running on GPU (which supports only the 'HLT' payload) and CPU + process.siPixelClustersPreSplitting.cpu.payloadType = cms.string('HLT') + + from RecoLocalTracker.SiPixelRecHits.siPixelRecHitSoAFromLegacy_cfi import siPixelRecHitSoAFromLegacy + process.siPixelRecHitsPreSplitting = siPixelRecHitSoAFromLegacy.clone( + convertToLegacy = True + ) + + from RecoPixelVertexing.PixelTriplets.caHitNtupletCUDA_cfi import caHitNtupletCUDA + process.pixelTrackSoA = caHitNtupletCUDA.clone( + onGPU = False, + pixelRecHitSrc = 'siPixelRecHitsPreSplitting' + ) + + from RecoPixelVertexing.PixelTrackFitting.pixelTrackProducerFromSoA_cfi import pixelTrackProducerFromSoA + process.pixelTracks = pixelTrackProducerFromSoA.clone( + pixelRecHitLegacySrc = 'siPixelRecHitsPreSplitting' + ) + + process.reconstruction_step += process.siPixelRecHitsPreSplitting + process.pixelTrackSoA + + return process + + +def customizePixelTracksForTriplets(process): + + from HLTrigger.Configuration.common import producers_by_type + for producer in producers_by_type(process, 'CAHitNtupletCUDA'): + producer.includeJumpingForwardDoublets = True + producer.minHitsPerNtuplet = 3 + + return process + + +def customizePixelTracksSoAonCPUForProfiling(process): + + process = customizePixelTracksSoAonCPU(process) + + process.siPixelRecHitSoAFromLegacy.convertToLegacy = False + + process.TkSoA = cms.Path(process.offlineBeamSpot + process.siPixelDigis + process.siPixelClustersPreSplitting + process.siPixelRecHitSoAFromLegacy + process.pixelTrackSoA) + + process.schedule = cms.Schedule(process.TkSoA) + + return process diff --git a/RecoPixelVertexing/PixelTrackFitting/BuildFile.xml b/RecoPixelVertexing/PixelTrackFitting/BuildFile.xml index e6fc938dc25a7..a589aad036996 100644 --- a/RecoPixelVertexing/PixelTrackFitting/BuildFile.xml +++ b/RecoPixelVertexing/PixelTrackFitting/BuildFile.xml @@ -1,3 +1,5 @@ + + diff --git a/RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h b/RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h new file mode 100644 index 0000000000000..86fe6a278777c --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h @@ -0,0 +1,606 @@ +#ifndef RecoPixelVertexing_PixelTrackFitting_interface_BrokenLine_h +#define RecoPixelVertexing_PixelTrackFitting_interface_BrokenLine_h + +#include + +#include "RecoPixelVertexing/PixelTrackFitting/interface/FitUtils.h" + +namespace brokenline { + + //!< Karimäki's parameters: (phi, d, k=1/R) + /*!< covariance matrix: \n + |cov(phi,phi)|cov( d ,phi)|cov( k ,phi)| \n + |cov(phi, d )|cov( d , d )|cov( k , d )| \n + |cov(phi, k )|cov( d , k )|cov( k , k )| \n + as defined in Karimäki V., 1990, Effective circle fitting for particle trajectories, + Nucl. Instr. and Meth. A305 (1991) 187. + */ + using karimaki_circle_fit = riemannFit::CircleFit; + + /*! + \brief data needed for the Broken Line fit procedure. + */ + template + struct PreparedBrokenLineData { + int qCharge; //!< particle charge + riemannFit::Matrix2xNd radii; //!< xy data in the system in which the pre-fitted center is the origin + riemannFit::VectorNd sTransverse; //!< total distance traveled in the transverse plane + // starting from the pre-fitted closest approach + riemannFit::VectorNd sTotal; //!< total distance traveled (three-dimensional) + riemannFit::VectorNd zInSZplane; //!< orthogonal coordinate to the pre-fitted line in the sz plane + riemannFit::VectorNd varBeta; //!< kink angles in the SZ plane + }; + + /*! + \brief Computes the Coulomb multiple scattering variance of the planar angle. + + \param length length of the track in the material. + \param bField magnetic field in Gev/cm/c. + \param radius radius of curvature (needed to evaluate p). + \param layer denotes which of the four layers of the detector is the endpoint of the + * multiple scattered track. For example, if Layer=3, then the particle has + * just gone through the material between the second and the third layer. + + \todo add another Layer variable to identify also the start point of the track, + * so if there are missing hits or multiple hits, the part of the detector that + * the particle has traversed can be exactly identified. + + \warning the formula used here assumes beta=1, and so neglects the dependence + * of theta_0 on the mass of the particle at fixed momentum. + + \return the variance of the planar angle ((theta_0)^2 /3). + */ + __host__ __device__ inline double multScatt( + const double& length, const double bField, const double radius, int layer, double slope) { + // limit R to 20GeV... + auto pt2 = std::min(20., bField * radius); + pt2 *= pt2; + constexpr double inv_X0 = 0.06 / 16.; //!< inverse of radiation length of the material in cm + //if(Layer==1) XXI_0=0.06/16.; + // else XXI_0=0.06/16.; + //XX_0*=1; + + //! number between 1/3 (uniform material) and 1 (thin scatterer) to be manually tuned + constexpr double geometry_factor = 0.7; + constexpr double fact = geometry_factor * riemannFit::sqr(13.6 / 1000.); + return fact / (pt2 * (1. + riemannFit::sqr(slope))) * (std::abs(length) * inv_X0) * + riemannFit::sqr(1. + 0.038 * log(std::abs(length) * inv_X0)); + } + + /*! + \brief Computes the 2D rotation matrix that transforms the line y=slope*x into the line y=0. + + \param slope tangent of the angle of rotation. + + \return 2D rotation matrix. + */ + __host__ __device__ inline riemannFit::Matrix2d rotationMatrix(double slope) { + riemannFit::Matrix2d rot; + rot(0, 0) = 1. / sqrt(1. + riemannFit::sqr(slope)); + rot(0, 1) = slope * rot(0, 0); + rot(1, 0) = -rot(0, 1); + rot(1, 1) = rot(0, 0); + return rot; + } + + /*! + \brief Changes the Karimäki parameters (and consequently their covariance matrix) under a + * translation of the coordinate system, such that the old origin has coordinates (x0,y0) + * in the new coordinate system. The formulas are taken from Karimäki V., 1990, Effective + * circle fitting for particle trajectories, Nucl. Instr. and Meth. A305 (1991) 187. + + \param circle circle fit in the old coordinate system. circle.par(0) is phi, circle.par(1) is d and circle.par(2) is rho. + \param x0 x coordinate of the translation vector. + \param y0 y coordinate of the translation vector. + \param jacobian passed by reference in order to save stack. + */ + __host__ __device__ inline void translateKarimaki(karimaki_circle_fit& circle, + double x0, + double y0, + riemannFit::Matrix3d& jacobian) { + // Avoid multiple access to the circle.par vector. + using scalar = std::remove_reference::type; + scalar phi = circle.par(0); + scalar dee = circle.par(1); + scalar rho = circle.par(2); + + // Avoid repeated trig. computations + scalar sinPhi = sin(phi); + scalar cosPhi = cos(phi); + + // Intermediate computations for the circle parameters + scalar deltaPara = x0 * cosPhi + y0 * sinPhi; + scalar deltaOrth = x0 * sinPhi - y0 * cosPhi + dee; + scalar tempSmallU = 1 + rho * dee; + scalar tempC = -rho * y0 + tempSmallU * cosPhi; + scalar tempB = rho * x0 + tempSmallU * sinPhi; + scalar tempA = 2. * deltaOrth + rho * (riemannFit::sqr(deltaOrth) + riemannFit::sqr(deltaPara)); + scalar tempU = sqrt(1. + rho * tempA); + + // Intermediate computations for the error matrix transform + scalar xi = 1. / (riemannFit::sqr(tempB) + riemannFit::sqr(tempC)); + scalar tempV = 1. + rho * deltaOrth; + scalar lambda = (0.5 * tempA) / (riemannFit::sqr(1. + tempU) * tempU); + scalar mu = 1. / (tempU * (1. + tempU)) + rho * lambda; + scalar zeta = riemannFit::sqr(deltaOrth) + riemannFit::sqr(deltaPara); + jacobian << xi * tempSmallU * tempV, -xi * riemannFit::sqr(rho) * deltaOrth, xi * deltaPara, + 2. * mu * tempSmallU * deltaPara, 2. * mu * tempV, mu * zeta - lambda * tempA, 0, 0, 1.; + + // translated circle parameters + // phi + circle.par(0) = atan2(tempB, tempC); + // d + circle.par(1) = tempA / (1 + tempU); + // rho after translation. It is invariant, so noop + // circle.par(2)= rho; + + // translated error matrix + circle.cov = jacobian * circle.cov * jacobian.transpose(); + } + + /*! + \brief Computes the data needed for the Broken Line fit procedure that are mainly common for the circle and the line fit. + + \param hits hits coordinates. + \param fast_fit pre-fit result in the form (X0,Y0,R,tan(theta)). + \param bField magnetic field in Gev/cm/c. + \param results PreparedBrokenLineData to be filled (see description of PreparedBrokenLineData). + */ + template + __host__ __device__ inline void prepareBrokenLineData(const M3xN& hits, + const V4& fast_fit, + const double bField, + PreparedBrokenLineData& results) { + riemannFit::Vector2d dVec; + riemannFit::Vector2d eVec; + + dVec = hits.block(0, 1, 2, 1) - hits.block(0, 0, 2, 1); + eVec = hits.block(0, n - 1, 2, 1) - hits.block(0, n - 2, 2, 1); + results.qCharge = riemannFit::cross2D(dVec, eVec) > 0 ? -1 : 1; + + const double slope = -results.qCharge / fast_fit(3); + + riemannFit::Matrix2d rotMat = rotationMatrix(slope); + + // calculate radii and s + results.radii = hits.block(0, 0, 2, n) - fast_fit.head(2) * riemannFit::MatrixXd::Constant(1, n, 1); + eVec = -fast_fit(2) * fast_fit.head(2) / fast_fit.head(2).norm(); + for (u_int i = 0; i < n; i++) { + dVec = results.radii.block(0, i, 2, 1); + results.sTransverse(i) = results.qCharge * fast_fit(2) * + atan2(riemannFit::cross2D(dVec, eVec), dVec.dot(eVec)); // calculates the arc length + } + riemannFit::VectorNd zVec = hits.block(2, 0, 1, n).transpose(); + + //calculate sTotal and zVec + riemannFit::Matrix2xNd pointsSZ = riemannFit::Matrix2xNd::Zero(); + for (u_int i = 0; i < n; i++) { + pointsSZ(0, i) = results.sTransverse(i); + pointsSZ(1, i) = zVec(i); + pointsSZ.block(0, i, 2, 1) = rotMat * pointsSZ.block(0, i, 2, 1); + } + results.sTotal = pointsSZ.block(0, 0, 1, n).transpose(); + results.zInSZplane = pointsSZ.block(1, 0, 1, n).transpose(); + + //calculate varBeta + results.varBeta(0) = results.varBeta(n - 1) = 0; + for (u_int i = 1; i < n - 1; i++) { + results.varBeta(i) = multScatt(results.sTotal(i + 1) - results.sTotal(i), bField, fast_fit(2), i + 2, slope) + + multScatt(results.sTotal(i) - results.sTotal(i - 1), bField, fast_fit(2), i + 1, slope); + } + } + + /*! + \brief Computes the n-by-n band matrix obtained minimizing the Broken Line's cost function w.r.t u. + * This is the whole matrix in the case of the line fit and the main n-by-n block in the case + * of the circle fit. + + \param weights weights of the first part of the cost function, the one with the measurements + * and not the angles (\sum_{i=1}^n w*(y_i-u_i)^2). + \param sTotal total distance traveled by the particle from the pre-fitted closest approach. + \param varBeta kink angles' variance. + + \return the n-by-n matrix of the linear system + */ + template + __host__ __device__ inline riemannFit::MatrixNd matrixC_u(const riemannFit::VectorNd& weights, + const riemannFit::VectorNd& sTotal, + const riemannFit::VectorNd& varBeta) { + riemannFit::MatrixNd c_uMat = riemannFit::MatrixNd::Zero(); + for (u_int i = 0; i < n; i++) { + c_uMat(i, i) = weights(i); + if (i > 1) + c_uMat(i, i) += 1. / (varBeta(i - 1) * riemannFit::sqr(sTotal(i) - sTotal(i - 1))); + if (i > 0 && i < n - 1) + c_uMat(i, i) += + (1. / varBeta(i)) * riemannFit::sqr((sTotal(i + 1) - sTotal(i - 1)) / + ((sTotal(i + 1) - sTotal(i)) * (sTotal(i) - sTotal(i - 1)))); + if (i < n - 2) + c_uMat(i, i) += 1. / (varBeta(i + 1) * riemannFit::sqr(sTotal(i + 1) - sTotal(i))); + + if (i > 0 && i < n - 1) + c_uMat(i, i + 1) = + 1. / (varBeta(i) * (sTotal(i + 1) - sTotal(i))) * + (-(sTotal(i + 1) - sTotal(i - 1)) / ((sTotal(i + 1) - sTotal(i)) * (sTotal(i) - sTotal(i - 1)))); + if (i < n - 2) + c_uMat(i, i + 1) += + 1. / (varBeta(i + 1) * (sTotal(i + 1) - sTotal(i))) * + (-(sTotal(i + 2) - sTotal(i)) / ((sTotal(i + 2) - sTotal(i + 1)) * (sTotal(i + 1) - sTotal(i)))); + + if (i < n - 2) + c_uMat(i, i + 2) = 1. / (varBeta(i + 1) * (sTotal(i + 2) - sTotal(i + 1)) * (sTotal(i + 1) - sTotal(i))); + + c_uMat(i, i) *= 0.5; + } + return c_uMat + c_uMat.transpose(); + } + + /*! + \brief A very fast helix fit. + + \param hits the measured hits. + + \return (X0,Y0,R,tan(theta)). + + \warning sign of theta is (intentionally, for now) mistaken for negative charges. + */ + + template + __host__ __device__ inline void fastFit(const M3xN& hits, V4& result) { + constexpr uint32_t n = M3xN::ColsAtCompileTime; + + const riemannFit::Vector2d a = hits.block(0, n / 2, 2, 1) - hits.block(0, 0, 2, 1); + const riemannFit::Vector2d b = hits.block(0, n - 1, 2, 1) - hits.block(0, n / 2, 2, 1); + const riemannFit::Vector2d c = hits.block(0, 0, 2, 1) - hits.block(0, n - 1, 2, 1); + + auto tmp = 0.5 / riemannFit::cross2D(c, a); + result(0) = hits(0, 0) - (a(1) * c.squaredNorm() + c(1) * a.squaredNorm()) * tmp; + result(1) = hits(1, 0) + (a(0) * c.squaredNorm() + c(0) * a.squaredNorm()) * tmp; + // check Wikipedia for these formulas + + result(2) = sqrt(a.squaredNorm() * b.squaredNorm() * c.squaredNorm()) / (2. * std::abs(riemannFit::cross2D(b, a))); + // Using Math Olympiad's formula R=abc/(4A) + + const riemannFit::Vector2d d = hits.block(0, 0, 2, 1) - result.head(2); + const riemannFit::Vector2d e = hits.block(0, n - 1, 2, 1) - result.head(2); + + result(3) = result(2) * atan2(riemannFit::cross2D(d, e), d.dot(e)) / (hits(2, n - 1) - hits(2, 0)); + // ds/dz slope between last and first point + } + + /*! + \brief Performs the Broken Line fit in the curved track case (that is, the fit + * parameters are the interceptions u and the curvature correction \Delta\kappa). + + \param hits hits coordinates. + \param hits_cov hits covariance matrix. + \param fast_fit pre-fit result in the form (X0,Y0,R,tan(theta)). + \param bField magnetic field in Gev/cm/c. + \param data PreparedBrokenLineData. + \param circle_results struct to be filled with the results in this form: + -par parameter of the line in this form: (phi, d, k); \n + -cov covariance matrix of the fitted parameter; \n + -chi2 value of the cost function in the minimum. + + \details The function implements the steps 2 and 3 of the Broken Line fit + * with the curvature correction.\n + * The step 2 is the least square fit, done by imposing the minimum constraint on + * the cost function and solving the consequent linear system. It determines the + * fitted parameters u and \Delta\kappa and their covariance matrix. + * The step 3 is the correction of the fast pre-fitted parameters for the innermost + * part of the track. It is first done in a comfortable coordinate system (the one + * in which the first hit is the origin) and then the parameters and their + * covariance matrix are transformed to the original coordinate system. + */ + template + __host__ __device__ inline void circleFit(const M3xN& hits, + const M6xN& hits_ge, + const V4& fast_fit, + const double bField, + PreparedBrokenLineData& data, + karimaki_circle_fit& circle_results) { + circle_results.qCharge = data.qCharge; + auto& radii = data.radii; + const auto& sTransverse = data.sTransverse; + const auto& sTotal = data.sTotal; + auto& zInSZplane = data.zInSZplane; + auto& varBeta = data.varBeta; + const double slope = -circle_results.qCharge / fast_fit(3); + varBeta *= 1. + riemannFit::sqr(slope); // the kink angles are projected! + + for (u_int i = 0; i < n; i++) { + zInSZplane(i) = radii.block(0, i, 2, 1).norm() - fast_fit(2); + } + + riemannFit::Matrix2d vMat; // covariance matrix + riemannFit::VectorNd weightsVec; // weights + riemannFit::Matrix2d rotMat; // rotation matrix point by point + for (u_int i = 0; i < n; i++) { + vMat(0, 0) = hits_ge.col(i)[0]; // x errors + vMat(0, 1) = vMat(1, 0) = hits_ge.col(i)[1]; // cov_xy + vMat(1, 1) = hits_ge.col(i)[2]; // y errors + rotMat = rotationMatrix(-radii(0, i) / radii(1, i)); + weightsVec(i) = + 1. / ((rotMat * vMat * rotMat.transpose())(1, 1)); // compute the orthogonal weight point by point + } + + riemannFit::VectorNplusONEd r_uVec; + r_uVec(n) = 0; + for (u_int i = 0; i < n; i++) { + r_uVec(i) = weightsVec(i) * zInSZplane(i); + } + + riemannFit::MatrixNplusONEd c_uMat; + c_uMat.block(0, 0, n, n) = matrixC_u(weightsVec, sTransverse, varBeta); + c_uMat(n, n) = 0; + //add the border to the c_uMat matrix + for (u_int i = 0; i < n; i++) { + c_uMat(i, n) = 0; + if (i > 0 && i < n - 1) { + c_uMat(i, n) += + -(sTransverse(i + 1) - sTransverse(i - 1)) * (sTransverse(i + 1) - sTransverse(i - 1)) / + (2. * varBeta(i) * (sTransverse(i + 1) - sTransverse(i)) * (sTransverse(i) - sTransverse(i - 1))); + } + if (i > 1) { + c_uMat(i, n) += + (sTransverse(i) - sTransverse(i - 2)) / (2. * varBeta(i - 1) * (sTransverse(i) - sTransverse(i - 1))); + } + if (i < n - 2) { + c_uMat(i, n) += + (sTransverse(i + 2) - sTransverse(i)) / (2. * varBeta(i + 1) * (sTransverse(i + 1) - sTransverse(i))); + } + c_uMat(n, i) = c_uMat(i, n); + if (i > 0 && i < n - 1) + c_uMat(n, n) += riemannFit::sqr(sTransverse(i + 1) - sTransverse(i - 1)) / (4. * varBeta(i)); + } + +#ifdef CPP_DUMP + std::cout << "CU5\n" << c_uMat << std::endl; +#endif + riemannFit::MatrixNplusONEd iMat; + math::cholesky::invert(c_uMat, iMat); +#ifdef CPP_DUMP + std::cout << "I5\n" << iMat << std::endl; +#endif + + riemannFit::VectorNplusONEd uVec = iMat * r_uVec; // obtain the fitted parameters by solving the linear system + + // compute (phi, d_ca, k) in the system in which the midpoint of the first two corrected hits is the origin... + + radii.block(0, 0, 2, 1) /= radii.block(0, 0, 2, 1).norm(); + radii.block(0, 1, 2, 1) /= radii.block(0, 1, 2, 1).norm(); + + riemannFit::Vector2d dVec = hits.block(0, 0, 2, 1) + (-zInSZplane(0) + uVec(0)) * radii.block(0, 0, 2, 1); + riemannFit::Vector2d eVec = hits.block(0, 1, 2, 1) + (-zInSZplane(1) + uVec(1)) * radii.block(0, 1, 2, 1); + + circle_results.par << atan2((eVec - dVec)(1), (eVec - dVec)(0)), + -circle_results.qCharge * + (fast_fit(2) - sqrt(riemannFit::sqr(fast_fit(2)) - 0.25 * (eVec - dVec).squaredNorm())), + circle_results.qCharge * (1. / fast_fit(2) + uVec(n)); + + assert(circle_results.qCharge * circle_results.par(1) <= 0); + + riemannFit::Vector2d eMinusd = eVec - dVec; + double tmp1 = eMinusd.squaredNorm(); + double tmp2 = sqrt(riemannFit::sqr(2 * fast_fit(2)) - tmp1); + + riemannFit::Matrix3d jacobian; + jacobian << (radii(1, 0) * eMinusd(0) - eMinusd(1) * radii(0, 0)) / tmp1, + (radii(1, 1) * eMinusd(0) - eMinusd(1) * radii(0, 1)) / tmp1, 0, + (circle_results.qCharge / 2) * (eMinusd(0) * radii(0, 0) + eMinusd(1) * radii(1, 0)) / tmp2, + (circle_results.qCharge / 2) * (eMinusd(0) * radii(0, 1) + eMinusd(1) * radii(1, 1)) / tmp2, 0, 0, 0, + circle_results.qCharge; + + circle_results.cov << iMat(0, 0), iMat(0, 1), iMat(0, n), iMat(1, 0), iMat(1, 1), iMat(1, n), iMat(n, 0), + iMat(n, 1), iMat(n, n); + + circle_results.cov = jacobian * circle_results.cov * jacobian.transpose(); + + //...Translate in the system in which the first corrected hit is the origin, adding the m.s. correction... + + auto eMinusDVec = eVec - dVec; + translateKarimaki(circle_results, 0.5 * eMinusDVec(0), 0.5 * eMinusDVec(1), jacobian); + circle_results.cov(0, 0) += + (1 + riemannFit::sqr(slope)) * multScatt(sTotal(1) - sTotal(0), bField, fast_fit(2), 2, slope); + + //...And translate back to the original system + + translateKarimaki(circle_results, dVec(0), dVec(1), jacobian); + + // compute chi2 + circle_results.chi2 = 0; + for (u_int i = 0; i < n; i++) { + circle_results.chi2 += weightsVec(i) * riemannFit::sqr(zInSZplane(i) - uVec(i)); + if (i > 0 && i < n - 1) + circle_results.chi2 += + riemannFit::sqr(uVec(i - 1) / (sTransverse(i) - sTransverse(i - 1)) - + uVec(i) * (sTransverse(i + 1) - sTransverse(i - 1)) / + ((sTransverse(i + 1) - sTransverse(i)) * (sTransverse(i) - sTransverse(i - 1))) + + uVec(i + 1) / (sTransverse(i + 1) - sTransverse(i)) + + (sTransverse(i + 1) - sTransverse(i - 1)) * uVec(n) / 2) / + varBeta(i); + } + + // assert(circle_results.chi2>=0); + } + + /*! + \brief Performs the Broken Line fit in the straight track case (that is, the fit parameters are only the interceptions u). + + \param hits hits coordinates. + \param fast_fit pre-fit result in the form (X0,Y0,R,tan(theta)). + \param bField magnetic field in Gev/cm/c. + \param data PreparedBrokenLineData. + \param line_results struct to be filled with the results in this form: + -par parameter of the line in this form: (cot(theta), Zip); \n + -cov covariance matrix of the fitted parameter; \n + -chi2 value of the cost function in the minimum. + + \details The function implements the steps 2 and 3 of the Broken Line fit without + * the curvature correction.\n + * The step 2 is the least square fit, done by imposing the minimum constraint + * on the cost function and solving the consequent linear system. It determines + * the fitted parameters u and their covariance matrix. + * The step 3 is the correction of the fast pre-fitted parameters for the innermost + * part of the track. It is first done in a comfortable coordinate system (the one + * in which the first hit is the origin) and then the parameters and their covariance + * matrix are transformed to the original coordinate system. + */ + template + __host__ __device__ inline void lineFit(const M6xN& hits_ge, + const V4& fast_fit, + const double bField, + const PreparedBrokenLineData& data, + riemannFit::LineFit& line_results) { + const auto& radii = data.radii; + const auto& sTotal = data.sTotal; + const auto& zInSZplane = data.zInSZplane; + const auto& varBeta = data.varBeta; + + const double slope = -data.qCharge / fast_fit(3); + riemannFit::Matrix2d rotMat = rotationMatrix(slope); + + riemannFit::Matrix3d vMat = riemannFit::Matrix3d::Zero(); // covariance matrix XYZ + riemannFit::Matrix2x3d jacobXYZtosZ = + riemannFit::Matrix2x3d::Zero(); // jacobian for computation of the error on s (xyz -> sz) + riemannFit::VectorNd weights = riemannFit::VectorNd::Zero(); + for (u_int i = 0; i < n; i++) { + vMat(0, 0) = hits_ge.col(i)[0]; // x errors + vMat(0, 1) = vMat(1, 0) = hits_ge.col(i)[1]; // cov_xy + vMat(0, 2) = vMat(2, 0) = hits_ge.col(i)[3]; // cov_xz + vMat(1, 1) = hits_ge.col(i)[2]; // y errors + vMat(2, 1) = vMat(1, 2) = hits_ge.col(i)[4]; // cov_yz + vMat(2, 2) = hits_ge.col(i)[5]; // z errors + auto tmp = 1. / radii.block(0, i, 2, 1).norm(); + jacobXYZtosZ(0, 0) = radii(1, i) * tmp; + jacobXYZtosZ(0, 1) = -radii(0, i) * tmp; + jacobXYZtosZ(1, 2) = 1.; + weights(i) = 1. / ((rotMat * jacobXYZtosZ * vMat * jacobXYZtosZ.transpose() * rotMat.transpose())( + 1, 1)); // compute the orthogonal weight point by point + } + + riemannFit::VectorNd r_u; + for (u_int i = 0; i < n; i++) { + r_u(i) = weights(i) * zInSZplane(i); + } +#ifdef CPP_DUMP + std::cout << "CU4\n" << matrixC_u(w, sTotal, varBeta) << std::endl; +#endif + riemannFit::MatrixNd iMat; + math::cholesky::invert(matrixC_u(weights, sTotal, varBeta), iMat); +#ifdef CPP_DUMP + std::cout << "I4\n" << iMat << std::endl; +#endif + + riemannFit::VectorNd uVec = iMat * r_u; // obtain the fitted parameters by solving the linear system + + // line parameters in the system in which the first hit is the origin and with axis along SZ + line_results.par << (uVec(1) - uVec(0)) / (sTotal(1) - sTotal(0)), uVec(0); + auto idiff = 1. / (sTotal(1) - sTotal(0)); + line_results.cov << (iMat(0, 0) - 2 * iMat(0, 1) + iMat(1, 1)) * riemannFit::sqr(idiff) + + multScatt(sTotal(1) - sTotal(0), bField, fast_fit(2), 2, slope), + (iMat(0, 1) - iMat(0, 0)) * idiff, (iMat(0, 1) - iMat(0, 0)) * idiff, iMat(0, 0); + + // translate to the original SZ system + riemannFit::Matrix2d jacobian; + jacobian(0, 0) = 1.; + jacobian(0, 1) = 0; + jacobian(1, 0) = -sTotal(0); + jacobian(1, 1) = 1.; + line_results.par(1) += -line_results.par(0) * sTotal(0); + line_results.cov = jacobian * line_results.cov * jacobian.transpose(); + + // rotate to the original sz system + auto tmp = rotMat(0, 0) - line_results.par(0) * rotMat(0, 1); + jacobian(1, 1) = 1. / tmp; + jacobian(0, 0) = jacobian(1, 1) * jacobian(1, 1); + jacobian(0, 1) = 0; + jacobian(1, 0) = line_results.par(1) * rotMat(0, 1) * jacobian(0, 0); + line_results.par(1) = line_results.par(1) * jacobian(1, 1); + line_results.par(0) = (rotMat(0, 1) + line_results.par(0) * rotMat(0, 0)) * jacobian(1, 1); + line_results.cov = jacobian * line_results.cov * jacobian.transpose(); + + // compute chi2 + line_results.chi2 = 0; + for (u_int i = 0; i < n; i++) { + line_results.chi2 += weights(i) * riemannFit::sqr(zInSZplane(i) - uVec(i)); + if (i > 0 && i < n - 1) + line_results.chi2 += riemannFit::sqr(uVec(i - 1) / (sTotal(i) - sTotal(i - 1)) - + uVec(i) * (sTotal(i + 1) - sTotal(i - 1)) / + ((sTotal(i + 1) - sTotal(i)) * (sTotal(i) - sTotal(i - 1))) + + uVec(i + 1) / (sTotal(i + 1) - sTotal(i))) / + varBeta(i); + } + } + + /*! + \brief Helix fit by three step: + -fast pre-fit (see Fast_fit() for further info); \n + -circle fit of the hits projected in the transverse plane by Broken Line algorithm (see BL_Circle_fit() for further info); \n + -line fit of the hits projected on the (pre-fitted) cilinder surface by Broken Line algorithm (see BL_Line_fit() for further info); \n + Points must be passed ordered (from inner to outer layer). + + \param hits Matrix3xNd hits coordinates in this form: \n + |x1|x2|x3|...|xn| \n + |y1|y2|y3|...|yn| \n + |z1|z2|z3|...|zn| + \param hits_cov Matrix3Nd covariance matrix in this form (()->cov()): \n + |(x1,x1)|(x2,x1)|(x3,x1)|(x4,x1)|.|(y1,x1)|(y2,x1)|(y3,x1)|(y4,x1)|.|(z1,x1)|(z2,x1)|(z3,x1)|(z4,x1)| \n + |(x1,x2)|(x2,x2)|(x3,x2)|(x4,x2)|.|(y1,x2)|(y2,x2)|(y3,x2)|(y4,x2)|.|(z1,x2)|(z2,x2)|(z3,x2)|(z4,x2)| \n + |(x1,x3)|(x2,x3)|(x3,x3)|(x4,x3)|.|(y1,x3)|(y2,x3)|(y3,x3)|(y4,x3)|.|(z1,x3)|(z2,x3)|(z3,x3)|(z4,x3)| \n + |(x1,x4)|(x2,x4)|(x3,x4)|(x4,x4)|.|(y1,x4)|(y2,x4)|(y3,x4)|(y4,x4)|.|(z1,x4)|(z2,x4)|(z3,x4)|(z4,x4)| \n + . . . . . . . . . . . . . . . \n + |(x1,y1)|(x2,y1)|(x3,y1)|(x4,y1)|.|(y1,y1)|(y2,y1)|(y3,x1)|(y4,y1)|.|(z1,y1)|(z2,y1)|(z3,y1)|(z4,y1)| \n + |(x1,y2)|(x2,y2)|(x3,y2)|(x4,y2)|.|(y1,y2)|(y2,y2)|(y3,x2)|(y4,y2)|.|(z1,y2)|(z2,y2)|(z3,y2)|(z4,y2)| \n + |(x1,y3)|(x2,y3)|(x3,y3)|(x4,y3)|.|(y1,y3)|(y2,y3)|(y3,x3)|(y4,y3)|.|(z1,y3)|(z2,y3)|(z3,y3)|(z4,y3)| \n + |(x1,y4)|(x2,y4)|(x3,y4)|(x4,y4)|.|(y1,y4)|(y2,y4)|(y3,x4)|(y4,y4)|.|(z1,y4)|(z2,y4)|(z3,y4)|(z4,y4)| \n + . . . . . . . . . . . . . . . \n + |(x1,z1)|(x2,z1)|(x3,z1)|(x4,z1)|.|(y1,z1)|(y2,z1)|(y3,z1)|(y4,z1)|.|(z1,z1)|(z2,z1)|(z3,z1)|(z4,z1)| \n + |(x1,z2)|(x2,z2)|(x3,z2)|(x4,z2)|.|(y1,z2)|(y2,z2)|(y3,z2)|(y4,z2)|.|(z1,z2)|(z2,z2)|(z3,z2)|(z4,z2)| \n + |(x1,z3)|(x2,z3)|(x3,z3)|(x4,z3)|.|(y1,z3)|(y2,z3)|(y3,z3)|(y4,z3)|.|(z1,z3)|(z2,z3)|(z3,z3)|(z4,z3)| \n + |(x1,z4)|(x2,z4)|(x3,z4)|(x4,z4)|.|(y1,z4)|(y2,z4)|(y3,z4)|(y4,z4)|.|(z1,z4)|(z2,z4)|(z3,z4)|(z4,z4)| + \param bField magnetic field in the center of the detector in Gev/cm/c, in order to perform the p_t calculation. + + \warning see BL_Circle_fit(), BL_Line_fit() and Fast_fit() warnings. + + \bug see BL_Circle_fit(), BL_Line_fit() and Fast_fit() bugs. + + \return (phi,Tip,p_t,cot(theta)),Zip), their covariance matrix and the chi2's of the circle and line fits. + */ + template + inline riemannFit::HelixFit helixFit(const riemannFit::Matrix3xNd& hits, + const Eigen::Matrix& hits_ge, + const double bField) { + riemannFit::HelixFit helix; + riemannFit::Vector4d fast_fit; + fastFit(hits, fast_fit); + + PreparedBrokenLineData data; + karimaki_circle_fit circle; + riemannFit::LineFit line; + riemannFit::Matrix3d jacobian; + + prepareBrokenLineData(hits, fast_fit, bField, data); + lineFit(hits_ge, fast_fit, bField, data, line); + circleFit(hits, hits_ge, fast_fit, bField, data, circle); + + // the circle fit gives k, but here we want p_t, so let's change the parameter and the covariance matrix + jacobian << 1., 0, 0, 0, 1., 0, 0, 0, + -std::abs(circle.par(2)) * bField / (riemannFit::sqr(circle.par(2)) * circle.par(2)); + circle.par(2) = bField / std::abs(circle.par(2)); + circle.cov = jacobian * circle.cov * jacobian.transpose(); + + helix.par << circle.par, line.par; + helix.cov = riemannFit::MatrixXd::Zero(5, 5); + helix.cov.block(0, 0, 3, 3) = circle.cov; + helix.cov.block(3, 3, 2, 2) = line.cov; + helix.qCharge = circle.qCharge; + helix.chi2_circle = circle.chi2; + helix.chi2_line = line.chi2; + + return helix; + } + +} // namespace brokenline + +#endif // RecoPixelVertexing_PixelTrackFitting_interface_BrokenLine_h diff --git a/RecoPixelVertexing/PixelTrackFitting/interface/FitResult.h b/RecoPixelVertexing/PixelTrackFitting/interface/FitResult.h new file mode 100644 index 0000000000000..01497719d2998 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/interface/FitResult.h @@ -0,0 +1,65 @@ +#ifndef RecoPixelVertexing_PixelTrackFitting_interface_FitResult_h +#define RecoPixelVertexing_PixelTrackFitting_interface_FitResult_h + +#include +#include + +#include +#include +#include + +namespace riemannFit { + + using Vector2d = Eigen::Vector2d; + using Vector3d = Eigen::Vector3d; + using Vector4d = Eigen::Vector4d; + using Vector5d = Eigen::Matrix; + using Matrix2d = Eigen::Matrix2d; + using Matrix3d = Eigen::Matrix3d; + using Matrix4d = Eigen::Matrix4d; + using Matrix5d = Eigen::Matrix; + using Matrix6d = Eigen::Matrix; + + template + using Matrix3xNd = Eigen::Matrix; // used for inputs hits + + struct CircleFit { + Vector3d par; //!< parameter: (X0,Y0,R) + Matrix3d cov; + /*!< covariance matrix: \n + |cov(X0,X0)|cov(Y0,X0)|cov( R,X0)| \n + |cov(X0,Y0)|cov(Y0,Y0)|cov( R,Y0)| \n + |cov(X0, R)|cov(Y0, R)|cov( R, R)| + */ + int32_t qCharge; //!< particle charge + float chi2; + }; + + struct LineFit { + Vector2d par; //!<(cotan(theta),Zip) + Matrix2d cov; + /*!< + |cov(c_t,c_t)|cov(Zip,c_t)| \n + |cov(c_t,Zip)|cov(Zip,Zip)| + */ + double chi2; + }; + + struct HelixFit { + Vector5d par; //!<(phi,Tip,pt,cotan(theta)),Zip) + Matrix5d cov; + /*!< ()->cov() \n + |(phi,phi)|(Tip,phi)|(p_t,phi)|(c_t,phi)|(Zip,phi)| \n + |(phi,Tip)|(Tip,Tip)|(p_t,Tip)|(c_t,Tip)|(Zip,Tip)| \n + |(phi,p_t)|(Tip,p_t)|(p_t,p_t)|(c_t,p_t)|(Zip,p_t)| \n + |(phi,c_t)|(Tip,c_t)|(p_t,c_t)|(c_t,c_t)|(Zip,c_t)| \n + |(phi,Zip)|(Tip,Zip)|(p_t,Zip)|(c_t,Zip)|(Zip,Zip)| + */ + float chi2_circle; + float chi2_line; + // Vector4d fast_fit; + int32_t qCharge; //!< particle charge + }; // __attribute__((aligned(16))); + +} // namespace riemannFit +#endif diff --git a/RecoPixelVertexing/PixelTrackFitting/interface/FitUtils.h b/RecoPixelVertexing/PixelTrackFitting/interface/FitUtils.h new file mode 100644 index 0000000000000..2fe74f53a7bd2 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/interface/FitUtils.h @@ -0,0 +1,243 @@ +#ifndef RecoPixelVertexing_PixelTrackFitting_interface_FitUtils_h +#define RecoPixelVertexing_PixelTrackFitting_interface_FitUtils_h + +#include "DataFormats/Math/interface/choleskyInversion.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/FitResult.h" + +namespace riemannFit { + + constexpr double epsilon = 1.e-4; //!< used in numerical derivative (J2 in Circle_fit()) + + using VectorXd = Eigen::VectorXd; + using MatrixXd = Eigen::MatrixXd; + template + using MatrixNd = Eigen::Matrix; + template + using MatrixNplusONEd = Eigen::Matrix; + template + using ArrayNd = Eigen::Array; + template + using Matrix2Nd = Eigen::Matrix; + template + using Matrix3Nd = Eigen::Matrix; + template + using Matrix2xNd = Eigen::Matrix; + template + using Array2xNd = Eigen::Array; + template + using MatrixNx3d = Eigen::Matrix; + template + using MatrixNx5d = Eigen::Matrix; + template + using VectorNd = Eigen::Matrix; + template + using VectorNplusONEd = Eigen::Matrix; + template + using Vector2Nd = Eigen::Matrix; + template + using Vector3Nd = Eigen::Matrix; + template + using RowVectorNd = Eigen::Matrix; + template + using RowVector2Nd = Eigen::Matrix; + + using Matrix2x3d = Eigen::Matrix; + + using Matrix3f = Eigen::Matrix3f; + using Vector3f = Eigen::Vector3f; + using Vector4f = Eigen::Vector4f; + using Vector6f = Eigen::Matrix; + + template + __host__ __device__ void printIt(C* m, const char* prefix = "") { +#ifdef RFIT_DEBUG + for (uint r = 0; r < m->rows(); ++r) { + for (uint c = 0; c < m->cols(); ++c) { + printf("%s Matrix(%d,%d) = %g\n", prefix, r, c, (*m)(r, c)); + } + } +#endif + } + + /*! + \brief raise to square. + */ + template + constexpr T sqr(const T a) { + return a * a; + } + + /*! + \brief Compute cross product of two 2D vector (assuming z component 0), + returning z component of the result. + \param a first 2D vector in the product. + \param b second 2D vector in the product. + \return z component of the cross product. + */ + + __host__ __device__ inline double cross2D(const Vector2d& a, const Vector2d& b) { + return a.x() * b.y() - a.y() * b.x(); + } + + /*! + * load error in CMSSW format to our formalism + * + */ + template + __host__ __device__ void loadCovariance2D(M6xNf const& ge, M2Nd& hits_cov) { + // Index numerology: + // i: index of the hits/point (0,..,3) + // j: index of space component (x,y,z) + // l: index of space components (x,y,z) + // ge is always in sync with the index i and is formatted as: + // ge[] ==> [xx, xy, yy, xz, yz, zz] + // in (j,l) notation, we have: + // ge[] ==> [(0,0), (0,1), (1,1), (0,2), (1,2), (2,2)] + // so the index ge_idx corresponds to the matrix elements: + // | 0 1 3 | + // | 1 2 4 | + // | 3 4 5 | + constexpr uint32_t hits_in_fit = M6xNf::ColsAtCompileTime; + for (uint32_t i = 0; i < hits_in_fit; ++i) { + { + constexpr uint32_t ge_idx = 0, j = 0, l = 0; + hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 2, j = 1, l = 1; + hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 1, j = 1, l = 0; + hits_cov(i + l * hits_in_fit, i + j * hits_in_fit) = hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = + ge.col(i)[ge_idx]; + } + } + } + + template + __host__ __device__ void loadCovariance(M6xNf const& ge, M3xNd& hits_cov) { + // Index numerology: + // i: index of the hits/point (0,..,3) + // j: index of space component (x,y,z) + // l: index of space components (x,y,z) + // ge is always in sync with the index i and is formatted as: + // ge[] ==> [xx, xy, yy, xz, yz, zz] + // in (j,l) notation, we have: + // ge[] ==> [(0,0), (0,1), (1,1), (0,2), (1,2), (2,2)] + // so the index ge_idx corresponds to the matrix elements: + // | 0 1 3 | + // | 1 2 4 | + // | 3 4 5 | + constexpr uint32_t hits_in_fit = M6xNf::ColsAtCompileTime; + for (uint32_t i = 0; i < hits_in_fit; ++i) { + { + constexpr uint32_t ge_idx = 0, j = 0, l = 0; + hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 2, j = 1, l = 1; + hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 5, j = 2, l = 2; + hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 1, j = 1, l = 0; + hits_cov(i + l * hits_in_fit, i + j * hits_in_fit) = hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = + ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 3, j = 2, l = 0; + hits_cov(i + l * hits_in_fit, i + j * hits_in_fit) = hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = + ge.col(i)[ge_idx]; + } + { + constexpr uint32_t ge_idx = 4, j = 2, l = 1; + hits_cov(i + l * hits_in_fit, i + j * hits_in_fit) = hits_cov(i + j * hits_in_fit, i + l * hits_in_fit) = + ge.col(i)[ge_idx]; + } + } + } + + /*! + \brief Transform circle parameter from (X0,Y0,R) to (phi,Tip,p_t) and + consequently covariance matrix. + \param circle_uvr parameter (X0,Y0,R), covariance matrix to + be transformed and particle charge. + \param B magnetic field in Gev/cm/c unit. + \param error flag for errors computation. + */ + __host__ __device__ inline void par_uvrtopak(CircleFit& circle, const double B, const bool error) { + Vector3d par_pak; + const double temp0 = circle.par.head(2).squaredNorm(); + const double temp1 = sqrt(temp0); + par_pak << atan2(circle.qCharge * circle.par(0), -circle.qCharge * circle.par(1)), + circle.qCharge * (temp1 - circle.par(2)), circle.par(2) * B; + if (error) { + const double temp2 = sqr(circle.par(0)) * 1. / temp0; + const double temp3 = 1. / temp1 * circle.qCharge; + Matrix3d j4Mat; + j4Mat << -circle.par(1) * temp2 * 1. / sqr(circle.par(0)), temp2 * 1. / circle.par(0), 0., circle.par(0) * temp3, + circle.par(1) * temp3, -circle.qCharge, 0., 0., B; + circle.cov = j4Mat * circle.cov * j4Mat.transpose(); + } + circle.par = par_pak; + } + + /*! + \brief Transform circle parameter from (X0,Y0,R) to (phi,Tip,q/R) and + consequently covariance matrix. + \param circle_uvr parameter (X0,Y0,R), covariance matrix to + be transformed and particle charge. + */ + __host__ __device__ inline void fromCircleToPerigee(CircleFit& circle) { + Vector3d par_pak; + const double temp0 = circle.par.head(2).squaredNorm(); + const double temp1 = sqrt(temp0); + par_pak << atan2(circle.qCharge * circle.par(0), -circle.qCharge * circle.par(1)), + circle.qCharge * (temp1 - circle.par(2)), circle.qCharge / circle.par(2); + + const double temp2 = sqr(circle.par(0)) * 1. / temp0; + const double temp3 = 1. / temp1 * circle.qCharge; + Matrix3d j4Mat; + j4Mat << -circle.par(1) * temp2 * 1. / sqr(circle.par(0)), temp2 * 1. / circle.par(0), 0., circle.par(0) * temp3, + circle.par(1) * temp3, -circle.qCharge, 0., 0., -circle.qCharge / (circle.par(2) * circle.par(2)); + circle.cov = j4Mat * circle.cov * j4Mat.transpose(); + + circle.par = par_pak; + } + + // transformation between the "perigee" to cmssw localcoord frame + // the plane of the latter is the perigee plane... + // from //!<(phi,Tip,q/pt,cotan(theta)),Zip) + // to q/p,dx/dz,dy/dz,x,z + template + __host__ __device__ inline void transformToPerigeePlane(VI5 const& ip, MI5 const& icov, VO5& op, MO5& ocov) { + auto sinTheta2 = 1. / (1. + ip(3) * ip(3)); + auto sinTheta = std::sqrt(sinTheta2); + auto cosTheta = ip(3) * sinTheta; + + op(0) = sinTheta * ip(2); + op(1) = 0.; + op(2) = -ip(3); + op(3) = ip(1); + op(4) = -ip(4); + + Matrix5d jMat = Matrix5d::Zero(); + + jMat(0, 2) = sinTheta; + jMat(0, 3) = -sinTheta2 * cosTheta * ip(2); + jMat(1, 0) = 1.; + jMat(2, 3) = -1.; + jMat(3, 1) = 1.; + jMat(4, 4) = -1; + + ocov = jMat * icov * jMat.transpose(); + } + +} // namespace riemannFit + +#endif // RecoPixelVertexing_PixelTrackFitting_interface_FitUtils_h diff --git a/RecoPixelVertexing/PixelTrackFitting/interface/PixelNtupletsFitter.h b/RecoPixelVertexing/PixelTrackFitting/interface/PixelNtupletsFitter.h new file mode 100644 index 0000000000000..9fb8843589669 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/interface/PixelNtupletsFitter.h @@ -0,0 +1,27 @@ +#ifndef RecoPixelVertexing_PixelTrackFitting_interface_PixelNtupletsFitter_h +#define RecoPixelVertexing_PixelTrackFitting_interface_PixelNtupletsFitter_h + +#include + +#include "DataFormats/TrackReco/interface/Track.h" +#include "DataFormats/TrackingRecHit/interface/TrackingRecHit.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/PixelFitterBase.h" +#include "RecoTracker/TkTrackingRegions/interface/TrackingRegion.h" + +class PixelNtupletsFitter final : public PixelFitterBase { +public: + explicit PixelNtupletsFitter(float nominalB, const MagneticField* field, bool useRiemannFit); + ~PixelNtupletsFitter() override = default; + std::unique_ptr run(const std::vector& hits, + const TrackingRegion& region, + const edm::EventSetup& setup) const override; + +private: + float nominalB_; + const MagneticField* field_; + bool useRiemannFit_; +}; + +#endif // RecoPixelVertexing_PixelTrackFitting_interface_PixelNtupletsFitter_h diff --git a/RecoPixelVertexing/PixelTrackFitting/interface/RiemannFit.h b/RecoPixelVertexing/PixelTrackFitting/interface/RiemannFit.h new file mode 100644 index 0000000000000..52cf4b637fb37 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/interface/RiemannFit.h @@ -0,0 +1,1008 @@ +#ifndef RecoPixelVertexing_PixelTrackFitting_interface_RiemannFit_h +#define RecoPixelVertexing_PixelTrackFitting_interface_RiemannFit_h + +#include "RecoPixelVertexing/PixelTrackFitting/interface/FitUtils.h" + +namespace riemannFit { + + /*! Compute the Radiation length in the uniform hypothesis + * + * The Pixel detector, barrel and forward, is considered as an homogeneous + * cylinder of material, whose radiation lengths has been derived from the TDR + * plot that shows that 16cm correspond to 0.06 radiation lengths. Therefore + * one radiation length corresponds to 16cm/0.06 =~ 267 cm. All radiation + * lengths are computed using this unique number, in both regions, barrel and + * endcap. + * + * NB: no angle corrections nor projections are computed inside this routine. + * It is therefore the responsibility of the caller to supply the proper + * lengths in input. These lengths are the path traveled by the particle along + * its trajectory, namely the so called S of the helix in 3D space. + * + * \param length_values vector of incremental distances that will be translated + * into radiation length equivalent. Each radiation length i is computed + * incrementally with respect to the previous length i-1. The first length has + * no reference point (i.e. it has the dca). + * + * \return incremental radiation lengths that correspond to each segment. + */ + + template + __host__ __device__ inline void computeRadLenUniformMaterial(const VNd1& length_values, VNd2& rad_lengths) { + // Radiation length of the pixel detector in the uniform assumption, with + // 0.06 rad_len at 16 cm + constexpr double xx_0_inv = 0.06 / 16.; + uint n = length_values.rows(); + rad_lengths(0) = length_values(0) * xx_0_inv; + for (uint j = 1; j < n; ++j) { + rad_lengths(j) = std::abs(length_values(j) - length_values(j - 1)) * xx_0_inv; + } + } + + /*! + \brief Compute the covariance matrix along cartesian S-Z of points due to + multiple Coulomb scattering to be used in the line_fit, for the barrel + and forward cases. + The input covariance matrix is in the variables s-z, original and + unrotated. + The multiple scattering component is computed in the usual linear + approximation, using the 3D path which is computed as the squared root of + the squared sum of the s and z components passed in. + Internally a rotation by theta is performed and the covariance matrix + returned is the one in the direction orthogonal to the rotated S3D axis, + i.e. along the rotated Z axis. + The choice of the rotation is not arbitrary, but derived from the fact that + putting the horizontal axis along the S3D direction allows the usage of the + ordinary least squared fitting techiques with the trivial parametrization y + = mx + q, avoiding the patological case with m = +/- inf, that would + correspond to the case at eta = 0. + */ + + template + __host__ __device__ inline auto scatterCovLine(Matrix2d const* cov_sz, + const V4& fast_fit, + VNd1 const& s_arcs, + VNd2 const& z_values, + const double theta, + const double bField, + MatrixNd& ret) { +#ifdef RFIT_DEBUG + riemannFit::printIt(&s_arcs, "Scatter_cov_line - s_arcs: "); +#endif + constexpr uint n = N; + double p_t = std::min(20., fast_fit(2) * bField); // limit pt to avoid too small error!!! + double p_2 = p_t * p_t * (1. + 1. / sqr(fast_fit(3))); + VectorNd rad_lengths_S; + // See documentation at http://eigen.tuxfamily.org/dox/group__TutorialArrayClass.html + // Basically, to perform cwise operations on Matrices and Vectors, you need + // to transform them into Array-like objects. + VectorNd s_values = s_arcs.array() * s_arcs.array() + z_values.array() * z_values.array(); + s_values = s_values.array().sqrt(); + computeRadLenUniformMaterial(s_values, rad_lengths_S); + VectorNd sig2_S; + sig2_S = .000225 / p_2 * (1. + 0.038 * rad_lengths_S.array().log()).abs2() * rad_lengths_S.array(); +#ifdef RFIT_DEBUG + riemannFit::printIt(cov_sz, "Scatter_cov_line - cov_sz: "); +#endif + Matrix2Nd tmp = Matrix2Nd::Zero(); + for (uint k = 0; k < n; ++k) { + tmp(k, k) = cov_sz[k](0, 0); + tmp(k + n, k + n) = cov_sz[k](1, 1); + tmp(k, k + n) = tmp(k + n, k) = cov_sz[k](0, 1); + } + for (uint k = 0; k < n; ++k) { + for (uint l = k; l < n; ++l) { + for (uint i = 0; i < std::min(k, l); ++i) { + tmp(k + n, l + n) += std::abs(s_values(k) - s_values(i)) * std::abs(s_values(l) - s_values(i)) * sig2_S(i); + } + tmp(l + n, k + n) = tmp(k + n, l + n); + } + } + // We are interested only in the errors orthogonal to the rotated s-axis + // which, in our formalism, are in the lower square matrix. +#ifdef RFIT_DEBUG + riemannFit::printIt(&tmp, "Scatter_cov_line - tmp: "); +#endif + ret = tmp.block(n, n, n, n); + } + + /*! + \brief Compute the covariance matrix (in radial coordinates) of points in + the transverse plane due to multiple Coulomb scattering. + \param p2D 2D points in the transverse plane. + \param fast_fit fast_fit Vector4d result of the previous pre-fit + structured in this form:(X0, Y0, R, Tan(Theta))). + \param B magnetic field use to compute p + \return scatter_cov_rad errors due to multiple scattering. + \warning input points must be ordered radially from the detector center + (from inner layer to outer ones; points on the same layer must ordered too). + \details Only the tangential component is computed (the radial one is + negligible). + */ + template + __host__ __device__ inline MatrixNd scatter_cov_rad(const M2xN& p2D, + const V4& fast_fit, + VectorNd const& rad, + double B) { + constexpr uint n = N; + double p_t = std::min(20., fast_fit(2) * B); // limit pt to avoid too small error!!! + double p_2 = p_t * p_t * (1. + 1. / sqr(fast_fit(3))); + double theta = atan(fast_fit(3)); + theta = theta < 0. ? theta + M_PI : theta; + VectorNd s_values; + VectorNd rad_lengths; + const Vector2d oVec(fast_fit(0), fast_fit(1)); + + // associated Jacobian, used in weights and errors computation + for (uint i = 0; i < n; ++i) { // x + Vector2d pVec = p2D.block(0, i, 2, 1) - oVec; + const double cross = cross2D(-oVec, pVec); + const double dot = (-oVec).dot(pVec); + const double tempAtan2 = atan2(cross, dot); + s_values(i) = std::abs(tempAtan2 * fast_fit(2)); + } + computeRadLenUniformMaterial(s_values * sqrt(1. + 1. / sqr(fast_fit(3))), rad_lengths); + MatrixNd scatter_cov_rad = MatrixNd::Zero(); + VectorNd sig2 = (1. + 0.038 * rad_lengths.array().log()).abs2() * rad_lengths.array(); + sig2 *= 0.000225 / (p_2 * sqr(sin(theta))); + for (uint k = 0; k < n; ++k) { + for (uint l = k; l < n; ++l) { + for (uint i = 0; i < std::min(k, l); ++i) { + scatter_cov_rad(k, l) += (rad(k) - rad(i)) * (rad(l) - rad(i)) * sig2(i); + } + scatter_cov_rad(l, k) = scatter_cov_rad(k, l); + } + } +#ifdef RFIT_DEBUG + riemannFit::printIt(&scatter_cov_rad, "Scatter_cov_rad - scatter_cov_rad: "); +#endif + return scatter_cov_rad; + } + + /*! + \brief Transform covariance matrix from radial (only tangential component) + to Cartesian coordinates (only transverse plane component). + \param p2D 2D points in the transverse plane. + \param cov_rad covariance matrix in radial coordinate. + \return cov_cart covariance matrix in Cartesian coordinates. +*/ + + template + __host__ __device__ inline Matrix2Nd cov_radtocart(const M2xN& p2D, + const MatrixNd& cov_rad, + const VectorNd& rad) { +#ifdef RFIT_DEBUG + printf("Address of p2D: %p\n", &p2D); +#endif + printIt(&p2D, "cov_radtocart - p2D:"); + constexpr uint n = N; + Matrix2Nd cov_cart = Matrix2Nd::Zero(); + VectorNd rad_inv = rad.cwiseInverse(); + printIt(&rad_inv, "cov_radtocart - rad_inv:"); + for (uint i = 0; i < n; ++i) { + for (uint j = i; j < n; ++j) { + cov_cart(i, j) = cov_rad(i, j) * p2D(1, i) * rad_inv(i) * p2D(1, j) * rad_inv(j); + cov_cart(i + n, j + n) = cov_rad(i, j) * p2D(0, i) * rad_inv(i) * p2D(0, j) * rad_inv(j); + cov_cart(i, j + n) = -cov_rad(i, j) * p2D(1, i) * rad_inv(i) * p2D(0, j) * rad_inv(j); + cov_cart(i + n, j) = -cov_rad(i, j) * p2D(0, i) * rad_inv(i) * p2D(1, j) * rad_inv(j); + cov_cart(j, i) = cov_cart(i, j); + cov_cart(j + n, i + n) = cov_cart(i + n, j + n); + cov_cart(j + n, i) = cov_cart(i, j + n); + cov_cart(j, i + n) = cov_cart(i + n, j); + } + } + return cov_cart; + } + + /*! + \brief Transform covariance matrix from Cartesian coordinates (only + transverse plane component) to radial coordinates (both radial and + tangential component but only diagonal terms, correlation between different + point are not managed). + \param p2D 2D points in transverse plane. + \param cov_cart covariance matrix in Cartesian coordinates. + \return cov_rad covariance matrix in raidal coordinate. + \warning correlation between different point are not computed. +*/ + template + __host__ __device__ inline VectorNd cov_carttorad(const M2xN& p2D, + const Matrix2Nd& cov_cart, + const VectorNd& rad) { + constexpr uint n = N; + VectorNd cov_rad; + const VectorNd rad_inv2 = rad.cwiseInverse().array().square(); + for (uint i = 0; i < n; ++i) { + //!< in case you have (0,0) to avoid dividing by 0 radius + if (rad(i) < 1.e-4) + cov_rad(i) = cov_cart(i, i); + else { + cov_rad(i) = rad_inv2(i) * (cov_cart(i, i) * sqr(p2D(1, i)) + cov_cart(i + n, i + n) * sqr(p2D(0, i)) - + 2. * cov_cart(i, i + n) * p2D(0, i) * p2D(1, i)); + } + } + return cov_rad; + } + + /*! + \brief Transform covariance matrix from Cartesian coordinates (only + transverse plane component) to coordinates system orthogonal to the + pre-fitted circle in each point. + Further information in attached documentation. + \param p2D 2D points in transverse plane. + \param cov_cart covariance matrix in Cartesian coordinates. + \param fast_fit fast_fit Vector4d result of the previous pre-fit + structured in this form:(X0, Y0, R, tan(theta))). + \return cov_rad covariance matrix in the pre-fitted circle's + orthogonal system. +*/ + template + __host__ __device__ inline VectorNd cov_carttorad_prefit(const M2xN& p2D, + const Matrix2Nd& cov_cart, + V4& fast_fit, + const VectorNd& rad) { + constexpr uint n = N; + VectorNd cov_rad; + for (uint i = 0; i < n; ++i) { + //!< in case you have (0,0) to avoid dividing by 0 radius + if (rad(i) < 1.e-4) + cov_rad(i) = cov_cart(i, i); // TO FIX + else { + Vector2d a = p2D.col(i); + Vector2d b = p2D.col(i) - fast_fit.head(2); + const double x2 = a.dot(b); + const double y2 = cross2D(a, b); + const double tan_c = -y2 / x2; + const double tan_c2 = sqr(tan_c); + cov_rad(i) = + 1. / (1. + tan_c2) * (cov_cart(i, i) + cov_cart(i + n, i + n) * tan_c2 + 2 * cov_cart(i, i + n) * tan_c); + } + } + return cov_rad; + } + + /*! + \brief Compute the points' weights' vector for the circle fit when multiple + scattering is managed. + Further information in attached documentation. + \param cov_rad_inv covariance matrix inverse in radial coordinated + (or, beter, pre-fitted circle's orthogonal system). + \return weight VectorNd points' weights' vector. + \bug I'm not sure this is the right way to compute the weights for non + diagonal cov matrix. Further investigation needed. +*/ + + template + __host__ __device__ inline VectorNd weightCircle(const MatrixNd& cov_rad_inv) { + return cov_rad_inv.colwise().sum().transpose(); + } + + /*! + \brief Find particle q considering the sign of cross product between + particles velocity (estimated by the first 2 hits) and the vector radius + between the first hit and the center of the fitted circle. + \param p2D 2D points in transverse plane. + \param par_uvr result of the circle fit in this form: (X0,Y0,R). + \return q int 1 or -1. +*/ + template + __host__ __device__ inline int32_t charge(const M2xN& p2D, const Vector3d& par_uvr) { + return ((p2D(0, 1) - p2D(0, 0)) * (par_uvr.y() - p2D(1, 0)) - (p2D(1, 1) - p2D(1, 0)) * (par_uvr.x() - p2D(0, 0)) > + 0) + ? -1 + : 1; + } + + /*! + \brief Compute the eigenvector associated to the minimum eigenvalue. + \param A the Matrix you want to know eigenvector and eigenvalue. + \param chi2 the double were the chi2-related quantity will be stored. + \return the eigenvector associated to the minimum eigenvalue. + \warning double precision is needed for a correct assessment of chi2. + \details The minimus eigenvalue is related to chi2. + We exploit the fact that the matrix is symmetrical and small (2x2 for line + fit and 3x3 for circle fit), so the SelfAdjointEigenSolver from Eigen + library is used, with the computedDirect method (available only for 2x2 + and 3x3 Matrix) wich computes eigendecomposition of given matrix using a + fast closed-form algorithm. + For this optimization the matrix type must be known at compiling time. +*/ + + __host__ __device__ inline Vector3d min_eigen3D(const Matrix3d& A, double& chi2) { +#ifdef RFIT_DEBUG + printf("min_eigen3D - enter\n"); +#endif + Eigen::SelfAdjointEigenSolver solver(3); + solver.computeDirect(A); + int min_index; + chi2 = solver.eigenvalues().minCoeff(&min_index); +#ifdef RFIT_DEBUG + printf("min_eigen3D - exit\n"); +#endif + return solver.eigenvectors().col(min_index); + } + + /*! + \brief A faster version of min_eigen3D() where double precision is not + needed. + \param A the Matrix you want to know eigenvector and eigenvalue. + \param chi2 the double were the chi2-related quantity will be stored + \return the eigenvector associated to the minimum eigenvalue. + \detail The computedDirect() method of SelfAdjointEigenSolver for 3x3 Matrix + indeed, use trigonometry function (it solves a third degree equation) which + speed up in single precision. +*/ + + __host__ __device__ inline Vector3d min_eigen3D_fast(const Matrix3d& A) { + Eigen::SelfAdjointEigenSolver solver(3); + solver.computeDirect(A.cast()); + int min_index; + solver.eigenvalues().minCoeff(&min_index); + return solver.eigenvectors().col(min_index).cast(); + } + + /*! + \brief 2D version of min_eigen3D(). + \param aMat the Matrix you want to know eigenvector and eigenvalue. + \param chi2 the double were the chi2-related quantity will be stored + \return the eigenvector associated to the minimum eigenvalue. + \detail The computedDirect() method of SelfAdjointEigenSolver for 2x2 Matrix + do not use special math function (just sqrt) therefore it doesn't speed up + significantly in single precision. +*/ + + __host__ __device__ inline Vector2d min_eigen2D(const Matrix2d& aMat, double& chi2) { + Eigen::SelfAdjointEigenSolver solver(2); + solver.computeDirect(aMat); + int min_index; + chi2 = solver.eigenvalues().minCoeff(&min_index); + return solver.eigenvectors().col(min_index); + } + + /*! + \brief A very fast helix fit: it fits a circle by three points (first, middle + and last point) and a line by two points (first and last). + \param hits points to be fitted + \return result in this form: (X0,Y0,R,tan(theta)). + \warning points must be passed ordered (from internal layer to external) in + order to maximize accuracy and do not mistake tan(theta) sign. + \details This fast fit is used as pre-fit which is needed for: + - weights estimation and chi2 computation in line fit (fundamental); + - weights estimation and chi2 computation in circle fit (useful); + - computation of error due to multiple scattering. +*/ + + template + __host__ __device__ inline void fastFit(const M3xN& hits, V4& result) { + constexpr uint32_t N = M3xN::ColsAtCompileTime; + constexpr auto n = N; // get the number of hits + printIt(&hits, "Fast_fit - hits: "); + + // CIRCLE FIT + // Make segments between middle-to-first(b) and last-to-first(c) hits + const Vector2d bVec = hits.block(0, n / 2, 2, 1) - hits.block(0, 0, 2, 1); + const Vector2d cVec = hits.block(0, n - 1, 2, 1) - hits.block(0, 0, 2, 1); + printIt(&bVec, "Fast_fit - b: "); + printIt(&cVec, "Fast_fit - c: "); + // Compute their lengths + auto b2 = bVec.squaredNorm(); + auto c2 = cVec.squaredNorm(); + // The algebra has been verified (MR). The usual approach has been followed: + // * use an orthogonal reference frame passing from the first point. + // * build the segments (chords) + // * build orthogonal lines through mid points + // * make a system and solve for X0 and Y0. + // * add the initial point + bool flip = abs(bVec.x()) < abs(bVec.y()); + auto bx = flip ? bVec.y() : bVec.x(); + auto by = flip ? bVec.x() : bVec.y(); + auto cx = flip ? cVec.y() : cVec.x(); + auto cy = flip ? cVec.x() : cVec.y(); + //!< in case b.x is 0 (2 hits with same x) + auto div = 2. * (cx * by - bx * cy); + // if aligned TO FIX + auto y0 = (cx * b2 - bx * c2) / div; + auto x0 = (0.5 * b2 - y0 * by) / bx; + result(0) = hits(0, 0) + (flip ? y0 : x0); + result(1) = hits(1, 0) + (flip ? x0 : y0); + result(2) = sqrt(sqr(x0) + sqr(y0)); + printIt(&result, "Fast_fit - result: "); + + // LINE FIT + const Vector2d dVec = hits.block(0, 0, 2, 1) - result.head(2); + const Vector2d eVec = hits.block(0, n - 1, 2, 1) - result.head(2); + printIt(&eVec, "Fast_fit - e: "); + printIt(&dVec, "Fast_fit - d: "); + // Compute the arc-length between first and last point: L = R * theta = R * atan (tan (Theta) ) + auto dr = result(2) * atan2(cross2D(dVec, eVec), dVec.dot(eVec)); + // Simple difference in Z between last and first hit + auto dz = hits(2, n - 1) - hits(2, 0); + + result(3) = (dr / dz); + +#ifdef RFIT_DEBUG + printf("Fast_fit: [%f, %f, %f, %f]\n", result(0), result(1), result(2), result(3)); +#endif + } + + /*! + \brief Fit a generic number of 2D points with a circle using Riemann-Chernov + algorithm. Covariance matrix of fitted parameter is optionally computed. + Multiple scattering (currently only in barrel layer) is optionally handled. + \param hits2D 2D points to be fitted. + \param hits_cov2D covariance matrix of 2D points. + \param fast_fit pre-fit result in this form: (X0,Y0,R,tan(theta)). + (tan(theta) is not used). + \param bField magnetic field + \param error flag for error computation. + \param scattering flag for multiple scattering + \return circle circle_fit: + -par parameter of the fitted circle in this form (X0,Y0,R); \n + -cov covariance matrix of the fitted parameter (not initialized if + error = false); \n + -q charge of the particle; \n + -chi2. + \warning hits must be passed ordered from inner to outer layer (double hits + on the same layer must be ordered too) so that multiple scattering is + treated properly. + \warning Multiple scattering for barrel is still not tested. + \warning Multiple scattering for endcap hits is not handled (yet). Do not + fit endcap hits with scattering = true ! + \bug for small pt (<0.3 Gev/c) chi2 could be slightly underestimated. + \bug further investigation needed for error propagation with multiple + scattering. +*/ + template + __host__ __device__ inline CircleFit circleFit(const M2xN& hits2D, + const Matrix2Nd& hits_cov2D, + const V4& fast_fit, + const VectorNd& rad, + const double bField, + const bool error) { +#ifdef RFIT_DEBUG + printf("circle_fit - enter\n"); +#endif + // INITIALIZATION + Matrix2Nd vMat = hits_cov2D; + constexpr uint n = N; + printIt(&hits2D, "circle_fit - hits2D:"); + printIt(&hits_cov2D, "circle_fit - hits_cov2D:"); + +#ifdef RFIT_DEBUG + printf("circle_fit - WEIGHT COMPUTATION\n"); +#endif + // WEIGHT COMPUTATION + VectorNd weight; + MatrixNd gMat; + double renorm; + { + MatrixNd cov_rad = cov_carttorad_prefit(hits2D, vMat, fast_fit, rad).asDiagonal(); + MatrixNd scatterCovRadMat = scatter_cov_rad(hits2D, fast_fit, rad, bField); + printIt(&scatterCovRadMat, "circle_fit - scatter_cov_rad:"); + printIt(&hits2D, "circle_fit - hits2D bis:"); +#ifdef RFIT_DEBUG + printf("Address of hits2D: a) %p\n", &hits2D); +#endif + vMat += cov_radtocart(hits2D, scatterCovRadMat, rad); + printIt(&vMat, "circle_fit - V:"); + cov_rad += scatterCovRadMat; + printIt(&cov_rad, "circle_fit - cov_rad:"); + math::cholesky::invert(cov_rad, gMat); + // gMat = cov_rad.inverse(); + renorm = gMat.sum(); + gMat *= 1. / renorm; + weight = weightCircle(gMat); + } + printIt(&weight, "circle_fit - weight:"); + + // SPACE TRANSFORMATION +#ifdef RFIT_DEBUG + printf("circle_fit - SPACE TRANSFORMATION\n"); +#endif + + // center +#ifdef RFIT_DEBUG + printf("Address of hits2D: b) %p\n", &hits2D); +#endif + const Vector2d hCentroid = hits2D.rowwise().mean(); // centroid + printIt(&hCentroid, "circle_fit - h_:"); + Matrix3xNd p3D; + p3D.block(0, 0, 2, n) = hits2D.colwise() - hCentroid; + printIt(&p3D, "circle_fit - p3D: a)"); + Vector2Nd mc; // centered hits, used in error computation + mc << p3D.row(0).transpose(), p3D.row(1).transpose(); + printIt(&mc, "circle_fit - mc(centered hits):"); + + // scale + const double tempQ = mc.squaredNorm(); + const double tempS = sqrt(n * 1. / tempQ); // scaling factor + p3D *= tempS; + + // project on paraboloid + p3D.row(2) = p3D.block(0, 0, 2, n).colwise().squaredNorm(); + printIt(&p3D, "circle_fit - p3D: b)"); + +#ifdef RFIT_DEBUG + printf("circle_fit - COST FUNCTION\n"); +#endif + // COST FUNCTION + + // compute + Vector3d r0; + r0.noalias() = p3D * weight; // center of gravity + const Matrix3xNd xMat = p3D.colwise() - r0; + Matrix3d aMat = xMat * gMat * xMat.transpose(); + printIt(&aMat, "circle_fit - A:"); + +#ifdef RFIT_DEBUG + printf("circle_fit - MINIMIZE\n"); +#endif + // minimize + double chi2; + Vector3d vVec = min_eigen3D(aMat, chi2); +#ifdef RFIT_DEBUG + printf("circle_fit - AFTER MIN_EIGEN\n"); +#endif + printIt(&vVec, "v BEFORE INVERSION"); + vVec *= (vVec(2) > 0) ? 1 : -1; // TO FIX dovrebbe essere N(3)>0 + printIt(&vVec, "v AFTER INVERSION"); + // This hack to be able to run on GPU where the automatic assignment to a + // double from the vector multiplication is not working. +#ifdef RFIT_DEBUG + printf("circle_fit - AFTER MIN_EIGEN 1\n"); +#endif + Eigen::Matrix cm; +#ifdef RFIT_DEBUG + printf("circle_fit - AFTER MIN_EIGEN 2\n"); +#endif + cm = -vVec.transpose() * r0; +#ifdef RFIT_DEBUG + printf("circle_fit - AFTER MIN_EIGEN 3\n"); +#endif + const double tempC = cm(0, 0); + +#ifdef RFIT_DEBUG + printf("circle_fit - COMPUTE CIRCLE PARAMETER\n"); +#endif + // COMPUTE CIRCLE PARAMETER + + // auxiliary quantities + const double tempH = sqrt(1. - sqr(vVec(2)) - 4. * tempC * vVec(2)); + const double v2x2_inv = 1. / (2. * vVec(2)); + const double s_inv = 1. / tempS; + Vector3d par_uvr; // used in error propagation + par_uvr << -vVec(0) * v2x2_inv, -vVec(1) * v2x2_inv, tempH * v2x2_inv; + + CircleFit circle; + circle.par << par_uvr(0) * s_inv + hCentroid(0), par_uvr(1) * s_inv + hCentroid(1), par_uvr(2) * s_inv; + circle.qCharge = charge(hits2D, circle.par); + circle.chi2 = abs(chi2) * renorm / sqr(2 * vVec(2) * par_uvr(2) * tempS); + printIt(&circle.par, "circle_fit - CIRCLE PARAMETERS:"); + printIt(&circle.cov, "circle_fit - CIRCLE COVARIANCE:"); +#ifdef RFIT_DEBUG + printf("circle_fit - CIRCLE CHARGE: %d\n", circle.qCharge); +#endif + +#ifdef RFIT_DEBUG + printf("circle_fit - ERROR PROPAGATION\n"); +#endif + // ERROR PROPAGATION + if (error) { +#ifdef RFIT_DEBUG + printf("circle_fit - ERROR PRPAGATION ACTIVATED\n"); +#endif + ArrayNd vcsMat[2][2]; // cov matrix of center & scaled points + MatrixNd cMat[3][3]; // cov matrix of 3D transformed points +#ifdef RFIT_DEBUG + printf("circle_fit - ERROR PRPAGATION ACTIVATED 2\n"); +#endif + { + Eigen::Matrix cm; + Eigen::Matrix cm2; + cm = mc.transpose() * vMat * mc; + const double tempC2 = cm(0, 0); + Matrix2Nd tempVcsMat; + tempVcsMat.template triangularView() = + (sqr(tempS) * vMat + sqr(sqr(tempS)) * 1. / (4. * tempQ * n) * + (2. * vMat.squaredNorm() + 4. * tempC2) * // mc.transpose() * V * mc) * + (mc * mc.transpose())); + + printIt(&tempVcsMat, "circle_fit - Vcs:"); + cMat[0][0] = tempVcsMat.block(0, 0, n, n).template selfadjointView(); + vcsMat[0][1] = tempVcsMat.block(0, n, n, n); + cMat[1][1] = tempVcsMat.block(n, n, n, n).template selfadjointView(); + vcsMat[1][0] = vcsMat[0][1].transpose(); + printIt(&tempVcsMat, "circle_fit - Vcs:"); + } + + { + const ArrayNd t0 = (VectorXd::Constant(n, 1.) * p3D.row(0)); + const ArrayNd t1 = (VectorXd::Constant(n, 1.) * p3D.row(1)); + const ArrayNd t00 = p3D.row(0).transpose() * p3D.row(0); + const ArrayNd t01 = p3D.row(0).transpose() * p3D.row(1); + const ArrayNd t11 = p3D.row(1).transpose() * p3D.row(1); + const ArrayNd t10 = t01.transpose(); + vcsMat[0][0] = cMat[0][0]; + cMat[0][1] = vcsMat[0][1]; + cMat[0][2] = 2. * (vcsMat[0][0] * t0 + vcsMat[0][1] * t1); + vcsMat[1][1] = cMat[1][1]; + cMat[1][2] = 2. * (vcsMat[1][0] * t0 + vcsMat[1][1] * t1); + MatrixNd tmp; + tmp.template triangularView() = + (2. * (vcsMat[0][0] * vcsMat[0][0] + vcsMat[0][0] * vcsMat[0][1] + vcsMat[1][1] * vcsMat[1][0] + + vcsMat[1][1] * vcsMat[1][1]) + + 4. * (vcsMat[0][0] * t00 + vcsMat[0][1] * t01 + vcsMat[1][0] * t10 + vcsMat[1][1] * t11)) + .matrix(); + cMat[2][2] = tmp.template selfadjointView(); + } + printIt(&cMat[0][0], "circle_fit - C[0][0]:"); + + Matrix3d c0Mat; // cov matrix of center of gravity (r0.x,r0.y,r0.z) + for (uint i = 0; i < 3; ++i) { + for (uint j = i; j < 3; ++j) { + Eigen::Matrix tmp; + tmp = weight.transpose() * cMat[i][j] * weight; + // Workaround to get things working in GPU + const double tempC = tmp(0, 0); + c0Mat(i, j) = tempC; //weight.transpose() * C[i][j] * weight; + c0Mat(j, i) = c0Mat(i, j); + } + } + printIt(&c0Mat, "circle_fit - C0:"); + + const MatrixNd wMat = weight * weight.transpose(); + const MatrixNd hMat = MatrixNd::Identity().rowwise() - weight.transpose(); + const MatrixNx3d s_v = hMat * p3D.transpose(); + printIt(&wMat, "circle_fit - W:"); + printIt(&hMat, "circle_fit - H:"); + printIt(&s_v, "circle_fit - s_v:"); + + MatrixNd dMat[3][3]; // cov(s_v) + dMat[0][0] = (hMat * cMat[0][0] * hMat.transpose()).cwiseProduct(wMat); + dMat[0][1] = (hMat * cMat[0][1] * hMat.transpose()).cwiseProduct(wMat); + dMat[0][2] = (hMat * cMat[0][2] * hMat.transpose()).cwiseProduct(wMat); + dMat[1][1] = (hMat * cMat[1][1] * hMat.transpose()).cwiseProduct(wMat); + dMat[1][2] = (hMat * cMat[1][2] * hMat.transpose()).cwiseProduct(wMat); + dMat[2][2] = (hMat * cMat[2][2] * hMat.transpose()).cwiseProduct(wMat); + dMat[1][0] = dMat[0][1].transpose(); + dMat[2][0] = dMat[0][2].transpose(); + dMat[2][1] = dMat[1][2].transpose(); + printIt(&dMat[0][0], "circle_fit - D_[0][0]:"); + + constexpr uint nu[6][2] = {{0, 0}, {0, 1}, {0, 2}, {1, 1}, {1, 2}, {2, 2}}; + + Matrix6d eMat; // cov matrix of the 6 independent elements of A + for (uint a = 0; a < 6; ++a) { + const uint i = nu[a][0], j = nu[a][1]; + for (uint b = a; b < 6; ++b) { + const uint k = nu[b][0], l = nu[b][1]; + VectorNd t0(n); + VectorNd t1(n); + if (l == k) { + t0 = 2. * dMat[j][l] * s_v.col(l); + if (i == j) + t1 = t0; + else + t1 = 2. * dMat[i][l] * s_v.col(l); + } else { + t0 = dMat[j][l] * s_v.col(k) + dMat[j][k] * s_v.col(l); + if (i == j) + t1 = t0; + else + t1 = dMat[i][l] * s_v.col(k) + dMat[i][k] * s_v.col(l); + } + + if (i == j) { + Eigen::Matrix cm; + cm = s_v.col(i).transpose() * (t0 + t1); + // Workaround to get things working in GPU + const double tempC = cm(0, 0); + eMat(a, b) = 0. + tempC; + } else { + Eigen::Matrix cm; + cm = (s_v.col(i).transpose() * t0) + (s_v.col(j).transpose() * t1); + // Workaround to get things working in GPU + const double tempC = cm(0, 0); + eMat(a, b) = 0. + tempC; //(s_v.col(i).transpose() * t0) + (s_v.col(j).transpose() * t1); + } + if (b != a) + eMat(b, a) = eMat(a, b); + } + } + printIt(&eMat, "circle_fit - E:"); + + Eigen::Matrix j2Mat; // Jacobian of min_eigen() (numerically computed) + for (uint a = 0; a < 6; ++a) { + const uint i = nu[a][0], j = nu[a][1]; + Matrix3d delta = Matrix3d::Zero(); + delta(i, j) = delta(j, i) = abs(aMat(i, j) * epsilon); + j2Mat.col(a) = min_eigen3D_fast(aMat + delta); + const int sign = (j2Mat.col(a)(2) > 0) ? 1 : -1; + j2Mat.col(a) = (j2Mat.col(a) * sign - vVec) / delta(i, j); + } + printIt(&j2Mat, "circle_fit - J2:"); + + Matrix4d cvcMat; // joint cov matrix of (v0,v1,v2,c) + { + Matrix3d t0 = j2Mat * eMat * j2Mat.transpose(); + Vector3d t1 = -t0 * r0; + cvcMat.block(0, 0, 3, 3) = t0; + cvcMat.block(0, 3, 3, 1) = t1; + cvcMat.block(3, 0, 1, 3) = t1.transpose(); + Eigen::Matrix cm1; + Eigen::Matrix cm3; + cm1 = (vVec.transpose() * c0Mat * vVec); + // cm2 = (c0Mat.cwiseProduct(t0)).sum(); + cm3 = (r0.transpose() * t0 * r0); + // Workaround to get things working in GPU + const double tempC = cm1(0, 0) + (c0Mat.cwiseProduct(t0)).sum() + cm3(0, 0); + cvcMat(3, 3) = tempC; + // (v.transpose() * c0Mat * v) + (c0Mat.cwiseProduct(t0)).sum() + (r0.transpose() * t0 * r0); + } + printIt(&cvcMat, "circle_fit - Cvc:"); + + Eigen::Matrix j3Mat; // Jacobian (v0,v1,v2,c)->(X0,Y0,R) + { + const double t = 1. / tempH; + j3Mat << -v2x2_inv, 0, vVec(0) * sqr(v2x2_inv) * 2., 0, 0, -v2x2_inv, vVec(1) * sqr(v2x2_inv) * 2., 0, + vVec(0) * v2x2_inv * t, vVec(1) * v2x2_inv * t, + -tempH * sqr(v2x2_inv) * 2. - (2. * tempC + vVec(2)) * v2x2_inv * t, -t; + } + printIt(&j3Mat, "circle_fit - J3:"); + + const RowVector2Nd Jq = mc.transpose() * tempS * 1. / n; // var(q) + printIt(&Jq, "circle_fit - Jq:"); + + Matrix3d cov_uvr = j3Mat * cvcMat * j3Mat.transpose() * sqr(s_inv) // cov(X0,Y0,R) + + (par_uvr * par_uvr.transpose()) * (Jq * vMat * Jq.transpose()); + + circle.cov = cov_uvr; + } + + printIt(&circle.cov, "Circle cov:"); +#ifdef RFIT_DEBUG + printf("circle_fit - exit\n"); +#endif + return circle; + } + + /*! \brief Perform an ordinary least square fit in the s-z plane to compute + * the parameters cotTheta and Zip. + * + * The fit is performed in the rotated S3D-Z' plane, following the formalism of + * Frodesen, Chapter 10, p. 259. + * + * The system has been rotated to both try to use the combined errors in s-z + * along Z', as errors in the Y direction and to avoid the patological case of + * degenerate lines with angular coefficient m = +/- inf. + * + * The rotation is using the information on the theta angle computed in the + * fast fit. The rotation is such that the S3D axis will be the X-direction, + * while the rotated Z-axis will be the Y-direction. This pretty much follows + * what is done in the same fit in the Broken Line approach. + */ + + template + __host__ __device__ inline LineFit lineFit(const M3xN& hits, + const M6xN& hits_ge, + const CircleFit& circle, + const V4& fast_fit, + const double bField, + const bool error) { + constexpr uint32_t N = M3xN::ColsAtCompileTime; + constexpr auto n = N; + double theta = -circle.qCharge * atan(fast_fit(3)); + theta = theta < 0. ? theta + M_PI : theta; + + // Prepare the Rotation Matrix to rotate the points + Eigen::Matrix rot; + rot << sin(theta), cos(theta), -cos(theta), sin(theta); + + // PROJECTION ON THE CILINDER + // + // p2D will be: + // [s1, s2, s3, ..., sn] + // [z1, z2, z3, ..., zn] + // s values will be ordinary x-values + // z values will be ordinary y-values + + Matrix2xNd p2D = Matrix2xNd::Zero(); + Eigen::Matrix jxMat; + +#ifdef RFIT_DEBUG + printf("Line_fit - B: %g\n", bField); + printIt(&hits, "Line_fit points: "); + printIt(&hits_ge, "Line_fit covs: "); + printIt(&rot, "Line_fit rot: "); +#endif + // x & associated Jacobian + // cfr https://indico.cern.ch/event/663159/contributions/2707659/attachments/1517175/2368189/Riemann_fit.pdf + // Slide 11 + // a ==> -o i.e. the origin of the circle in XY plane, negative + // b ==> p i.e. distances of the points wrt the origin of the circle. + const Vector2d oVec(circle.par(0), circle.par(1)); + + // associated Jacobian, used in weights and errors computation + Matrix6d covMat = Matrix6d::Zero(); + Matrix2d cov_sz[N]; + for (uint i = 0; i < n; ++i) { + Vector2d pVec = hits.block(0, i, 2, 1) - oVec; + const double cross = cross2D(-oVec, pVec); + const double dot = (-oVec).dot(pVec); + // atan2(cross, dot) give back the angle in the transverse plane so tha the + // final equation reads: x_i = -q*R*theta (theta = angle returned by atan2) + const double tempQAtan2 = -circle.qCharge * atan2(cross, dot); + // p2D.coeffRef(1, i) = atan2_ * circle.par(2); + p2D(0, i) = tempQAtan2 * circle.par(2); + + // associated Jacobian, used in weights and errors- computation + const double temp0 = -circle.qCharge * circle.par(2) * 1. / (sqr(dot) + sqr(cross)); + double d_X0 = 0., d_Y0 = 0., d_R = 0.; // good approximation for big pt and eta + if (error) { + d_X0 = -temp0 * ((pVec(1) + oVec(1)) * dot - (pVec(0) - oVec(0)) * cross); + d_Y0 = temp0 * ((pVec(0) + oVec(0)) * dot - (oVec(1) - pVec(1)) * cross); + d_R = tempQAtan2; + } + const double d_x = temp0 * (oVec(1) * dot + oVec(0) * cross); + const double d_y = temp0 * (-oVec(0) * dot + oVec(1) * cross); + jxMat << d_X0, d_Y0, d_R, d_x, d_y, 0., 0., 0., 0., 0., 0., 1.; + + covMat.block(0, 0, 3, 3) = circle.cov; + covMat(3, 3) = hits_ge.col(i)[0]; // x errors + covMat(4, 4) = hits_ge.col(i)[2]; // y errors + covMat(5, 5) = hits_ge.col(i)[5]; // z errors + covMat(3, 4) = covMat(4, 3) = hits_ge.col(i)[1]; // cov_xy + covMat(3, 5) = covMat(5, 3) = hits_ge.col(i)[3]; // cov_xz + covMat(4, 5) = covMat(5, 4) = hits_ge.col(i)[4]; // cov_yz + Matrix2d tmp = jxMat * covMat * jxMat.transpose(); + cov_sz[i].noalias() = rot * tmp * rot.transpose(); + } + // Math of d_{X0,Y0,R,x,y} all verified by hand + p2D.row(1) = hits.row(2); + + // The following matrix will contain errors orthogonal to the rotated S + // component only, with the Multiple Scattering properly treated!! + MatrixNd cov_with_ms; + scatterCovLine(cov_sz, fast_fit, p2D.row(0), p2D.row(1), theta, bField, cov_with_ms); +#ifdef RFIT_DEBUG + printIt(cov_sz, "line_fit - cov_sz:"); + printIt(&cov_with_ms, "line_fit - cov_with_ms: "); +#endif + + // Rotate Points with the shape [2, n] + Matrix2xNd p2D_rot = rot * p2D; + +#ifdef RFIT_DEBUG + printf("Fast fit Tan(theta): %g\n", fast_fit(3)); + printf("Rotation angle: %g\n", theta); + printIt(&rot, "Rotation Matrix:"); + printIt(&p2D, "Original Hits(s,z):"); + printIt(&p2D_rot, "Rotated hits(S3D, Z'):"); + printIt(&rot, "Rotation Matrix:"); +#endif + + // Build the A Matrix + Matrix2xNd aMat; + aMat << MatrixXd::Ones(1, n), p2D_rot.row(0); // rotated s values + +#ifdef RFIT_DEBUG + printIt(&aMat, "A Matrix:"); +#endif + + // Build A^T V-1 A, where V-1 is the covariance of only the Y components. + MatrixNd vyInvMat; + math::cholesky::invert(cov_with_ms, vyInvMat); + // MatrixNd vyInvMat = cov_with_ms.inverse(); + Eigen::Matrix covParamsMat = aMat * vyInvMat * aMat.transpose(); + // Compute the Covariance Matrix of the fit parameters + math::cholesky::invert(covParamsMat, covParamsMat); + + // Now Compute the Parameters in the form [2,1] + // The first component is q. + // The second component is m. + Eigen::Matrix sol = covParamsMat * aMat * vyInvMat * p2D_rot.row(1).transpose(); + +#ifdef RFIT_DEBUG + printIt(&sol, "Rotated solutions:"); +#endif + + // We need now to transfer back the results in the original s-z plane + const auto sinTheta = sin(theta); + const auto cosTheta = cos(theta); + auto common_factor = 1. / (sinTheta - sol(1, 0) * cosTheta); + Eigen::Matrix jMat; + jMat << 0., common_factor * common_factor, common_factor, sol(0, 0) * cosTheta * common_factor * common_factor; + + double tempM = common_factor * (sol(1, 0) * sinTheta + cosTheta); + double tempQ = common_factor * sol(0, 0); + auto cov_mq = jMat * covParamsMat * jMat.transpose(); + + VectorNd res = p2D_rot.row(1).transpose() - aMat.transpose() * sol; + double chi2 = res.transpose() * vyInvMat * res; + + LineFit line; + line.par << tempM, tempQ; + line.cov << cov_mq; + line.chi2 = chi2; + +#ifdef RFIT_DEBUG + printf("Common_factor: %g\n", common_factor); + printIt(&jMat, "Jacobian:"); + printIt(&sol, "Rotated solutions:"); + printIt(&covParamsMat, "Cov_params:"); + printIt(&cov_mq, "Rotated Covariance Matrix:"); + printIt(&(line.par), "Real Parameters:"); + printIt(&(line.cov), "Real Covariance Matrix:"); + printf("Chi2: %g\n", chi2); +#endif + + return line; + } + + /*! + \brief Helix fit by three step: + -fast pre-fit (see Fast_fit() for further info); \n + -circle fit of hits projected in the transverse plane by Riemann-Chernov + algorithm (see Circle_fit() for further info); \n + -line fit of hits projected on cylinder surface by orthogonal distance + regression (see Line_fit for further info). \n + Points must be passed ordered (from inner to outer layer). + \param hits Matrix3xNd hits coordinates in this form: \n + |x0|x1|x2|...|xn| \n + |y0|y1|y2|...|yn| \n + |z0|z1|z2|...|zn| + \param hits_cov Matrix3Nd covariance matrix in this form (()->cov()): \n + |(x0,x0)|(x1,x0)|(x2,x0)|.|(y0,x0)|(y1,x0)|(y2,x0)|.|(z0,x0)|(z1,x0)|(z2,x0)| \n + |(x0,x1)|(x1,x1)|(x2,x1)|.|(y0,x1)|(y1,x1)|(y2,x1)|.|(z0,x1)|(z1,x1)|(z2,x1)| \n + |(x0,x2)|(x1,x2)|(x2,x2)|.|(y0,x2)|(y1,x2)|(y2,x2)|.|(z0,x2)|(z1,x2)|(z2,x2)| \n + . . . . . . . . . . . \n + |(x0,y0)|(x1,y0)|(x2,y0)|.|(y0,y0)|(y1,y0)|(y2,x0)|.|(z0,y0)|(z1,y0)|(z2,y0)| \n + |(x0,y1)|(x1,y1)|(x2,y1)|.|(y0,y1)|(y1,y1)|(y2,x1)|.|(z0,y1)|(z1,y1)|(z2,y1)| \n + |(x0,y2)|(x1,y2)|(x2,y2)|.|(y0,y2)|(y1,y2)|(y2,x2)|.|(z0,y2)|(z1,y2)|(z2,y2)| \n + . . . . . . . . . . . \n + |(x0,z0)|(x1,z0)|(x2,z0)|.|(y0,z0)|(y1,z0)|(y2,z0)|.|(z0,z0)|(z1,z0)|(z2,z0)| \n + |(x0,z1)|(x1,z1)|(x2,z1)|.|(y0,z1)|(y1,z1)|(y2,z1)|.|(z0,z1)|(z1,z1)|(z2,z1)| \n + |(x0,z2)|(x1,z2)|(x2,z2)|.|(y0,z2)|(y1,z2)|(y2,z2)|.|(z0,z2)|(z1,z2)|(z2,z2)| + \param bField magnetic field in the center of the detector in Gev/cm/c + unit, in order to perform pt calculation. + \param error flag for error computation. + \param scattering flag for multiple scattering treatment. + (see Circle_fit() documentation for further info). + \warning see Circle_fit(), Line_fit() and Fast_fit() warnings. + \bug see Circle_fit(), Line_fit() and Fast_fit() bugs. +*/ + + template + inline HelixFit helixFit(const Matrix3xNd& hits, + const Eigen::Matrix& hits_ge, + const double bField, + const bool error) { + constexpr uint n = N; + VectorNd<4> rad = (hits.block(0, 0, 2, n).colwise().norm()); + + // Fast_fit gives back (X0, Y0, R, theta) w/o errors, using only 3 points. + Vector4d fast_fit; + fastFit(hits, fast_fit); + riemannFit::Matrix2Nd hits_cov = MatrixXd::Zero(2 * n, 2 * n); + riemannFit::loadCovariance2D(hits_ge, hits_cov); + CircleFit circle = circleFit(hits.block(0, 0, 2, n), hits_cov, fast_fit, rad, bField, error); + LineFit line = lineFit(hits, hits_ge, circle, fast_fit, bField, error); + + par_uvrtopak(circle, bField, error); + + HelixFit helix; + helix.par << circle.par, line.par; + if (error) { + helix.cov = MatrixXd::Zero(5, 5); + helix.cov.block(0, 0, 3, 3) = circle.cov; + helix.cov.block(3, 3, 2, 2) = line.cov; + } + helix.qCharge = circle.qCharge; + helix.chi2_circle = circle.chi2; + helix.chi2_line = line.chi2; + + return helix; + } + +} // namespace riemannFit + +#endif // RecoPixelVertexing_PixelTrackFitting_interface_RiemannFit_h diff --git a/RecoPixelVertexing/PixelTrackFitting/plugins/BuildFile.xml b/RecoPixelVertexing/PixelTrackFitting/plugins/BuildFile.xml index be113d7a5a3dc..ecfbd99b667fc 100644 --- a/RecoPixelVertexing/PixelTrackFitting/plugins/BuildFile.xml +++ b/RecoPixelVertexing/PixelTrackFitting/plugins/BuildFile.xml @@ -1,3 +1,6 @@ + + + diff --git a/RecoPixelVertexing/PixelTrackFitting/plugins/PixelNtupletsFitterProducer.cc b/RecoPixelVertexing/PixelTrackFitting/plugins/PixelNtupletsFitterProducer.cc new file mode 100644 index 0000000000000..f49d2f01f48c6 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/plugins/PixelNtupletsFitterProducer.cc @@ -0,0 +1,44 @@ +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/PixelFitter.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/PixelNtupletsFitter.h" +#include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h" + +class PixelNtupletsFitterProducer : public edm::global::EDProducer<> { +public: + explicit PixelNtupletsFitterProducer(const edm::ParameterSet& iConfig) + : useRiemannFit_(iConfig.getParameter("useRiemannFit")), idealMagneticFieldToken_(esConsumes()) { + produces(); + } + ~PixelNtupletsFitterProducer() override {} + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("useRiemannFit", false)->setComment("true for Riemann, false for BrokenLine"); + descriptions.add("pixelNtupletsFitterDefault", desc); + } + +private: + bool useRiemannFit_; + const edm::ESGetToken idealMagneticFieldToken_; + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; +}; + +void PixelNtupletsFitterProducer::produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + auto const& idealField = iSetup.getData(idealMagneticFieldToken_); + float bField = 1 / PixelRecoUtilities::fieldInInvGev(iSetup); + auto impl = std::make_unique(bField, &idealField, useRiemannFit_); + auto prod = std::make_unique(std::move(impl)); + iEvent.put(std::move(prod)); +} + +DEFINE_FWK_MODULE(PixelNtupletsFitterProducer); diff --git a/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackProducer.cc b/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackProducer.cc index bd390f5f65352..91c3a44cc8643 100644 --- a/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackProducer.cc +++ b/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackProducer.cc @@ -1,23 +1,22 @@ -#include "PixelTrackProducer.h" +#include +#include "DataFormats/Common/interface/OrphanHandle.h" +#include "DataFormats/TrackReco/interface/Track.h" +#include "DataFormats/TrackReco/interface/TrackExtra.h" +#include "DataFormats/TrackReco/interface/TrackFwd.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" +#include "FWCore/Framework/interface/ESHandle.h" #include "FWCore/Framework/interface/Event.h" #include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/ESHandle.h" #include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" - -#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" -#include "DataFormats/TrackReco/interface/Track.h" -#include "DataFormats/TrackReco/interface/TrackFwd.h" -#include "DataFormats/TrackReco/interface/TrackExtra.h" -#include "DataFormats/Common/interface/OrphanHandle.h" - -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" #include "Geometry/Records/interface/TrackerTopologyRcd.h" -#include +#include "PixelTrackProducer.h" +#include "storeTracks.h" using namespace pixeltrackfitting; using edm::ParameterSet; @@ -45,62 +44,9 @@ void PixelTrackProducer::produce(edm::Event& ev, const edm::EventSetup& es) { TracksWithTTRHs tracks; theReconstruction.run(tracks, ev, es); - edm::ESHandle httopo; es.get().get(httopo); // store tracks - store(ev, tracks, *httopo); -} - -void PixelTrackProducer::store(edm::Event& ev, const TracksWithTTRHs& tracksWithHits, const TrackerTopology& ttopo) { - auto tracks = std::make_unique(); - auto recHits = std::make_unique(); - auto trackExtras = std::make_unique(); - - int cc = 0, nTracks = tracksWithHits.size(); - - for (int i = 0; i < nTracks; i++) { - reco::Track* track = tracksWithHits.at(i).first; - const SeedingHitSet& hits = tracksWithHits.at(i).second; - - for (unsigned int k = 0; k < hits.size(); k++) { - TrackingRecHit* hit = hits[k]->hit()->clone(); - - track->appendHitPattern(*hit, ttopo); - recHits->push_back(hit); - } - tracks->push_back(*track); - delete track; - } - - LogDebug("TrackProducer") << "put the collection of TrackingRecHit in the event" - << "\n"; - edm::OrphanHandle ohRH = ev.put(std::move(recHits)); - - edm::RefProd hitCollProd(ohRH); - for (int k = 0; k < nTracks; k++) { - reco::TrackExtra theTrackExtra{}; - - //fill the TrackExtra with TrackingRecHitRef - unsigned int nHits = tracks->at(k).numberOfValidHits(); - theTrackExtra.setHits(hitCollProd, cc, nHits); - cc += nHits; - AlgebraicVector5 v = AlgebraicVector5(0, 0, 0, 0, 0); - reco::TrackExtra::TrajParams trajParams(nHits, LocalTrajectoryParameters(v, 1.)); - reco::TrackExtra::Chi2sFive chi2s(nHits, 0); - theTrackExtra.setTrajParams(std::move(trajParams), std::move(chi2s)); - trackExtras->push_back(theTrackExtra); - } - - LogDebug("TrackProducer") << "put the collection of TrackExtra in the event" - << "\n"; - edm::OrphanHandle ohTE = ev.put(std::move(trackExtras)); - - for (int k = 0; k < nTracks; k++) { - const reco::TrackExtraRef theTrackExtraRef(ohTE, k); - (tracks->at(k)).setExtra(theTrackExtraRef); - } - - ev.put(std::move(tracks)); + storeTracks(ev, tracks, *httopo); } diff --git a/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackProducer.h b/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackProducer.h index d756a9cf963f5..c38fd44c0d7f5 100644 --- a/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackProducer.h +++ b/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackProducer.h @@ -1,8 +1,7 @@ -#ifndef PixelTrackProducer_H -#define PixelTrackProducer_H +#ifndef RecoPixelVertexing_PixelTrackFitting_plugins_PixelTrackProducer_h +#define RecoPixelVertexing_PixelTrackFitting_plugins_PixelTrackProducer_h #include "FWCore/Framework/interface/stream/EDProducer.h" -#include "RecoPixelVertexing/PixelTrackFitting/interface/TracksWithHits.h" #include "RecoPixelVertexing/PixelTrackFitting/interface/PixelTrackReconstruction.h" namespace edm { @@ -24,7 +23,7 @@ class PixelTrackProducer : public edm::stream::EDProducer<> { void produce(edm::Event& ev, const edm::EventSetup& es) override; private: - void store(edm::Event& ev, const pixeltrackfitting::TracksWithTTRHs& selectedTracks, const TrackerTopology& ttopo); PixelTrackReconstruction theReconstruction; }; -#endif + +#endif // RecoPixelVertexing_PixelTrackFitting_plugins_PixelTrackProducer_h diff --git a/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackProducerFromSoA.cc b/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackProducerFromSoA.cc new file mode 100644 index 0000000000000..94c490e948575 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackProducerFromSoA.cc @@ -0,0 +1,205 @@ +#include "DataFormats/BeamSpot/interface/BeamSpot.h" +#include "DataFormats/Common/interface/OrphanHandle.h" +#include "DataFormats/TrackReco/interface/Track.h" +#include "DataFormats/TrackReco/interface/TrackExtra.h" +#include "DataFormats/TrackReco/interface/TrackFwd.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" +#include "DataFormats/GeometrySurface/interface/Plane.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/PluginManager/interface/ModuleDef.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" + +#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h" +#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h" +#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/FitUtils.h" + +#include "CUDADataFormats/Common/interface/HostProduct.h" +#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h" +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" + +#include "storeTracks.h" +#include "CUDADataFormats/Common/interface/HostProduct.h" + +/** + * This class creates "leagcy" reco::Track + * objects from the output of SoA CA. + */ +class PixelTrackProducerFromSoA : public edm::global::EDProducer<> { +public: + using IndToEdm = std::vector; + + explicit PixelTrackProducerFromSoA(const edm::ParameterSet &iConfig); + ~PixelTrackProducerFromSoA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions &descriptions); + + // using HitModuleStart = std::array; + using HMSstorage = HostProduct; + +private: + void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override; + + // Event Data tokens + const edm::EDGetTokenT tBeamSpot_; + const edm::EDGetTokenT tokenTrack_; + const edm::EDGetTokenT cpuHits_; + const edm::EDGetTokenT hmsToken_; + // Event Setup tokens + const edm::ESGetToken idealMagneticFieldToken_; + const edm::ESGetToken ttTopoToken_; + + int32_t const minNumberOfHits_; +}; + +PixelTrackProducerFromSoA::PixelTrackProducerFromSoA(const edm::ParameterSet &iConfig) + : tBeamSpot_(consumes(iConfig.getParameter("beamSpot"))), + tokenTrack_(consumes(iConfig.getParameter("trackSrc"))), + cpuHits_(consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), + hmsToken_(consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), + idealMagneticFieldToken_(esConsumes()), + ttTopoToken_(esConsumes()), + minNumberOfHits_(iConfig.getParameter("minNumberOfHits")) { + produces(); + produces(); + produces(); + produces(); +} + +void PixelTrackProducerFromSoA::fillDescriptions(edm::ConfigurationDescriptions &descriptions) { + edm::ParameterSetDescription desc; + desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); + desc.add("trackSrc", edm::InputTag("pixelTrackSoA")); + desc.add("pixelRecHitLegacySrc", edm::InputTag("siPixelRecHitsPreSplittingLegacy")); + desc.add("minNumberOfHits", 0); + + descriptions.addWithDefaultLabel(desc); +} + +void PixelTrackProducerFromSoA::produce(edm::StreamID streamID, + edm::Event &iEvent, + const edm::EventSetup &iSetup) const { + // std::cout << "Converting gpu helix in reco tracks" << std::endl; + + auto indToEdmP = std::make_unique(); + auto &indToEdm = *indToEdmP; + + auto const &idealField = iSetup.getData(idealMagneticFieldToken_); + + pixeltrackfitting::TracksWithRecHits tracks; + + auto const &httopo = iSetup.getData(ttTopoToken_); + + const auto &bsh = iEvent.get(tBeamSpot_); + GlobalPoint bs(bsh.x0(), bsh.y0(), bsh.z0()); + + auto const &rechits = iEvent.get(cpuHits_); + std::vector hitmap; + auto const &rcs = rechits.data(); + auto nhits = rcs.size(); + hitmap.resize(nhits, nullptr); + + auto const *hitsModuleStart = iEvent.get(hmsToken_).get(); + auto fc = hitsModuleStart; + + for (auto const &h : rcs) { + auto const &thit = static_cast(h); + auto detI = thit.det()->index(); + auto const &clus = thit.firstClusterRef(); + assert(clus.isPixel()); + auto i = fc[detI] + clus.pixelCluster().originalId(); + if (i >= hitmap.size()) + hitmap.resize(i + 256, nullptr); // only in case of hit overflow in one module + assert(nullptr == hitmap[i]); + hitmap[i] = &h; + } + + std::vector hits; + hits.reserve(5); + + const auto &tsoa = *iEvent.get(tokenTrack_); + + auto const *quality = tsoa.qualityData(); + auto const &fit = tsoa.stateAtBS; + auto const &hitIndices = tsoa.hitIndices; + auto maxTracks = tsoa.stride(); + + int32_t nt = 0; + + for (int32_t it = 0; it < maxTracks; ++it) { + auto nHits = tsoa.nHits(it); + if (nHits == 0) + break; // this is a guard: maybe we need to move to nTracks... + indToEdm.push_back(-1); + auto q = quality[it]; + if (q != pixelTrack::Quality::loose) + continue; + if (nHits < minNumberOfHits_) + continue; + indToEdm.back() = nt; + ++nt; + + hits.resize(nHits); + auto b = hitIndices.begin(it); + for (int iHit = 0; iHit < nHits; ++iHit) + hits[iHit] = hitmap[*(b + iHit)]; + + // mind: this values are respect the beamspot! + + float chi2 = tsoa.chi2(it); + float phi = tsoa.phi(it); + + riemannFit::Vector5d ipar, opar; + riemannFit::Matrix5d icov, ocov; + fit.copyToDense(ipar, icov, it); + riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); + + LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); + AlgebraicSymMatrix55 m; + for (int i = 0; i < 5; ++i) + for (int j = i; j < 5; ++j) + m(i, j) = ocov(i, j); + + float sp = std::sin(phi); + float cp = std::cos(phi); + Surface::RotationType rot(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0); + + Plane impPointPlane(bs, rot); + GlobalTrajectoryParameters gp( + impPointPlane.toGlobal(lpar.position()), impPointPlane.toGlobal(lpar.momentum()), lpar.charge(), &idealField); + JacobianLocalToCurvilinear jl2c(impPointPlane, lpar, idealField); + + AlgebraicSymMatrix55 mo = ROOT::Math::Similarity(jl2c.jacobian(), m); + + int ndof = 2 * hits.size() - 5; + chi2 = chi2 * ndof; + GlobalPoint vv = gp.position(); + math::XYZPoint pos(vv.x(), vv.y(), vv.z()); + GlobalVector pp = gp.momentum(); + math::XYZVector mom(pp.x(), pp.y(), pp.z()); + + auto track = std::make_unique(chi2, ndof, pos, mom, gp.charge(), CurvilinearTrajectoryError(mo)); + // filter??? + tracks.emplace_back(track.release(), hits); + } + // std::cout << "processed " << nt << " good tuples " << tracks.size() << "out of " << indToEdm.size() << std::endl; + + // store tracks + storeTracks(iEvent, tracks, httopo); + iEvent.put(std::move(indToEdmP)); +} + +DEFINE_FWK_MODULE(PixelTrackProducerFromSoA); diff --git a/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc b/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc new file mode 100644 index 0000000000000..2de8ec6c335b5 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc @@ -0,0 +1,86 @@ +#include + +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/Common/interface/HostProduct.h" +#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h" +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/PluginManager/interface/ModuleDef.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" + +// Switch on to enable checks and printout for found tracks +#undef PIXEL_DEBUG_PRODUCE + +class PixelTrackSoAFromCUDA : public edm::stream::EDProducer { +public: + explicit PixelTrackSoAFromCUDA(const edm::ParameterSet& iConfig); + ~PixelTrackSoAFromCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void acquire(edm::Event const& iEvent, + edm::EventSetup const& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; + void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override; + + edm::EDGetTokenT> tokenCUDA_; + edm::EDPutTokenT tokenSOA_; + + cms::cuda::host::unique_ptr soa_; +}; + +PixelTrackSoAFromCUDA::PixelTrackSoAFromCUDA(const edm::ParameterSet& iConfig) + : tokenCUDA_(consumes>(iConfig.getParameter("src"))), + tokenSOA_(produces()) {} + +void PixelTrackSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("src", edm::InputTag("caHitNtupletCUDA")); + descriptions.add("pixelTrackSoA", desc); +} + +void PixelTrackSoAFromCUDA::acquire(edm::Event const& iEvent, + edm::EventSetup const& iSetup, + edm::WaitingTaskWithArenaHolder waitingTaskHolder) { + cms::cuda::Product const& inputDataWrapped = iEvent.get(tokenCUDA_); + cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; + auto const& inputData = ctx.get(inputDataWrapped); + + soa_ = inputData.toHostAsync(ctx.stream()); +} + +void PixelTrackSoAFromCUDA::produce(edm::Event& iEvent, edm::EventSetup const& iSetup) { +#ifdef PIXEL_DEBUG_PRODUCE + auto const& tsoa = *soa_; + auto maxTracks = tsoa.stride(); + std::cout << "size of SoA" << sizeof(tsoa) << " stride " << maxTracks << std::endl; + + int32_t nt = 0; + for (int32_t it = 0; it < maxTracks; ++it) { + auto nHits = tsoa.nHits(it); + assert(nHits == int(tsoa.hitIndices.size(it))); + if (nHits == 0) + break; // this is a guard: maybe we need to move to nTracks... + nt++; + } + std::cout << "found " << nt << " tracks in cpu SoA at " << &tsoa << std::endl; +#endif + + // DO NOT make a copy (actually TWO....) + iEvent.emplace(tokenSOA_, PixelTrackHeterogeneous(std::move(soa_))); + + assert(!soa_); +} + +DEFINE_FWK_MODULE(PixelTrackSoAFromCUDA); diff --git a/RecoPixelVertexing/PixelTrackFitting/plugins/storeTracks.h b/RecoPixelVertexing/PixelTrackFitting/plugins/storeTracks.h new file mode 100644 index 0000000000000..59101b6ba5214 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/plugins/storeTracks.h @@ -0,0 +1,72 @@ +#ifndef RecoPixelVertexingPixelTrackFittingStoreTracks_H +#define RecoPixelVertexingPixelTrackFittingStoreTracks_H + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" + +#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" +#include "DataFormats/TrackReco/interface/Track.h" +#include "DataFormats/TrackReco/interface/TrackFwd.h" +#include "DataFormats/TrackReco/interface/TrackExtra.h" +#include "DataFormats/Common/interface/OrphanHandle.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/TracksWithHits.h" + +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" + +template +void storeTracks(Ev& ev, const TWH& tracksWithHits, const TrackerTopology& ttopo) { + auto tracks = std::make_unique(); + auto recHits = std::make_unique(); + auto trackExtras = std::make_unique(); + + int cc = 0, nTracks = tracksWithHits.size(); + + for (int i = 0; i < nTracks; i++) { + reco::Track* track = tracksWithHits[i].first; + const auto& hits = tracksWithHits[i].second; + + for (unsigned int k = 0; k < hits.size(); k++) { + auto* hit = hits[k]->clone(); + + track->appendHitPattern(*hit, ttopo); + recHits->push_back(hit); + } + tracks->push_back(*track); + delete track; + } + + LogDebug("TrackProducer") << "put the collection of TrackingRecHit in the event" + << "\n"; + edm::OrphanHandle ohRH = ev.put(std::move(recHits)); + + edm::RefProd hitCollProd(ohRH); + for (int k = 0; k < nTracks; k++) { + reco::TrackExtra theTrackExtra{}; + + //fill the TrackExtra with TrackingRecHitRef + unsigned int nHits = tracks->at(k).numberOfValidHits(); + theTrackExtra.setHits(hitCollProd, cc, nHits); + cc += nHits; + AlgebraicVector5 v = AlgebraicVector5(0, 0, 0, 0, 0); + reco::TrackExtra::TrajParams trajParams(nHits, LocalTrajectoryParameters(v, 1.)); + reco::TrackExtra::Chi2sFive chi2s(nHits, 0); + theTrackExtra.setTrajParams(std::move(trajParams), std::move(chi2s)); + trackExtras->push_back(theTrackExtra); + } + + LogDebug("TrackProducer") << "put the collection of TrackExtra in the event" + << "\n"; + edm::OrphanHandle ohTE = ev.put(std::move(trackExtras)); + + for (int k = 0; k < nTracks; k++) { + const reco::TrackExtraRef theTrackExtraRef(ohTE, k); + (tracks->at(k)).setExtra(theTrackExtraRef); + } + + ev.put(std::move(tracks)); +} + +#endif diff --git a/RecoPixelVertexing/PixelTrackFitting/python/PixelTracks_cff.py b/RecoPixelVertexing/PixelTrackFitting/python/PixelTracks_cff.py index 4334d724358f3..5ff404cb603d4 100644 --- a/RecoPixelVertexing/PixelTrackFitting/python/PixelTracks_cff.py +++ b/RecoPixelVertexing/PixelTrackFitting/python/PixelTracks_cff.py @@ -11,6 +11,7 @@ from RecoTracker.TkSeedingLayers.PixelLayerTriplets_cfi import * from RecoTracker.TkSeedingLayers.TTRHBuilderWithoutAngle4PixelTriplets_cfi import * from RecoPixelVertexing.PixelTrackFitting.pixelFitterByHelixProjections_cfi import pixelFitterByHelixProjections +from RecoPixelVertexing.PixelTrackFitting.pixelNtupletsFitter_cfi import pixelNtupletsFitter from RecoPixelVertexing.PixelTrackFitting.pixelTrackFilterByKinematics_cfi import pixelTrackFilterByKinematics from RecoPixelVertexing.PixelTrackFitting.pixelTrackCleanerBySharedHits_cfi import pixelTrackCleanerBySharedHits from RecoPixelVertexing.PixelTrackFitting.pixelTracks_cfi import pixelTracks as _pixelTracks @@ -76,4 +77,26 @@ _pixelTracksTask_lowPU.replace(pixelTracksHitQuadruplets, pixelTracksHitTriplets) trackingLowPU.toReplaceWith(pixelTracksTask, _pixelTracksTask_lowPU) +# Use ntuple fit and substitute previous Fitter producer with the ntuple one +from Configuration.ProcessModifiers.pixelNtupleFit_cff import pixelNtupleFit as ntupleFit +ntupleFit.toModify(pixelTracks, Fitter = "pixelNtupletsFitter") +_pixelTracksTask_ntupleFit = pixelTracksTask.copy() +_pixelTracksTask_ntupleFit.replace(pixelFitterByHelixProjections, pixelNtupletsFitter) +ntupleFit.toReplaceWith(pixelTracksTask, _pixelTracksTask_ntupleFit) + + +from Configuration.ProcessModifiers.gpu_cff import gpu +from RecoPixelVertexing.PixelTriplets.caHitNtupletCUDA_cfi import caHitNtupletCUDA +from RecoPixelVertexing.PixelTrackFitting.pixelTrackSoA_cfi import pixelTrackSoA +from RecoPixelVertexing.PixelTrackFitting.pixelTrackProducerFromSoA_cfi import pixelTrackProducerFromSoA as _pixelTrackFromSoA +_pixelTracksGPUTask = cms.Task( + caHitNtupletCUDA, + pixelTrackSoA, + pixelTracks # FromSoA +) + +gpu.toReplaceWith(pixelTracksTask, _pixelTracksGPUTask) +gpu.toReplaceWith(pixelTracks,_pixelTrackFromSoA) + + pixelTracksSequence = cms.Sequence(pixelTracksTask) diff --git a/RecoPixelVertexing/PixelTrackFitting/python/pixelNtupletsFitter_cfi.py b/RecoPixelVertexing/PixelTrackFitting/python/pixelNtupletsFitter_cfi.py new file mode 100644 index 0000000000000..10e1e3852e9c4 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/python/pixelNtupletsFitter_cfi.py @@ -0,0 +1,6 @@ +import FWCore.ParameterSet.Config as cms + +from RecoPixelVertexing.PixelTrackFitting.pixelNtupletsFitterDefault_cfi import pixelNtupletsFitterDefault + +pixelNtupletsFitter = pixelNtupletsFitterDefault.clone() + diff --git a/RecoPixelVertexing/PixelTrackFitting/src/PixelNtupletsFitter.cc b/RecoPixelVertexing/PixelTrackFitting/src/PixelNtupletsFitter.cc new file mode 100644 index 0000000000000..96f5d5fe03448 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/src/PixelNtupletsFitter.cc @@ -0,0 +1,102 @@ +#include "CommonTools/Utils/interface/DynArray.h" +#include "DataFormats/GeometryCommonDetAlgo/interface/GlobalError.h" +#include "DataFormats/GeometryCommonDetAlgo/interface/Measurement1D.h" +#include "DataFormats/GeometryVector/interface/GlobalPoint.h" +#include "DataFormats/GeometryVector/interface/LocalPoint.h" +#include "DataFormats/GeometryVector/interface/Pi.h" +#include "DataFormats/TrackingRecHit/interface/TrackingRecHit.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "Geometry/CommonDetUnit/interface/GeomDet.h" +#include "Geometry/CommonDetUnit/interface/GeomDetType.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/PixelNtupletsFitter.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/PixelTrackBuilder.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/PixelTrackErrorParam.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/RiemannFit.h" +#include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h" + +using namespace std; + +PixelNtupletsFitter::PixelNtupletsFitter(float nominalB, const MagneticField* field, bool useRiemannFit) + : nominalB_(nominalB), field_(field), useRiemannFit_(useRiemannFit) {} + +std::unique_ptr PixelNtupletsFitter::run(const std::vector& hits, + const TrackingRegion& region, + const edm::EventSetup&) const { + using namespace riemannFit; + + std::unique_ptr ret; + + unsigned int nhits = hits.size(); + + if (nhits < 2) + return ret; + + declareDynArray(GlobalPoint, nhits, points); + declareDynArray(GlobalError, nhits, errors); + declareDynArray(bool, nhits, isBarrel); + + for (unsigned int i = 0; i != nhits; ++i) { + auto const& recHit = hits[i]; + points[i] = GlobalPoint(recHit->globalPosition().basicVector() - region.origin().basicVector()); + errors[i] = recHit->globalPositionError(); + isBarrel[i] = recHit->detUnit()->type().isBarrel(); + } + + assert(nhits == 4); + riemannFit::Matrix3xNd<4> hits_gp; + + Eigen::Matrix hits_ge = Eigen::Matrix::Zero(); + + for (unsigned int i = 0; i < nhits; ++i) { + hits_gp.col(i) << points[i].x(), points[i].y(), points[i].z(); + + hits_ge.col(i) << errors[i].cxx(), errors[i].cyx(), errors[i].cyy(), errors[i].czx(), errors[i].czy(), + errors[i].czz(); + } + + HelixFit fittedTrack = useRiemannFit_ ? riemannFit::helixFit(hits_gp, hits_ge, nominalB_, true) + : brokenline::helixFit(hits_gp, hits_ge, nominalB_); + + int iCharge = fittedTrack.qCharge; + + // parameters are: + // 0: phi + // 1: tip + // 2: curvature + // 3: cottheta + // 4: zip + float valPhi = fittedTrack.par(0); + + float valTip = fittedTrack.par(1); + + float valCotTheta = fittedTrack.par(3); + + float valZip = fittedTrack.par(4); + float valPt = fittedTrack.par(2); + // + // PixelTrackErrorParam param(valEta, valPt); + float errValPhi = std::sqrt(fittedTrack.cov(0, 0)); + float errValTip = std::sqrt(fittedTrack.cov(1, 1)); + + float errValPt = std::sqrt(fittedTrack.cov(2, 2)); + + float errValCotTheta = std::sqrt(fittedTrack.cov(3, 3)); + float errValZip = std::sqrt(fittedTrack.cov(4, 4)); + + float chi2 = fittedTrack.chi2_line + fittedTrack.chi2_circle; + + PixelTrackBuilder builder; + Measurement1D phi(valPhi, errValPhi); + Measurement1D tip(valTip, errValTip); + + Measurement1D pt(valPt, errValPt); + Measurement1D cotTheta(valCotTheta, errValCotTheta); + Measurement1D zip(valZip, errValZip); + + ret.reset(builder.build(pt, phi, cotTheta, tip, zip, chi2, iCharge, hits, field_, region.origin())); + return ret; +} diff --git a/RecoPixelVertexing/PixelTrackFitting/test/BuildFile.xml b/RecoPixelVertexing/PixelTrackFitting/test/BuildFile.xml index 44820da381dd1..f45da7a1880de 100644 --- a/RecoPixelVertexing/PixelTrackFitting/test/BuildFile.xml +++ b/RecoPixelVertexing/PixelTrackFitting/test/BuildFile.xml @@ -1,8 +1,80 @@ - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/RecoPixelVertexing/PixelTrackFitting/test/PixelTrackFits.cc b/RecoPixelVertexing/PixelTrackFitting/test/PixelTrackFits.cc new file mode 100644 index 0000000000000..e5a652e9d43f8 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/test/PixelTrackFits.cc @@ -0,0 +1,431 @@ +#define _USE_MATH_DEFINES + +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef USE_BL +#include "RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h" +#else +#include "RecoPixelVertexing/PixelTrackFitting/interface/RiemannFit.h" +#endif + +using namespace std; +using namespace Eigen; +using namespace riemannFit; +using std::unique_ptr; + +namespace riemannFit { + using Vector3i = Eigen::Matrix; + using Vector4i = Eigen::Matrix; + using Vector6d = Eigen::Matrix; + using Vector8d = Eigen::Matrix; +}; // namespace riemannFit + +// quadruplets... +struct hits_gen { + Matrix3xNd<4> hits; + Eigen::Matrix hits_ge; + Vector5d true_par; +}; + +struct geometry { + Vector8d barrel; + Vector4i barrel_2; + Vector8d R_err; + Vector8d Rp_err; + Vector8d z_err; + Vector6d hand; + Vector3i hand_2; + Vector6d xy_err; + Vector6d zh_err; + double z_max; + double r_max; +}; + +void test_helix_fit(); + +constexpr int c_speed = 299792458; +constexpr double pi = M_PI; +default_random_engine generator(1); + +void smearing(const Vector5d& err, const bool& isbarrel, double& x, double& y, double& z) { + normal_distribution dist_R(0., err[0]); + normal_distribution dist_Rp(0., err[1]); + normal_distribution dist_z(0., err[2]); + normal_distribution dist_xyh(0., err[3]); + normal_distribution dist_zh(0., err[4]); + if (isbarrel) { + double dev_Rp = dist_Rp(generator); + double dev_R = dist_R(generator); + double R = sqrt(riemannFit::sqr(x) + riemannFit::sqr(y)); + x += dev_Rp * +y / R + dev_R * -x / R; + y += dev_Rp * -x / R + dev_R * -y / R; + z += dist_z(generator); + } else { + x += dist_xyh(generator); + y += dist_xyh(generator); + z += dist_zh(generator); + } +} + +template +void Hits_cov(Eigen::Matrix& V, + const unsigned int& i, + const unsigned int& n, + const Matrix3xNd& hits, + const Vector5d& err, + bool isbarrel) { + if (isbarrel) { + double R2 = riemannFit::sqr(hits(0, i)) + riemannFit::sqr(hits(1, i)); + V.col(i)[0] = (riemannFit::sqr(err[1]) * riemannFit::sqr(hits(1, i)) + + riemannFit::sqr(err[0]) * riemannFit::sqr(hits(0, i))) / + R2; + V.col(i)[2] = (riemannFit::sqr(err[1]) * riemannFit::sqr(hits(0, i)) + + riemannFit::sqr(err[0]) * riemannFit::sqr(hits(1, i))) / + R2; + V.col(i)[1] = (riemannFit::sqr(err[0]) - riemannFit::sqr(err[1])) * hits(1, i) * hits(0, i) / R2; + V.col(i)[5] = riemannFit::sqr(err[2]); + } else { + V.col(i)[0] = riemannFit::sqr(err[3]); + V.col(i)[2] = riemannFit::sqr(err[3]); + V.col(i)[5] = riemannFit::sqr(err[4]); + } +} + +hits_gen Hits_gen(const unsigned int& n, const Matrix& gen_par) { + hits_gen gen; + gen.hits = MatrixXd::Zero(3, n); + gen.hits_ge = Eigen::Matrix::Zero(); + // err /= 10000.; + constexpr double rad[8] = {2.95, 6.8, 10.9, 16., 3.1, 7., 11., 16.2}; + // constexpr double R_err[8] = {5./10000, 5./10000, 5./10000, 5./10000, 5./10000, + // 5./10000, 5./10000, 5./10000}; constexpr double Rp_err[8] = {35./10000, 18./10000, + // 15./10000, 34./10000, 35./10000, 18./10000, 15./10000, 34./10000}; constexpr double z_err[8] = + // {72./10000, 38./10000, 25./10000, 56./10000, 72./10000, 38./10000, 25./10000, 56./10000}; + constexpr double R_err[8] = { + 10. / 10000, 10. / 10000, 10. / 10000, 10. / 10000, 10. / 10000, 10. / 10000, 10. / 10000, 10. / 10000}; + constexpr double Rp_err[8] = { + 35. / 10000, 18. / 10000, 15. / 10000, 34. / 10000, 35. / 10000, 18. / 10000, 15. / 10000, 34. / 10000}; + constexpr double z_err[8] = { + 72. / 10000, 38. / 10000, 25. / 10000, 56. / 10000, 72. / 10000, 38. / 10000, 25. / 10000, 56. / 10000}; + const double x2 = gen_par(0) + gen_par(4) * cos(gen_par(3) * pi / 180); + const double y2 = gen_par(1) + gen_par(4) * sin(gen_par(3) * pi / 180); + const double alpha = atan2(y2, x2); + + for (unsigned int i = 0; i < n; ++i) { + const double a = gen_par(4); + const double b = rad[i]; + const double c = sqrt(riemannFit::sqr(x2) + riemannFit::sqr(y2)); + const double beta = acos((riemannFit::sqr(a) - riemannFit::sqr(b) - riemannFit::sqr(c)) / (-2. * b * c)); + const double gamma = alpha + beta; + gen.hits(0, i) = rad[i] * cos(gamma); + gen.hits(1, i) = rad[i] * sin(gamma); + gen.hits(2, i) = + gen_par(2) + + 1 / tan(gen_par(5) * pi / 180) * 2. * + asin(sqrt(riemannFit::sqr((gen_par(0) - gen.hits(0, i))) + riemannFit::sqr((gen_par(1) - gen.hits(1, i)))) / + (2. * gen_par(4))) * + gen_par(4); + // isbarrel(i) = ?? + Vector5d err; + err << R_err[i], Rp_err[i], z_err[i], 0, 0; + smearing(err, true, gen.hits(0, i), gen.hits(1, i), gen.hits(2, i)); + Hits_cov(gen.hits_ge, i, n, gen.hits, err, true); + } + + return gen; +} + +Vector5d True_par(const Matrix& gen_par, const int& charge, const double& B_field) { + Vector5d true_par; + const double x0 = gen_par(0) + gen_par(4) * cos(gen_par(3) * pi / 180); + const double y0 = gen_par(1) + gen_par(4) * sin(gen_par(3) * pi / 180); + CircleFit circle; + circle.par << x0, y0, gen_par(4); + circle.qCharge = 1; + riemannFit::par_uvrtopak(circle, B_field, false); + true_par.block(0, 0, 3, 1) = circle.par; + true_par(3) = 1 / tan(gen_par(5) * pi / 180); + const int dir = ((gen_par(0) - cos(true_par(0) - pi / 2) * true_par(1)) * (gen_par(1) - y0) - + (gen_par(1) - sin(true_par(0) - pi / 2) * true_par(1)) * (gen_par(0) - x0) > + 0) + ? -1 + : 1; + true_par(4) = gen_par(2) + 1 / tan(gen_par(5) * pi / 180) * dir * 2.f * + asin(sqrt(riemannFit::sqr((gen_par(0) - cos(true_par(0) - pi / 2) * true_par(1))) + + riemannFit::sqr((gen_par(1) - sin(true_par(0) - pi / 2) * true_par(1)))) / + (2.f * gen_par(4))) * + gen_par(4); + return true_par; +} + +Matrix New_par(const Matrix& gen_par, const int& charge, const double& B_field) { + Matrix new_par; + new_par.block(0, 0, 3, 1) = gen_par.block(0, 0, 3, 1); + new_par(3) = gen_par(3) - charge * 90; + new_par(4) = gen_par(4) / B_field; + // new_par(5) = atan(sinh(gen_par(5))) * 180 / pi; + new_par(5) = 2. * atan(exp(-gen_par(5))) * 180 / pi; + return new_par; +} + +template +void computePull(std::array& fit, const char* label, int n_, int iteration, const Vector5d& true_par) { + Eigen::Matrix score(41, iteration); + + std::string histo_name("Phi Pull"); + histo_name += label; + TH1F phi_pull(histo_name.data(), histo_name.data(), 100, -10., 10.); + histo_name = "dxy Pull "; + histo_name += label; + TH1F dxy_pull(histo_name.data(), histo_name.data(), 100, -10., 10.); + histo_name = "dz Pull "; + histo_name += label; + TH1F dz_pull(histo_name.data(), histo_name.data(), 100, -10., 10.); + histo_name = "Theta Pull "; + histo_name += label; + TH1F theta_pull(histo_name.data(), histo_name.data(), 100, -10., 10.); + histo_name = "Pt Pull "; + histo_name += label; + TH1F pt_pull(histo_name.data(), histo_name.data(), 100, -10., 10.); + histo_name = "Phi Error "; + histo_name += label; + TH1F phi_error(histo_name.data(), histo_name.data(), 100, 0., 0.1); + histo_name = "dxy error "; + histo_name += label; + TH1F dxy_error(histo_name.data(), histo_name.data(), 100, 0., 0.1); + histo_name = "dz error "; + histo_name += label; + TH1F dz_error(histo_name.data(), histo_name.data(), 100, 0., 0.1); + histo_name = "Theta error "; + histo_name += label; + TH1F theta_error(histo_name.data(), histo_name.data(), 100, 0., 0.1); + histo_name = "Pt error "; + histo_name += label; + TH1F pt_error(histo_name.data(), histo_name.data(), 100, 0., 0.1); + for (int x = 0; x < iteration; x++) { + // Compute PULLS information + score(0, x) = (fit[x].par(0) - true_par(0)) / sqrt(fit[x].cov(0, 0)); + score(1, x) = (fit[x].par(1) - true_par(1)) / sqrt(fit[x].cov(1, 1)); + score(2, x) = (fit[x].par(2) - true_par(2)) / sqrt(fit[x].cov(2, 2)); + score(3, x) = (fit[x].par(3) - true_par(3)) / sqrt(fit[x].cov(3, 3)); + score(4, x) = (fit[x].par(4) - true_par(4)) / sqrt(fit[x].cov(4, 4)); + phi_pull.Fill(score(0, x)); + dxy_pull.Fill(score(1, x)); + pt_pull.Fill(score(2, x)); + theta_pull.Fill(score(3, x)); + dz_pull.Fill(score(4, x)); + phi_error.Fill(sqrt(fit[x].cov(0, 0))); + dxy_error.Fill(sqrt(fit[x].cov(1, 1))); + pt_error.Fill(sqrt(fit[x].cov(2, 2))); + theta_error.Fill(sqrt(fit[x].cov(3, 3))); + dz_error.Fill(sqrt(fit[x].cov(4, 4))); + score(5, x) = (fit[x].par(0) - true_par(0)) * (fit[x].par(1) - true_par(1)) / (fit[x].cov(0, 1)); + score(6, x) = (fit[x].par(0) - true_par(0)) * (fit[x].par(2) - true_par(2)) / (fit[x].cov(0, 2)); + score(7, x) = (fit[x].par(1) - true_par(1)) * (fit[x].par(2) - true_par(2)) / (fit[x].cov(1, 2)); + score(8, x) = (fit[x].par(3) - true_par(3)) * (fit[x].par(4) - true_par(4)) / (fit[x].cov(3, 4)); + score(9, x) = fit[x].chi2_circle; + score(25, x) = fit[x].chi2_line; + score(10, x) = sqrt(fit[x].cov(0, 0)) / fit[x].par(0) * 100; + score(13, x) = sqrt(fit[x].cov(3, 3)) / fit[x].par(3) * 100; + score(14, x) = sqrt(fit[x].cov(4, 4)) / fit[x].par(4) * 100; + score(15, x) = + (fit[x].par(0) - true_par(0)) * (fit[x].par(3) - true_par(3)) / sqrt(fit[x].cov(0, 0)) / sqrt(fit[x].cov(3, 3)); + score(16, x) = + (fit[x].par(1) - true_par(1)) * (fit[x].par(3) - true_par(3)) / sqrt(fit[x].cov(1, 1)) / sqrt(fit[x].cov(3, 3)); + score(17, x) = + (fit[x].par(2) - true_par(2)) * (fit[x].par(3) - true_par(3)) / sqrt(fit[x].cov(2, 2)) / sqrt(fit[x].cov(3, 3)); + score(18, x) = + (fit[x].par(0) - true_par(0)) * (fit[x].par(4) - true_par(4)) / sqrt(fit[x].cov(0, 0)) / sqrt(fit[x].cov(4, 4)); + score(19, x) = + (fit[x].par(1) - true_par(1)) * (fit[x].par(4) - true_par(4)) / sqrt(fit[x].cov(1, 1)) / sqrt(fit[x].cov(4, 4)); + score(20, x) = + (fit[x].par(2) - true_par(2)) * (fit[x].par(4) - true_par(4)) / sqrt(fit[x].cov(2, 2)) / sqrt(fit[x].cov(4, 4)); + score(21, x) = + (fit[x].par(0) - true_par(0)) * (fit[x].par(1) - true_par(1)) / sqrt(fit[x].cov(0, 0)) / sqrt(fit[x].cov(1, 1)); + score(22, x) = + (fit[x].par(0) - true_par(0)) * (fit[x].par(2) - true_par(2)) / sqrt(fit[x].cov(0, 0)) / sqrt(fit[x].cov(2, 2)); + score(23, x) = + (fit[x].par(1) - true_par(1)) * (fit[x].par(2) - true_par(2)) / sqrt(fit[x].cov(1, 1)) / sqrt(fit[x].cov(2, 2)); + score(24, x) = + (fit[x].par(3) - true_par(3)) * (fit[x].par(4) - true_par(4)) / sqrt(fit[x].cov(3, 3)) / sqrt(fit[x].cov(4, 4)); + score(30, x) = fit[x].par(0); + score(31, x) = fit[x].par(1); + score(32, x) = fit[x].par(2); + score(33, x) = fit[x].par(3); + score(34, x) = fit[x].par(4); + score(35, x) = sqrt(fit[x].cov(0, 0)); + score(36, x) = sqrt(fit[x].cov(1, 1)); + score(37, x) = sqrt(fit[x].cov(2, 2)); + score(38, x) = sqrt(fit[x].cov(3, 3)); + score(39, x) = sqrt(fit[x].cov(4, 4)); + } + + double phi_ = score.row(0).mean(); + double a_ = score.row(1).mean(); + double pt_ = score.row(2).mean(); + double coT_ = score.row(3).mean(); + double Zip_ = score.row(4).mean(); + std::cout << std::setprecision(5) << std::scientific << label << " AVERAGE FITTED VALUES: \n" + << "phi: " << score.row(30).mean() << " +/- " << score.row(35).mean() << " [+/-] " + << sqrt(score.row(35).array().abs2().mean() - score.row(35).mean() * score.row(35).mean()) << std::endl + << "d0: " << score.row(31).mean() << " +/- " << score.row(36).mean() << " [+/-] " + << sqrt(score.row(36).array().abs2().mean() - score.row(36).mean() * score.row(36).mean()) << std::endl + << "pt: " << score.row(32).mean() << " +/- " << score.row(37).mean() << " [+/-] " + << sqrt(score.row(37).array().abs2().mean() - score.row(37).mean() * score.row(37).mean()) << std::endl + << "coT: " << score.row(33).mean() << " +/- " << score.row(38).mean() << " [+/-] " + << sqrt(score.row(38).array().abs2().mean() - score.row(38).mean() * score.row(38).mean()) << std::endl + << "Zip: " << score.row(34).mean() << " +/- " << score.row(39).mean() << " [+/-] " + << sqrt(score.row(39).array().abs2().mean() - score.row(39).mean() * score.row(39).mean()) << std::endl; + + Matrix5d correlation; + correlation << 1., score.row(21).mean(), score.row(22).mean(), score.row(15).mean(), score.row(20).mean(), + score.row(21).mean(), 1., score.row(23).mean(), score.row(16).mean(), score.row(19).mean(), score.row(22).mean(), + score.row(23).mean(), 1., score.row(17).mean(), score.row(20).mean(), score.row(15).mean(), score.row(16).mean(), + score.row(17).mean(), 1., score.row(24).mean(), score.row(18).mean(), score.row(19).mean(), score.row(20).mean(), + score.row(24).mean(), 1.; + + cout << "\n" + << label << " PULLS (mean, sigma, relative_error):\n" + << "phi: " << phi_ << " " << sqrt((score.row(0).array() - phi_).square().sum() / (iteration - 1)) << " " + << abs(score.row(10).mean()) << "%\n" + << "a0 : " << a_ << " " << sqrt((score.row(1).array() - a_).square().sum() / (iteration - 1)) << " " + << abs(score.row(11).mean()) << "%\n" + << "pt : " << pt_ << " " << sqrt((score.row(2).array() - pt_).square().sum() / (iteration - 1)) << " " + << abs(score.row(12).mean()) << "%\n" + << "coT: " << coT_ << " " << sqrt((score.row(3).array() - coT_).square().sum() / (iteration - 1)) << " " + << abs(score.row(13).mean()) << "%\n" + << "Zip: " << Zip_ << " " << sqrt((score.row(4).array() - Zip_).square().sum() / (iteration - 1)) << " " + << abs(score.row(14).mean()) << "%\n\n" + << "cov(phi,a0)_: " << score.row(5).mean() << "\n" + << "cov(phi,pt)_: " << score.row(6).mean() << "\n" + << "cov(a0,pt)_: " << score.row(7).mean() << "\n" + << "cov(coT,Zip)_: " << score.row(8).mean() << "\n\n" + << "chi2_circle: " << score.row(9).mean() << " vs " << n_ - 3 << "\n" + << "chi2_line: " << score.row(25).mean() << " vs " << n_ - 2 << "\n\n" + << "correlation matrix:\n" + << correlation << "\n\n" + << endl; + + phi_pull.Fit("gaus", "Q"); + dxy_pull.Fit("gaus", "Q"); + dz_pull.Fit("gaus", "Q"); + theta_pull.Fit("gaus", "Q"); + pt_pull.Fit("gaus", "Q"); + phi_pull.Write(); + dxy_pull.Write(); + dz_pull.Write(); + theta_pull.Write(); + pt_pull.Write(); + phi_error.Write(); + dxy_error.Write(); + dz_error.Write(); + theta_error.Write(); + pt_error.Write(); +} + +void test_helix_fit(bool getcin) { + int n_; + const double B_field = 3.8 * c_speed / pow(10, 9) / 100; + Matrix gen_par; + Vector5d true_par; + Vector5d err; + generator.seed(1); + std::cout << std::setprecision(6); + cout << "_________________________________________________________________________\n"; + cout << "n x(cm) y(cm) z(cm) phi(grad) R(Gev/c) eta iteration debug" << endl; + if (getcin) { + cout << "hits: "; + cin >> n_; + cout << "x: "; + cin >> gen_par(0); + cout << "y: "; + cin >> gen_par(1); + cout << "z: "; + cin >> gen_par(2); + cout << "phi: "; + cin >> gen_par(3); + cout << "p_t: "; + cin >> gen_par(4); + cout << "eta: "; + cin >> gen_par(5); + } else { + n_ = 4; + gen_par(0) = -0.1; // x + gen_par(1) = 0.1; // y + gen_par(2) = -1.; // z + gen_par(3) = 45.; // phi + gen_par(4) = 10.; // R (p_t) + gen_par(5) = 1.; // eta + } + + const int iteration = 5000; + gen_par = New_par(gen_par, 1, B_field); + true_par = True_par(gen_par, 1, B_field); + std::array helixRiemann_fit; + + std::cout << "\nTrue parameters: " + << "phi: " << true_par(0) << " " + << "dxy: " << true_par(1) << " " + << "pt: " << true_par(2) << " " + << "CotT: " << true_par(3) << " " + << "Zip: " << true_par(4) << " " << std::endl; + auto start = std::chrono::high_resolution_clock::now(); + auto delta = start - start; + for (int i = 0; i < 100 * iteration; i++) { + hits_gen gen; + gen = Hits_gen(n_, gen_par); + // gen.hits = MatrixXd::Zero(3, 4); + // gen.hits_cov = MatrixXd::Zero(3 * 4, 3 * 4); + // gen.hits.col(0) << 1.82917642593, 2.0411875248, 7.18495464325; + // gen.hits.col(1) << 4.47041416168, 4.82704305649, 18.6394691467; + // gen.hits.col(2) << 7.25991010666, 7.74653434753, 30.6931324005; + // gen.hits.col(3) << 8.99161434174, 9.54262828827, 38.1338043213; + delta -= std::chrono::high_resolution_clock::now() - start; + helixRiemann_fit[i % iteration] = +#ifdef USE_BL + brokenline::helixFit(gen.hits, gen.hits_ge, B_field); +#else + riemannFit::helixFit(gen.hits, gen.hits_ge, B_field, true); +#endif + delta += std::chrono::high_resolution_clock::now() - start; + + if (helixRiemann_fit[i % iteration].par(0) > 10.) + std::cout << "error" << std::endl; + if (0 == i) + cout << std::setprecision(6) << "phi: " << helixRiemann_fit[i].par(0) << " +/- " + << sqrt(helixRiemann_fit[i].cov(0, 0)) << " vs " << true_par(0) << endl + << "Tip: " << helixRiemann_fit[i].par(1) << " +/- " << sqrt(helixRiemann_fit[i].cov(1, 1)) << " vs " + << true_par(1) << endl + << "p_t: " << helixRiemann_fit[i].par(2) << " +/- " << sqrt(helixRiemann_fit[i].cov(2, 2)) << " vs " + << true_par(2) << endl + << "theta:" << helixRiemann_fit[i].par(3) << " +/- " << sqrt(helixRiemann_fit[i].cov(3, 3)) << " vs " + << true_par(3) << endl + << "Zip: " << helixRiemann_fit[i].par(4) << " +/- " << sqrt(helixRiemann_fit[i].cov(4, 4)) << " vs " + << true_par(4) << endl + << "charge:" << helixRiemann_fit[i].qCharge << " vs 1" << endl + << "covariance matrix:" << endl + << helixRiemann_fit[i].cov << endl + << "Initial hits:\n" + << gen.hits << endl + << "Initial Covariance:\n" + << gen.hits_ge << endl; + } + std::cout << "elapsted time " << double(std::chrono::duration_cast(delta).count()) / 1.e6 + << std::endl; + computePull(helixRiemann_fit, "Riemann", n_, iteration, true_par); +} + +int main(int nargs, char**) { + TFile f("TestFitResults.root", "RECREATE"); + test_helix_fit(nargs > 1); + f.Close(); + return 0; +} diff --git a/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPU.cu b/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPU.cu new file mode 100644 index 0000000000000..d5eba9be26594 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPU.cu @@ -0,0 +1,343 @@ +#include + +#include +#include + +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" + +#ifdef USE_BL +#include "RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h" +#else +#include "RecoPixelVertexing/PixelTrackFitting/interface/RiemannFit.h" +#endif + +#include "test_common.h" + +using namespace Eigen; + +namespace riemannFit { + constexpr uint32_t maxNumberOfTracks() { return 5 * 1024; } + constexpr uint32_t stride() { return maxNumberOfTracks(); } + // hits + template + using Matrix3xNd = Eigen::Matrix; + template + using Map3xNd = Eigen::Map, 0, Eigen::Stride<3 * stride(), stride()>>; + // errors + template + using Matrix6xNf = Eigen::Matrix; + template + using Map6xNf = Eigen::Map, 0, Eigen::Stride<6 * stride(), stride()>>; + // fast fit + using Map4d = Eigen::Map>; + +} // namespace riemannFit + +template +__global__ void kernelPrintSizes(double* __restrict__ phits, float* __restrict__ phits_ge) { + auto i = blockIdx.x * blockDim.x + threadIdx.x; + riemannFit::Map3xNd hits(phits + i, 3, 4); + riemannFit::Map6xNf hits_ge(phits_ge + i, 6, 4); + if (i != 0) + return; + printf("GPU sizes %lu %lu %lu %lu %lu\n", + sizeof(hits[i]), + sizeof(hits_ge[i]), + sizeof(Vector4d), + sizeof(riemannFit::LineFit), + sizeof(riemannFit::CircleFit)); +} + +template +__global__ void kernelFastFit(double* __restrict__ phits, double* __restrict__ presults) { + auto i = blockIdx.x * blockDim.x + threadIdx.x; + riemannFit::Map3xNd hits(phits + i, 3, N); + riemannFit::Map4d result(presults + i, 4); +#ifdef USE_BL + brokenline::fastFit(hits, result); +#else + riemannFit::fastFit(hits, result); +#endif +} + +#ifdef USE_BL + +template +__global__ void kernelBrokenLineFit(double* __restrict__ phits, + float* __restrict__ phits_ge, + double* __restrict__ pfast_fit_input, + double B, + riemannFit::CircleFit* circle_fit, + riemannFit::LineFit* line_fit) { + auto i = blockIdx.x * blockDim.x + threadIdx.x; + riemannFit::Map3xNd hits(phits + i, 3, N); + riemannFit::Map4d fast_fit_input(pfast_fit_input + i, 4); + riemannFit::Map6xNf hits_ge(phits_ge + i, 6, N); + + brokenline::PreparedBrokenLineData data; + riemannFit::Matrix3d Jacob; + + auto& line_fit_results = line_fit[i]; + auto& circle_fit_results = circle_fit[i]; + + brokenline::prepareBrokenLineData(hits, fast_fit_input, B, data); + brokenline::lineFit(hits_ge, fast_fit_input, B, data, line_fit_results); + brokenline::circleFit(hits, hits_ge, fast_fit_input, B, data, circle_fit_results); + Jacob << 1., 0, 0, 0, 1., 0, 0, 0, + -B / std::copysign(riemannFit::sqr(circle_fit_results.par(2)), circle_fit_results.par(2)); + circle_fit_results.par(2) = B / std::abs(circle_fit_results.par(2)); + circle_fit_results.cov = Jacob * circle_fit_results.cov * Jacob.transpose(); + +#ifdef TEST_DEBUG + if (0 == i) { + printf("Circle param %f,%f,%f\n", circle_fit[i].par(0), circle_fit[i].par(1), circle_fit[i].par(2)); + } +#endif +} + +#else + +template +__global__ void kernel_CircleFit(double* __restrict__ phits, + float* __restrict__ phits_ge, + double* __restrict__ pfast_fit_input, + double B, + riemannFit::CircleFit* circle_fit_resultsGPU) { + auto i = blockIdx.x * blockDim.x + threadIdx.x; + riemannFit::Map3xNd hits(phits + i, 3, N); + riemannFit::Map4d fast_fit_input(pfast_fit_input + i, 4); + riemannFit::Map6xNf hits_ge(phits_ge + i, 6, N); + + constexpr auto n = N; + + riemannFit::VectorNd rad = (hits.block(0, 0, 2, n).colwise().norm()); + riemannFit::Matrix2Nd hits_cov = MatrixXd::Zero(2 * n, 2 * n); + riemannFit::loadCovariance2D(hits_ge, hits_cov); + +#ifdef TEST_DEBUG + if (0 == i) { + printf("hits %f, %f\n", hits.block(0, 0, 2, n)(0, 0), hits.block(0, 0, 2, n)(0, 1)); + printf("hits %f, %f\n", hits.block(0, 0, 2, n)(1, 0), hits.block(0, 0, 2, n)(1, 1)); + printf("fast_fit_input(0): %f\n", fast_fit_input(0)); + printf("fast_fit_input(1): %f\n", fast_fit_input(1)); + printf("fast_fit_input(2): %f\n", fast_fit_input(2)); + printf("fast_fit_input(3): %f\n", fast_fit_input(3)); + printf("rad(0,0): %f\n", rad(0, 0)); + printf("rad(1,1): %f\n", rad(1, 1)); + printf("rad(2,2): %f\n", rad(2, 2)); + printf("hits_cov(0,0): %f\n", (*hits_cov)(0, 0)); + printf("hits_cov(1,1): %f\n", (*hits_cov)(1, 1)); + printf("hits_cov(2,2): %f\n", (*hits_cov)(2, 2)); + printf("hits_cov(11,11): %f\n", (*hits_cov)(11, 11)); + printf("B: %f\n", B); + } +#endif + circle_fit_resultsGPU[i] = riemannFit::circleFit(hits.block(0, 0, 2, n), hits_cov, fast_fit_input, rad, B, true); +#ifdef TEST_DEBUG + if (0 == i) { + printf("Circle param %f,%f,%f\n", + circle_fit_resultsGPU[i].par(0), + circle_fit_resultsGPU[i].par(1), + circle_fit_resultsGPU[i].par(2)); + } +#endif +} + +template +__global__ void kernelLineFit(double* __restrict__ phits, + float* __restrict__ phits_ge, + double B, + riemannFit::CircleFit* circle_fit, + double* __restrict__ pfast_fit_input, + riemannFit::LineFit* line_fit) { + auto i = blockIdx.x * blockDim.x + threadIdx.x; + riemannFit::Map3xNd hits(phits + i, 3, N); + riemannFit::Map4d fast_fit_input(pfast_fit_input + i, 4); + riemannFit::Map6xNf hits_ge(phits_ge + i, 6, N); + line_fit[i] = riemannFit::lineFit(hits, hits_ge, circle_fit[i], fast_fit_input, B, true); +} +#endif + +template +__device__ __host__ void fillHitsAndHitsCov(M3xN& hits, M6xN& hits_ge) { + constexpr uint32_t N = M3xN::ColsAtCompileTime; + + if (N == 5) { + hits << 2.934787, 6.314229, 8.936963, 10.360559, 12.856387, 0.773211, 1.816356, 2.765734, 3.330824, 4.422212, + -10.980247, -23.162731, -32.759060, -38.061260, -47.518867; + hits_ge.col(0) << 1.424715e-07, -4.996975e-07, 1.752614e-06, 3.660689e-11, 1.644638e-09, 7.346080e-05; + hits_ge.col(1) << 6.899177e-08, -1.873414e-07, 5.087101e-07, -2.078806e-10, -2.210498e-11, 4.346079e-06; + hits_ge.col(2) << 1.406273e-06, 4.042467e-07, 6.391180e-07, -3.141497e-07, 6.513821e-08, 1.163863e-07; + hits_ge.col(3) << 1.176358e-06, 2.154100e-07, 5.072816e-07, -8.161219e-08, 1.437878e-07, 5.951832e-08; + hits_ge.col(4) << 2.852843e-05, 7.956492e-06, 3.117701e-06, -1.060541e-06, 8.777413e-09, 1.426417e-07; + return; + } + + if (N > 3) + hits << 1.98645, 4.72598, 7.65632, 11.3151, 2.18002, 4.88864, 7.75845, 11.3134, 2.46338, 6.99838, 11.808, 17.793; + else + hits << 1.98645, 4.72598, 7.65632, 2.18002, 4.88864, 7.75845, 2.46338, 6.99838, 11.808; + + hits_ge.col(0)[0] = 7.14652e-06; + hits_ge.col(1)[0] = 2.15789e-06; + hits_ge.col(2)[0] = 1.63328e-06; + if (N > 3) + hits_ge.col(3)[0] = 6.27919e-06; + hits_ge.col(0)[2] = 6.10348e-06; + hits_ge.col(1)[2] = 2.08211e-06; + hits_ge.col(2)[2] = 1.61672e-06; + if (N > 3) + hits_ge.col(3)[2] = 6.28081e-06; + hits_ge.col(0)[5] = 5.184e-05; + hits_ge.col(1)[5] = 1.444e-05; + hits_ge.col(2)[5] = 6.25e-06; + if (N > 3) + hits_ge.col(3)[5] = 3.136e-05; + hits_ge.col(0)[1] = -5.60077e-06; + hits_ge.col(1)[1] = -1.11936e-06; + hits_ge.col(2)[1] = -6.24945e-07; + if (N > 3) + hits_ge.col(3)[1] = -5.28e-06; +} + +template +__global__ void kernelFillHitsAndHitsCov(double* __restrict__ phits, float* phits_ge) { + auto i = blockIdx.x * blockDim.x + threadIdx.x; + riemannFit::Map3xNd hits(phits + i, 3, N); + riemannFit::Map6xNf hits_ge(phits_ge + i, 6, N); + hits_ge = MatrixXf::Zero(6, N); + fillHitsAndHitsCov(hits, hits_ge); +} + +template +void testFit() { + constexpr double B = 0.0113921; + riemannFit::Matrix3xNd hits; + riemannFit::Matrix6xNf hits_ge = MatrixXf::Zero(6, N); + double* hitsGPU = nullptr; + ; + float* hits_geGPU = nullptr; + double* fast_fit_resultsGPU = nullptr; + double* fast_fit_resultsGPUret = new double[riemannFit::maxNumberOfTracks() * sizeof(Vector4d)]; + riemannFit::CircleFit* circle_fit_resultsGPU = nullptr; + riemannFit::CircleFit* circle_fit_resultsGPUret = new riemannFit::CircleFit(); + riemannFit::LineFit* line_fit_resultsGPU = nullptr; + riemannFit::LineFit* line_fit_resultsGPUret = new riemannFit::LineFit(); + + fillHitsAndHitsCov(hits, hits_ge); + + std::cout << "sizes " << N << ' ' << sizeof(hits) << ' ' << sizeof(hits_ge) << ' ' << sizeof(Vector4d) << ' ' + << sizeof(riemannFit::LineFit) << ' ' << sizeof(riemannFit::CircleFit) << std::endl; + + std::cout << "Generated hits:\n" << hits << std::endl; + std::cout << "Generated cov:\n" << hits_ge << std::endl; + + // FAST_FIT_CPU +#ifdef USE_BL + Vector4d fast_fit_results; + brokenline::fastFit(hits, fast_fit_results); +#else + Vector4d fast_fit_results; + riemannFit::fastFit(hits, fast_fit_results); +#endif + std::cout << "Fitted values (FastFit, [X0, Y0, R, tan(theta)]):\n" << fast_fit_results << std::endl; + + // for timing purposes we fit 4096 tracks + constexpr uint32_t Ntracks = 4096; + cudaCheck(cudaMalloc(&hitsGPU, riemannFit::maxNumberOfTracks() * sizeof(riemannFit::Matrix3xNd))); + cudaCheck(cudaMalloc(&hits_geGPU, riemannFit::maxNumberOfTracks() * sizeof(riemannFit::Matrix6xNf))); + cudaCheck(cudaMalloc(&fast_fit_resultsGPU, riemannFit::maxNumberOfTracks() * sizeof(Vector4d))); + cudaCheck(cudaMalloc(&line_fit_resultsGPU, riemannFit::maxNumberOfTracks() * sizeof(riemannFit::LineFit))); + cudaCheck(cudaMalloc(&circle_fit_resultsGPU, riemannFit::maxNumberOfTracks() * sizeof(riemannFit::CircleFit))); + + cudaCheck(cudaMemset(fast_fit_resultsGPU, 0, riemannFit::maxNumberOfTracks() * sizeof(Vector4d))); + cudaCheck(cudaMemset(line_fit_resultsGPU, 0, riemannFit::maxNumberOfTracks() * sizeof(riemannFit::LineFit))); + + kernelPrintSizes<<>>(hitsGPU, hits_geGPU); + kernelFillHitsAndHitsCov<<>>(hitsGPU, hits_geGPU); + + // FAST_FIT GPU + kernelFastFit<<>>(hitsGPU, fast_fit_resultsGPU); + cudaDeviceSynchronize(); + + cudaCheck(cudaMemcpy(fast_fit_resultsGPUret, + fast_fit_resultsGPU, + riemannFit::maxNumberOfTracks() * sizeof(Vector4d), + cudaMemcpyDeviceToHost)); + riemannFit::Map4d fast_fit(fast_fit_resultsGPUret + 10, 4); + std::cout << "Fitted values (FastFit, [X0, Y0, R, tan(theta)]): GPU\n" << fast_fit << std::endl; + assert(isEqualFuzzy(fast_fit_results, fast_fit)); + +#ifdef USE_BL + // CIRCLE AND LINE FIT CPU + brokenline::PreparedBrokenLineData data; + brokenline::karimaki_circle_fit circle_fit_results; + riemannFit::LineFit line_fit_results; + riemannFit::Matrix3d Jacob; + brokenline::prepareBrokenLineData(hits, fast_fit_results, B, data); + brokenline::lineFit(hits_ge, fast_fit_results, B, data, line_fit_results); + brokenline::circleFit(hits, hits_ge, fast_fit_results, B, data, circle_fit_results); + Jacob << 1., 0, 0, 0, 1., 0, 0, 0, + -B / std::copysign(riemannFit::sqr(circle_fit_results.par(2)), circle_fit_results.par(2)); + circle_fit_results.par(2) = B / std::abs(circle_fit_results.par(2)); + circle_fit_results.cov = Jacob * circle_fit_results.cov * Jacob.transpose(); + + // fit on GPU + kernelBrokenLineFit + <<>>(hitsGPU, hits_geGPU, fast_fit_resultsGPU, B, circle_fit_resultsGPU, line_fit_resultsGPU); + cudaDeviceSynchronize(); + +#else + // CIRCLE_FIT CPU + riemannFit::VectorNd rad = (hits.block(0, 0, 2, N).colwise().norm()); + + riemannFit::Matrix2Nd hits_cov = riemannFit::Matrix2Nd::Zero(); + riemannFit::loadCovariance2D(hits_ge, hits_cov); + riemannFit::CircleFit circle_fit_results = + riemannFit::circleFit(hits.block(0, 0, 2, N), hits_cov, fast_fit_results, rad, B, true); + + // CIRCLE_FIT GPU + kernel_CircleFit<<>>(hitsGPU, hits_geGPU, fast_fit_resultsGPU, B, circle_fit_resultsGPU); + cudaDeviceSynchronize(); + + // LINE_FIT CPU + riemannFit::LineFit line_fit_results = + riemannFit::lineFit(hits, hits_ge, circle_fit_results, fast_fit_results, B, true); + + kernelLineFit + <<>>(hitsGPU, hits_geGPU, B, circle_fit_resultsGPU, fast_fit_resultsGPU, line_fit_resultsGPU); + cudaDeviceSynchronize(); +#endif + + std::cout << "Fitted values (CircleFit):\n" << circle_fit_results.par << std::endl; + + cudaCheck(cudaMemcpy( + circle_fit_resultsGPUret, circle_fit_resultsGPU, sizeof(riemannFit::CircleFit), cudaMemcpyDeviceToHost)); + std::cout << "Fitted values (CircleFit) GPU:\n" << circle_fit_resultsGPUret->par << std::endl; + assert(isEqualFuzzy(circle_fit_results.par, circle_fit_resultsGPUret->par)); + + std::cout << "Fitted values (LineFit):\n" << line_fit_results.par << std::endl; + // LINE_FIT GPU + cudaCheck( + cudaMemcpy(line_fit_resultsGPUret, line_fit_resultsGPU, sizeof(riemannFit::LineFit), cudaMemcpyDeviceToHost)); + std::cout << "Fitted values (LineFit) GPU:\n" << line_fit_resultsGPUret->par << std::endl; + assert(isEqualFuzzy(line_fit_results.par, line_fit_resultsGPUret->par, N == 5 ? 1e-4 : 1e-6)); // requires fma on CPU + + std::cout << "Fitted cov (CircleFit) CPU:\n" << circle_fit_results.cov << std::endl; + std::cout << "Fitted cov (LineFit): CPU\n" << line_fit_results.cov << std::endl; + std::cout << "Fitted cov (CircleFit) GPU:\n" << circle_fit_resultsGPUret->cov << std::endl; + std::cout << "Fitted cov (LineFit): GPU\n" << line_fit_resultsGPUret->cov << std::endl; +} + +int main(int argc, char* argv[]) { + cms::cudatest::requireDevices(); + + testFit<4>(); + testFit<3>(); + testFit<5>(); + + std::cout << "TEST FIT, NO ERRORS" << std::endl; + + return 0; +} diff --git a/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPUNoFit.cu b/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPUNoFit.cu new file mode 100644 index 0000000000000..6ac1088943305 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPUNoFit.cu @@ -0,0 +1,248 @@ +#include + +#include +#include + +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" +#include "test_common.h" + +using namespace Eigen; + +using Matrix5d = Matrix; + +__host__ __device__ void eigenValues(Matrix3d *m, Eigen::SelfAdjointEigenSolver::RealVectorType *ret) { +#if TEST_DEBUG + printf("Matrix(0,0): %f\n", (*m)(0, 0)); + printf("Matrix(1,1): %f\n", (*m)(1, 1)); + printf("Matrix(2,2): %f\n", (*m)(2, 2)); +#endif + SelfAdjointEigenSolver es; + es.computeDirect(*m); + (*ret) = es.eigenvalues(); + return; +} + +__global__ void kernel(Matrix3d *m, Eigen::SelfAdjointEigenSolver::RealVectorType *ret) { + eigenValues(m, ret); +} + +__global__ void kernelInverse3x3(Matrix3d *in, Matrix3d *out) { (*out) = in->inverse(); } + +__global__ void kernelInverse4x4(Matrix4d *in, Matrix4d *out) { (*out) = in->inverse(); } + +__global__ void kernelInverse5x5(Matrix5d *in, Matrix5d *out) { (*out) = in->inverse(); } + +template +__global__ void kernelMultiply(M1 *J, M2 *C, M3 *result) { +// Map res(result->data()); +#if TEST_DEBUG + printf("*** GPU IN ***\n"); +#endif + printIt(J); + printIt(C); + // res.noalias() = (*J) * (*C); + // printIt(&res); + (*result) = (*J) * (*C); +#if TEST_DEBUG + printf("*** GPU OUT ***\n"); +#endif + return; +} + +template +void testMultiply() { + std::cout << "TEST MULTIPLY" << std::endl; + std::cout << "Product of type " << row1 << "x" << col1 << " * " << row2 << "x" << col2 << std::endl; + Eigen::Matrix J; + fillMatrix(J); + Eigen::Matrix C; + fillMatrix(C); + Eigen::Matrix multiply_result = J * C; +#if TEST_DEBUG + std::cout << "Input J:" << std::endl; + printIt(&J); + std::cout << "Input C:" << std::endl; + printIt(&C); + std::cout << "Output:" << std::endl; + printIt(&multiply_result); +#endif + // GPU + Eigen::Matrix *JGPU = nullptr; + Eigen::Matrix *CGPU = nullptr; + Eigen::Matrix *multiply_resultGPU = nullptr; + Eigen::Matrix *multiply_resultGPUret = new Eigen::Matrix(); + + cudaCheck(cudaMalloc((void **)&JGPU, sizeof(Eigen::Matrix))); + cudaCheck(cudaMalloc((void **)&CGPU, sizeof(Eigen::Matrix))); + cudaCheck(cudaMalloc((void **)&multiply_resultGPU, sizeof(Eigen::Matrix))); + cudaCheck(cudaMemcpy(JGPU, &J, sizeof(Eigen::Matrix), cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy(CGPU, &C, sizeof(Eigen::Matrix), cudaMemcpyHostToDevice)); + cudaCheck(cudaMemcpy( + multiply_resultGPU, &multiply_result, sizeof(Eigen::Matrix), cudaMemcpyHostToDevice)); + + kernelMultiply<<<1, 1>>>(JGPU, CGPU, multiply_resultGPU); + cudaDeviceSynchronize(); + + cudaCheck(cudaMemcpy( + multiply_resultGPUret, multiply_resultGPU, sizeof(Eigen::Matrix), cudaMemcpyDeviceToHost)); + printIt(multiply_resultGPUret); + assert(isEqualFuzzy(multiply_result, (*multiply_resultGPUret))); +} + +void testInverse3x3() { + std::cout << "TEST INVERSE 3x3" << std::endl; + Matrix3d m; + fillMatrix(m); + m += m.transpose().eval(); + + Matrix3d m_inv = m.inverse(); + Matrix3d *mGPU = nullptr; + Matrix3d *mGPUret = nullptr; + Matrix3d *mCPUret = new Matrix3d(); + +#if TEST_DEBUG + std::cout << "Here is the matrix m:" << std::endl << m << std::endl; + std::cout << "Its inverse is:" << std::endl << m.inverse() << std::endl; +#endif + cudaCheck(cudaMalloc((void **)&mGPU, sizeof(Matrix3d))); + cudaCheck(cudaMalloc((void **)&mGPUret, sizeof(Matrix3d))); + cudaCheck(cudaMemcpy(mGPU, &m, sizeof(Matrix3d), cudaMemcpyHostToDevice)); + + kernelInverse3x3<<<1, 1>>>(mGPU, mGPUret); + cudaDeviceSynchronize(); + + cudaCheck(cudaMemcpy(mCPUret, mGPUret, sizeof(Matrix3d), cudaMemcpyDeviceToHost)); +#if TEST_DEBUG + std::cout << "Its GPU inverse is:" << std::endl << (*mCPUret) << std::endl; +#endif + assert(isEqualFuzzy(m_inv, *mCPUret)); +} + +void testInverse4x4() { + std::cout << "TEST INVERSE 4x4" << std::endl; + Matrix4d m; + fillMatrix(m); + m += m.transpose().eval(); + + Matrix4d m_inv = m.inverse(); + Matrix4d *mGPU = nullptr; + Matrix4d *mGPUret = nullptr; + Matrix4d *mCPUret = new Matrix4d(); + +#if TEST_DEBUG + std::cout << "Here is the matrix m:" << std::endl << m << std::endl; + std::cout << "Its inverse is:" << std::endl << m.inverse() << std::endl; +#endif + cudaCheck(cudaMalloc((void **)&mGPU, sizeof(Matrix4d))); + cudaCheck(cudaMalloc((void **)&mGPUret, sizeof(Matrix4d))); + cudaCheck(cudaMemcpy(mGPU, &m, sizeof(Matrix4d), cudaMemcpyHostToDevice)); + + kernelInverse4x4<<<1, 1>>>(mGPU, mGPUret); + cudaDeviceSynchronize(); + + cudaCheck(cudaMemcpy(mCPUret, mGPUret, sizeof(Matrix4d), cudaMemcpyDeviceToHost)); +#if TEST_DEBUG + std::cout << "Its GPU inverse is:" << std::endl << (*mCPUret) << std::endl; +#endif + assert(isEqualFuzzy(m_inv, *mCPUret)); +} + +void testInverse5x5() { + std::cout << "TEST INVERSE 5x5" << std::endl; + Matrix5d m; + fillMatrix(m); + m += m.transpose().eval(); + + Matrix5d m_inv = m.inverse(); + Matrix5d *mGPU = nullptr; + Matrix5d *mGPUret = nullptr; + Matrix5d *mCPUret = new Matrix5d(); + +#if TEST_DEBUG + std::cout << "Here is the matrix m:" << std::endl << m << std::endl; + std::cout << "Its inverse is:" << std::endl << m.inverse() << std::endl; +#endif + cudaCheck(cudaMalloc((void **)&mGPU, sizeof(Matrix5d))); + cudaCheck(cudaMalloc((void **)&mGPUret, sizeof(Matrix5d))); + cudaCheck(cudaMemcpy(mGPU, &m, sizeof(Matrix5d), cudaMemcpyHostToDevice)); + + kernelInverse5x5<<<1, 1>>>(mGPU, mGPUret); + cudaDeviceSynchronize(); + + cudaCheck(cudaMemcpy(mCPUret, mGPUret, sizeof(Matrix5d), cudaMemcpyDeviceToHost)); +#if TEST_DEBUG + std::cout << "Its GPU inverse is:" << std::endl << (*mCPUret) << std::endl; +#endif + assert(isEqualFuzzy(m_inv, *mCPUret)); +} + +void testEigenvalues() { + std::cout << "TEST EIGENVALUES" << std::endl; + Matrix3d m; + fillMatrix(m); + m += m.transpose().eval(); + + Matrix3d *m_gpu = nullptr; + Matrix3d *mgpudebug = new Matrix3d(); + Eigen::SelfAdjointEigenSolver::RealVectorType *ret = + new Eigen::SelfAdjointEigenSolver::RealVectorType; + Eigen::SelfAdjointEigenSolver::RealVectorType *ret1 = + new Eigen::SelfAdjointEigenSolver::RealVectorType; + Eigen::SelfAdjointEigenSolver::RealVectorType *ret_gpu = nullptr; + eigenValues(&m, ret); +#if TEST_DEBUG + std::cout << "Generated Matrix M 3x3:\n" << m << std::endl; + std::cout << "The eigenvalues of M are:" << std::endl << (*ret) << std::endl; + std::cout << "*************************\n\n" << std::endl; +#endif + cudaCheck(cudaMalloc((void **)&m_gpu, sizeof(Matrix3d))); + cudaCheck(cudaMalloc((void **)&ret_gpu, sizeof(Eigen::SelfAdjointEigenSolver::RealVectorType))); + cudaCheck(cudaMemcpy(m_gpu, &m, sizeof(Matrix3d), cudaMemcpyHostToDevice)); + + kernel<<<1, 1>>>(m_gpu, ret_gpu); + cudaDeviceSynchronize(); + + cudaCheck(cudaMemcpy(mgpudebug, m_gpu, sizeof(Matrix3d), cudaMemcpyDeviceToHost)); + cudaCheck(cudaMemcpy( + ret1, ret_gpu, sizeof(Eigen::SelfAdjointEigenSolver::RealVectorType), cudaMemcpyDeviceToHost)); +#if TEST_DEBUG + std::cout << "GPU Generated Matrix M 3x3:\n" << (*mgpudebug) << std::endl; + std::cout << "GPU The eigenvalues of M are:" << std::endl << (*ret1) << std::endl; + std::cout << "*************************\n\n" << std::endl; +#endif + assert(isEqualFuzzy(*ret, *ret1)); +} + +int main(int argc, char *argv[]) { + cms::cudatest::requireDevices(); + + testEigenvalues(); + testInverse3x3(); + testInverse4x4(); + testInverse5x5(); + + testMultiply<1, 2, 2, 1>(); + testMultiply<1, 2, 2, 2>(); + testMultiply<1, 2, 2, 3>(); + testMultiply<1, 2, 2, 4>(); + testMultiply<1, 2, 2, 5>(); + testMultiply<2, 1, 1, 2>(); + testMultiply<2, 1, 1, 3>(); + testMultiply<2, 1, 1, 4>(); + testMultiply<2, 1, 1, 5>(); + testMultiply<2, 2, 2, 2>(); + testMultiply<2, 3, 3, 1>(); + testMultiply<2, 3, 3, 2>(); + testMultiply<2, 3, 3, 4>(); + testMultiply<2, 3, 3, 5>(); + testMultiply<3, 2, 2, 3>(); + testMultiply<2, 3, 3, 3>(); // DOES NOT COMPILE W/O PATCHING EIGEN + testMultiply<3, 3, 3, 3>(); + testMultiply<8, 8, 8, 8>(); + testMultiply<3, 4, 4, 3>(); + testMultiply<2, 4, 4, 2>(); + testMultiply<3, 4, 4, 2>(); // DOES NOT COMPILE W/O PATCHING EIGEN + + return 0; +} diff --git a/RecoPixelVertexing/PixelTrackFitting/test/testEigenJacobian.cpp b/RecoPixelVertexing/PixelTrackFitting/test/testEigenJacobian.cpp new file mode 100644 index 0000000000000..a8e040fa0df38 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/test/testEigenJacobian.cpp @@ -0,0 +1,134 @@ +#include "RecoPixelVertexing/PixelTrackFitting/interface/FitUtils.h" +#include + +using riemannFit::Matrix5d; +using riemannFit::Vector5d; + +#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h" + +#include "DataFormats/GeometrySurface/interface/Surface.h" +#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" +#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h" + +#include "DataFormats/GeometrySurface/interface/Plane.h" + +#include "MagneticField/Engine/interface/MagneticField.h" + +namespace { + + struct M5T : public MagneticField { + M5T() : mf(0., 0., 5.) {} + virtual GlobalVector inTesla(const GlobalPoint&) const { return mf; } + + GlobalVector mf; + }; + +} // namespace + +// old pixeltrack version... +Matrix5d transfFast(Matrix5d cov, Vector5d const& p) { + auto sqr = [](auto x) { return x * x; }; + auto sinTheta = 1 / std::sqrt(1 + p(3) * p(3)); + auto cosTheta = p(3) * sinTheta; + cov(2, 2) = sqr(sinTheta) * (cov(2, 2) * sqr(1. / (p(2) * p(2))) + cov(3, 3) * sqr(cosTheta * sinTheta / p(2))); + cov(3, 2) = cov(2, 3) = cov(3, 3) * cosTheta * sqr(sinTheta) / p(2); + // for (int i=0; i<5; ++i) cov(i,2) *= -sinTheta/(p(2)*p(2)); + // for (int i=0; i<5; ++i) cov(2,i) *= -sinTheta/(p(2)*p(2)); + return cov; +} + +Matrix5d loadCov(Vector5d const& e) { + Matrix5d cov; + for (int i = 0; i < 5; ++i) + cov(i, i) = e(i) * e(i); + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < i; ++j) { + double v = 0.3 * std::sqrt(cov(i, i) * cov(j, j)); // this makes the matrix pos defined + cov(i, j) = (i + j) % 2 ? -0.4 * v : 0.1 * v; + cov(j, i) = cov(i, j); + } + } + return cov; +} + +#include +int main() { + M5T const mf; + + for (auto charge = -1; charge < 2; charge += 2) + for (auto szip = -1; szip < 2; szip += 2) + for (auto stip = -1; stip < 2; stip += 2) { + Vector5d par0; + par0 << 0.2, 0.1, 3.5, 0.8, 0.1; + Vector5d del0; + del0 << 0.01, 0.01, 0.035, -0.03, -0.01; + //!<(phi,Tip,pt,cotan(theta)),Zip) + par0(1) *= stip; + par0(4) *= szip; + + Matrix5d cov0 = loadCov(del0); + + Vector5d par1; + Vector5d par2; + + Matrix5d cov1; + Matrix5d cov2; + + // Matrix5d covf = transfFast(cov0,par0); + + riemannFit::transformToPerigeePlane(par0, cov0, par1, cov1); + + std::cout << "cov1\n" << cov1 << std::endl; + + LocalTrajectoryParameters lpar(par1(0), par1(1), par1(2), par1(3), par1(4), 1.); + AlgebraicSymMatrix55 m; + for (int i = 0; i < 5; ++i) + for (int j = i; j < 5; ++j) + m(i, j) = cov1(i, j); + + float phi = par0(0); + float sp = std::sin(phi); + float cp = std::cos(phi); + Surface::RotationType rot(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0); + + Surface::PositionType bs(0., 0., 0.); + Plane plane(bs, rot); + GlobalTrajectoryParameters gp( + plane.toGlobal(lpar.position()), plane.toGlobal(lpar.momentum()), lpar.charge(), &mf); + std::cout << "global par " << gp.position() << ' ' << gp.momentum() << ' ' << gp.charge() << std::endl; + JacobianLocalToCurvilinear jl2c(plane, lpar, mf); + std::cout << "jac l2c" << jl2c.jacobian() << std::endl; + + AlgebraicSymMatrix55 mo = ROOT::Math::Similarity(jl2c.jacobian(), m); + std::cout << "curv error\n" << mo << std::endl; + + /* + + // not accurate as the perigee plane move as well... + Vector5d del1 = par2-par1; + + + // don't ask: guess + std::cout << "charge " << charge << std::endl; + std::cout << "par0 " << par0.transpose() << std::endl; + std::cout << "del0 " << del0.transpose() << std::endl; + + + std::cout << "par1 " << par1.transpose() << std::endl; + std::cout << "del1 " << del1.transpose() << std::endl; + // std::cout << "del2 " << (J*del0).transpose() << std::endl; + + std::cout << "del1^2 " << (del1.array()*del1.array()).transpose() << std::endl; + std::cout << std::endl; + + std::cout << "cov0\n" << cov0 << std::endl; + std::cout << "cov1\n" << cov1 << std::endl; + std::cout << "cov2\n" << cov2 << std::endl; + */ + + std::cout << std::endl << "----------" << std::endl; + + } // lopp over signs + + return 0; +} diff --git a/RecoPixelVertexing/PixelTrackFitting/test/testFits.cpp b/RecoPixelVertexing/PixelTrackFitting/test/testFits.cpp new file mode 100644 index 0000000000000..7c0dab3be3e00 --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/test/testFits.cpp @@ -0,0 +1,154 @@ +#include + +#include +#include + +#ifdef USE_BL +#include "RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h" +#else +#include "RecoPixelVertexing/PixelTrackFitting/interface/RiemannFit.h" +#endif + +#include "test_common.h" + +using namespace Eigen; + +namespace riemannFit { + constexpr uint32_t maxNumberOfTracks() { return 5 * 1024; } + constexpr uint32_t stride() { return maxNumberOfTracks(); } + // hits + template + using Matrix3xNd = Eigen::Matrix; + template + using Map3xNd = Eigen::Map, 0, Eigen::Stride<3 * stride(), stride()> >; + // errors + template + using Matrix6xNf = Eigen::Matrix; + template + using Map6xNf = Eigen::Map, 0, Eigen::Stride<6 * stride(), stride()> >; + // fast fit + using Map4d = Eigen::Map >; + +} // namespace riemannFit + +/* +Hit global: 641,0 2: 2.934787,0.773211,-10.980247 +Error: 641,0 2: 1.424715e-07,-4.996975e-07,1.752614e-06,3.660689e-11,1.644638e-09,7.346080e-05 +Hit global: 641,1 104: 6.314229,1.816356,-23.162731 +Error: 641,1 104: 6.899177e-08,-1.873414e-07,5.087101e-07,-2.078806e-10,-2.210498e-11,4.346079e-06 +Hit global: 641,2 1521: 8.936963,2.765734,-32.759060 +Error: 641,2 1521: 1.406273e-06,4.042467e-07,6.391180e-07,-3.141497e-07,6.513821e-08,1.163863e-07 +Hit global: 641,3 1712: 10.360559,3.330824,-38.061260 +Error: 641,3 1712: 1.176358e-06,2.154100e-07,5.072816e-07,-8.161219e-08,1.437878e-07,5.951832e-08 +Hit global: 641,4 1824: 12.856387,4.422212,-47.518867 +Error: 641,4 1824: 2.852843e-05,7.956492e-06,3.117701e-06,-1.060541e-06,8.777413e-09,1.426417e-07 +*/ + +template +void fillHitsAndHitsCov(M3xN& hits, M6xN& hits_ge) { + constexpr uint32_t N = M3xN::ColsAtCompileTime; + + if (N == 5) { + hits << 2.934787, 6.314229, 8.936963, 10.360559, 12.856387, 0.773211, 1.816356, 2.765734, 3.330824, 4.422212, + -10.980247, -23.162731, -32.759060, -38.061260, -47.518867; + hits_ge.col(0) << 1.424715e-07, -4.996975e-07, 1.752614e-06, 3.660689e-11, 1.644638e-09, 7.346080e-05; + hits_ge.col(1) << 6.899177e-08, -1.873414e-07, 5.087101e-07, -2.078806e-10, -2.210498e-11, 4.346079e-06; + hits_ge.col(2) << 1.406273e-06, 4.042467e-07, 6.391180e-07, -3.141497e-07, 6.513821e-08, 1.163863e-07; + hits_ge.col(3) << 1.176358e-06, 2.154100e-07, 5.072816e-07, -8.161219e-08, 1.437878e-07, 5.951832e-08; + hits_ge.col(4) << 2.852843e-05, 7.956492e-06, 3.117701e-06, -1.060541e-06, 8.777413e-09, 1.426417e-07; + return; + } + + if (N > 3) + hits << 1.98645, 4.72598, 7.65632, 11.3151, 2.18002, 4.88864, 7.75845, 11.3134, 2.46338, 6.99838, 11.808, 17.793; + else + hits << 1.98645, 4.72598, 7.65632, 2.18002, 4.88864, 7.75845, 2.46338, 6.99838, 11.808; + + hits_ge.col(0)[0] = 7.14652e-06; + hits_ge.col(1)[0] = 2.15789e-06; + hits_ge.col(2)[0] = 1.63328e-06; + if (N > 3) + hits_ge.col(3)[0] = 6.27919e-06; + hits_ge.col(0)[2] = 6.10348e-06; + hits_ge.col(1)[2] = 2.08211e-06; + hits_ge.col(2)[2] = 1.61672e-06; + if (N > 3) + hits_ge.col(3)[2] = 6.28081e-06; + hits_ge.col(0)[5] = 5.184e-05; + hits_ge.col(1)[5] = 1.444e-05; + hits_ge.col(2)[5] = 6.25e-06; + if (N > 3) + hits_ge.col(3)[5] = 3.136e-05; + hits_ge.col(0)[1] = -5.60077e-06; + hits_ge.col(1)[1] = -1.11936e-06; + hits_ge.col(2)[1] = -6.24945e-07; + if (N > 3) + hits_ge.col(3)[1] = -5.28e-06; +} + +template +void testFit() { + constexpr double B = 0.0113921; + riemannFit::Matrix3xNd hits; + riemannFit::Matrix6xNf hits_ge = MatrixXf::Zero(6, N); + + fillHitsAndHitsCov(hits, hits_ge); + + std::cout << "sizes " << N << ' ' << sizeof(hits) << ' ' << sizeof(hits_ge) << ' ' << sizeof(Vector4d) << std::endl; + + std::cout << "Generated hits:\n" << hits << std::endl; + std::cout << "Generated cov:\n" << hits_ge << std::endl; + + // FAST_FIT_CPU +#ifdef USE_BL + Vector4d fast_fit_results; + brokenline::fastFit(hits, fast_fit_results); +#else + Vector4d fast_fit_results; + riemannFit::fastFit(hits, fast_fit_results); +#endif + std::cout << "Fitted values (FastFit, [X0, Y0, R, tan(theta)]):\n" << fast_fit_results << std::endl; + + // CIRCLE_FIT CPU + +#ifdef USE_BL + brokenline::PreparedBrokenLineData data; + brokenline::karimaki_circle_fit circle_fit_results; + riemannFit::Matrix3d Jacob; + + brokenline::prepareBrokenLineData(hits, fast_fit_results, B, data); + riemannFit::LineFit line_fit_results; + brokenline::lineFit(hits_ge, fast_fit_results, B, data, line_fit_results); + brokenline::circleFit(hits, hits_ge, fast_fit_results, B, data, circle_fit_results); + Jacob << 1., 0, 0, 0, 1., 0, 0, 0, + -B / std::copysign(riemannFit::sqr(circle_fit_results.par(2)), circle_fit_results.par(2)); + circle_fit_results.par(2) = B / std::abs(circle_fit_results.par(2)); + circle_fit_results.cov = Jacob * circle_fit_results.cov * Jacob.transpose(); +#else + riemannFit::VectorNd rad = (hits.block(0, 0, 2, N).colwise().norm()); + riemannFit::Matrix2Nd hits_cov = riemannFit::Matrix2Nd::Zero(); + riemannFit::loadCovariance2D(hits_ge, hits_cov); + riemannFit::CircleFit circle_fit_results = + riemannFit::circleFit(hits.block(0, 0, 2, N), hits_cov, fast_fit_results, rad, B, true); + // LINE_FIT CPU + riemannFit::LineFit line_fit_results = + riemannFit::lineFit(hits, hits_ge, circle_fit_results, fast_fit_results, B, true); + riemannFit::par_uvrtopak(circle_fit_results, B, true); + +#endif + + std::cout << "Fitted values (CircleFit):\n" + << circle_fit_results.par << "\nchi2 " << circle_fit_results.chi2 << std::endl; + std::cout << "Fitted values (LineFit):\n" << line_fit_results.par << "\nchi2 " << line_fit_results.chi2 << std::endl; + + std::cout << "Fitted cov (CircleFit) CPU:\n" << circle_fit_results.cov << std::endl; + std::cout << "Fitted cov (LineFit): CPU\n" << line_fit_results.cov << std::endl; +} + +int main(int argc, char* argv[]) { + testFit<4>(); + testFit<3>(); + testFit<5>(); + + return 0; +} diff --git a/RecoPixelVertexing/PixelTrackFitting/test/test_common.h b/RecoPixelVertexing/PixelTrackFitting/test/test_common.h new file mode 100644 index 0000000000000..6377628b0eeca --- /dev/null +++ b/RecoPixelVertexing/PixelTrackFitting/test/test_common.h @@ -0,0 +1,47 @@ +#ifndef RecoPixelVertexing__PixelTrackFitting__test_common_h +#define RecoPixelVertexing__PixelTrackFitting__test_common_h + +#include +#include +#include + +template +__host__ __device__ void printIt(C* m) { +#ifdef TEST_DEBUG + printf("\nMatrix %dx%d\n", (int)m->rows(), (int)m->cols()); + for (u_int r = 0; r < m->rows(); ++r) { + for (u_int c = 0; c < m->cols(); ++c) { + printf("Matrix(%d,%d) = %f\n", r, c, (*m)(r, c)); + } + } +#endif +} + +template +bool isEqualFuzzy(C1 a, C2 b, double epsilon = 1e-6) { + for (unsigned int i = 0; i < a.rows(); ++i) { + for (unsigned int j = 0; j < a.cols(); ++j) { + assert(std::abs(a(i, j) - b(i, j)) < std::min(std::abs(a(i, j)), std::abs(b(i, j))) * epsilon); + } + } + return true; +} + +bool isEqualFuzzy(double a, double b, double epsilon = 1e-6) { + return std::abs(a - b) < std::min(std::abs(a), std::abs(b)) * epsilon; +} + +template +void fillMatrix(T& t) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<> dis(0.0, 2.0); + for (int row = 0; row < t.rows(); ++row) { + for (int col = 0; col < t.cols(); ++col) { + t(row, col) = dis(gen); + } + } + return; +} + +#endif diff --git a/RecoPixelVertexing/PixelTriplets/interface/CAHitQuadrupletGenerator.h b/RecoPixelVertexing/PixelTriplets/interface/CAHitQuadrupletGenerator.h index 9d149533eefbc..deb2beb6099ee 100644 --- a/RecoPixelVertexing/PixelTriplets/interface/CAHitQuadrupletGenerator.h +++ b/RecoPixelVertexing/PixelTriplets/interface/CAHitQuadrupletGenerator.h @@ -42,7 +42,7 @@ class CAHitQuadrupletGenerator { ~CAHitQuadrupletGenerator() = default; static void fillDescriptions(edm::ParameterSetDescription& desc); - static const char* fillDescriptionsLabel() { return "caHitQuadruplet"; } + static const char* fillDescriptionsLabel() { return "caHitQuadrupletDefault"; } void initEvent(const edm::Event& ev, const edm::EventSetup& es); diff --git a/RecoPixelVertexing/PixelTriplets/interface/CircleEq.h b/RecoPixelVertexing/PixelTriplets/interface/CircleEq.h new file mode 100644 index 0000000000000..986fe2e2992b9 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/interface/CircleEq.h @@ -0,0 +1,97 @@ +#ifndef RecoPixelVertexingPixelTripletsCircleEq_H +#define RecoPixelVertexingPixelTripletsCircleEq_H +/** +| 1) circle is parameterized as: | +| C*[(X-Xp)**2+(Y-Yp)**2] - 2*alpha*(X-Xp) - 2*beta*(Y-Yp) = 0 | +| Xp,Yp is a point on the track; | +| C = 1/r0 is the curvature ( sign of C is charge of particle ); | +| alpha & beta are the direction cosines of the radial vector at Xp,Yp | +| i.e. alpha = C*(X0-Xp), | +| beta = C*(Y0-Yp), | +| where center of circle is at X0,Y0. | +| | +| Slope dy/dx of tangent at Xp,Yp is -alpha/beta. | +| 2) the z dimension of the helix is parameterized by gamma = dZ/dSperp | +| this is also the tangent of the pitch angle of the helix. | +| with this parameterization, (alpha,beta,gamma) rotate like a vector. | +| 3) For tracks going inward at (Xp,Yp), C, alpha, beta, and gamma change sign| +| +*/ + +#include + +template +class CircleEq { +public: + CircleEq() {} + + constexpr CircleEq(T x1, T y1, T x2, T y2, T x3, T y3) { compute(x1, y1, x2, y2, x3, y3); } + + constexpr void compute(T x1, T y1, T x2, T y2, T x3, T y3); + + // dca to origin divided by curvature + constexpr T dca0() const { + auto x = m_c * m_xp + m_alpha; + auto y = m_c * m_yp + m_beta; + return std::sqrt(x * x + y * y) - T(1); + } + + // dca to given point (divided by curvature) + constexpr T dca(T x, T y) const { + x = m_c * (m_xp - x) + m_alpha; + y = m_c * (m_yp - y) + m_beta; + return std::sqrt(x * x + y * y) - T(1); + } + + // curvature + constexpr auto curvature() const { return m_c; } + + // alpha and beta + constexpr std::pair cosdir() const { return std::make_pair(m_alpha, m_beta); } + + // alpha and beta af given point + constexpr std::pair cosdir(T x, T y) const { + return std::make_pair(m_alpha - m_c * (x - m_xp), m_beta - m_c * (y - m_yp)); + } + + // center + constexpr std::pair center() const { return std::make_pair(m_xp + m_alpha / m_c, m_yp + m_beta / m_c); } + + constexpr auto radius() const { return T(1) / m_c; } + + T m_xp = 0; + T m_yp = 0; + T m_c = 0; + T m_alpha = 0; + T m_beta = 0; +}; + +template +constexpr void CircleEq::compute(T x1, T y1, T x2, T y2, T x3, T y3) { + bool noflip = std::abs(x3 - x1) < std::abs(y3 - y1); + + auto x1p = noflip ? x1 - x2 : y1 - y2; + auto y1p = noflip ? y1 - y2 : x1 - x2; + auto d12 = x1p * x1p + y1p * y1p; + auto x3p = noflip ? x3 - x2 : y3 - y2; + auto y3p = noflip ? y3 - y2 : x3 - x2; + auto d32 = x3p * x3p + y3p * y3p; + + auto num = x1p * y3p - y1p * x3p; // num also gives correct sign for CT + auto det = d12 * y3p - d32 * y1p; + + auto st2 = (d12 * x3p - d32 * x1p); + auto seq = det * det + st2 * st2; + auto al2 = T(1.) / std::sqrt(seq); + auto be2 = -st2 * al2; + auto ct = T(2.) * num * al2; + al2 *= det; + + m_xp = x2; + m_yp = y2; + m_c = noflip ? ct : -ct; + m_alpha = noflip ? al2 : -be2; + m_beta = noflip ? be2 : -al2; +} + +#endif diff --git a/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cc b/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cc new file mode 100644 index 0000000000000..bebfe0e08008e --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cc @@ -0,0 +1,70 @@ +#include "BrokenLineFitOnGPU.h" + +void HelixFitOnGPU::launchBrokenLineKernelsOnCPU(HitsView const* hv, uint32_t hitsInFit, uint32_t maxNumberOfTuples) { + assert(tuples_); + + // Fit internals + auto hitsGPU_ = + std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double)); + auto hits_geGPU_ = + std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float)); + auto fast_fit_resultsGPU_ = + std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double)); + + for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) { + // fit triplets + kernel_BLFastFit<3>( + tuples_, tupleMultiplicity_, hv, hitsGPU_.get(), hits_geGPU_.get(), fast_fit_resultsGPU_.get(), 3, offset); + + kernel_BLFit<3>(tupleMultiplicity_, + bField_, + outputSoa_, + hitsGPU_.get(), + hits_geGPU_.get(), + fast_fit_resultsGPU_.get(), + 3, + offset); + + // fit quads + kernel_BLFastFit<4>( + tuples_, tupleMultiplicity_, hv, hitsGPU_.get(), hits_geGPU_.get(), fast_fit_resultsGPU_.get(), 4, offset); + + kernel_BLFit<4>(tupleMultiplicity_, + bField_, + outputSoa_, + hitsGPU_.get(), + hits_geGPU_.get(), + fast_fit_resultsGPU_.get(), + 4, + offset); + + if (fit5as4_) { + // fit penta (only first 4) + kernel_BLFastFit<4>( + tuples_, tupleMultiplicity_, hv, hitsGPU_.get(), hits_geGPU_.get(), fast_fit_resultsGPU_.get(), 5, offset); + + kernel_BLFit<4>(tupleMultiplicity_, + bField_, + outputSoa_, + hitsGPU_.get(), + hits_geGPU_.get(), + fast_fit_resultsGPU_.get(), + 5, + offset); + } else { + // fit penta (all 5) + kernel_BLFastFit<5>( + tuples_, tupleMultiplicity_, hv, hitsGPU_.get(), hits_geGPU_.get(), fast_fit_resultsGPU_.get(), 5, offset); + + kernel_BLFit<5>(tupleMultiplicity_, + bField_, + outputSoa_, + hitsGPU_.get(), + hits_geGPU_.get(), + fast_fit_resultsGPU_.get(), + 5, + offset); + } + + } // loop on concurrent fits +} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cu b/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cu new file mode 100644 index 0000000000000..d2ca583e86bd0 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cu @@ -0,0 +1,85 @@ +#include "BrokenLineFitOnGPU.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" + +void HelixFitOnGPU::launchBrokenLineKernels(HitsView const *hv, + uint32_t hitsInFit, + uint32_t maxNumberOfTuples, + cudaStream_t stream) { + assert(tuples_); + + auto blockSize = 64; + auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize; + + // Fit internals + auto hitsGPU_ = cms::cuda::make_device_unique( + maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double), stream); + auto hits_geGPU_ = cms::cuda::make_device_unique( + maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float), stream); + auto fast_fit_resultsGPU_ = cms::cuda::make_device_unique( + maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double), stream); + + for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) { + // fit triplets + kernel_BLFastFit<3><<>>( + tuples_, tupleMultiplicity_, hv, hitsGPU_.get(), hits_geGPU_.get(), fast_fit_resultsGPU_.get(), 3, offset); + cudaCheck(cudaGetLastError()); + + kernel_BLFit<3><<>>(tupleMultiplicity_, + bField_, + outputSoa_, + hitsGPU_.get(), + hits_geGPU_.get(), + fast_fit_resultsGPU_.get(), + 3, + offset); + cudaCheck(cudaGetLastError()); + + // fit quads + kernel_BLFastFit<4><<>>( + tuples_, tupleMultiplicity_, hv, hitsGPU_.get(), hits_geGPU_.get(), fast_fit_resultsGPU_.get(), 4, offset); + cudaCheck(cudaGetLastError()); + + kernel_BLFit<4><<>>(tupleMultiplicity_, + bField_, + outputSoa_, + hitsGPU_.get(), + hits_geGPU_.get(), + fast_fit_resultsGPU_.get(), + 4, + offset); + cudaCheck(cudaGetLastError()); + + if (fit5as4_) { + // fit penta (only first 4) + kernel_BLFastFit<4><<>>( + tuples_, tupleMultiplicity_, hv, hitsGPU_.get(), hits_geGPU_.get(), fast_fit_resultsGPU_.get(), 5, offset); + cudaCheck(cudaGetLastError()); + + kernel_BLFit<4><<>>(tupleMultiplicity_, + bField_, + outputSoa_, + hitsGPU_.get(), + hits_geGPU_.get(), + fast_fit_resultsGPU_.get(), + 5, + offset); + cudaCheck(cudaGetLastError()); + } else { + // fit penta (all 5) + kernel_BLFastFit<5><<>>( + tuples_, tupleMultiplicity_, hv, hitsGPU_.get(), hits_geGPU_.get(), fast_fit_resultsGPU_.get(), 5, offset); + cudaCheck(cudaGetLastError()); + + kernel_BLFit<5><<>>(tupleMultiplicity_, + bField_, + outputSoa_, + hitsGPU_.get(), + hits_geGPU_.get(), + fast_fit_resultsGPU_.get(), + 5, + offset); + cudaCheck(cudaGetLastError()); + } + + } // loop on concurrent fits +} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.h b/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.h new file mode 100644 index 0000000000000..ee5065e81fc45 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.h @@ -0,0 +1,184 @@ +// +// Author: Felice Pantaleo, CERN +// + +// #define BROKENLINE_DEBUG + +#include + +#include + +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h" + +#include "HelixFitOnGPU.h" + +using HitsOnGPU = TrackingRecHit2DSOAView; +using Tuples = pixelTrack::HitContainer; +using OutputSoA = pixelTrack::TrackSoA; + +// #define BL_DUMP_HITS + +template +__global__ void kernel_BLFastFit(Tuples const *__restrict__ foundNtuplets, + caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity, + HitsOnGPU const *__restrict__ hhp, + double *__restrict__ phits, + float *__restrict__ phits_ge, + double *__restrict__ pfast_fit, + uint32_t nHits, + uint32_t offset) { + constexpr uint32_t hitsInFit = N; + + assert(hitsInFit <= nHits); + + assert(hhp); + assert(pfast_fit); + assert(foundNtuplets); + assert(tupleMultiplicity); + + // look in bin for this hit multiplicity + auto local_start = blockIdx.x * blockDim.x + threadIdx.x; + +#ifdef BROKENLINE_DEBUG + if (0 == local_start) { + printf("%d total Ntuple\n", foundNtuplets->nbins()); + printf("%d Ntuple of size %d for %d hits to fit\n", tupleMultiplicity->size(nHits), nHits, hitsInFit); + } +#endif + + for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt; + local_idx += gridDim.x * blockDim.x) { + auto tuple_idx = local_idx + offset; + if (tuple_idx >= tupleMultiplicity->size(nHits)) + break; + + // get it from the ntuple container (one to one to helix) + auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); + assert(tkid < foundNtuplets->nbins()); + + assert(foundNtuplets->size(tkid) == nHits); + + riemannFit::Map3xNd hits(phits + local_idx); + riemannFit::Map4d fast_fit(pfast_fit + local_idx); + riemannFit::Map6xNf hits_ge(phits_ge + local_idx); + +#ifdef BL_DUMP_HITS + __shared__ int done; + done = 0; + __syncthreads(); + bool dump = (foundNtuplets->size(tkid) == 5 && 0 == atomicAdd(&done, 1)); +#endif + + // Prepare data structure + auto const *hitId = foundNtuplets->begin(tkid); + for (unsigned int i = 0; i < hitsInFit; ++i) { + auto hit = hitId[i]; + float ge[6]; + hhp->cpeParams() + .detParams(hhp->detectorIndex(hit)) + .frame.toGlobal(hhp->xerrLocal(hit), 0, hhp->yerrLocal(hit), ge); +#ifdef BL_DUMP_HITS + if (dump) { + printf("Hit global: %d: %d hits.col(%d) << %f,%f,%f\n", + tkid, + hhp->detectorIndex(hit), + i, + hhp->xGlobal(hit), + hhp->yGlobal(hit), + hhp->zGlobal(hit)); + printf("Error: %d: %d hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", + tkid, + hhp->detetectorIndex(hit), + i, + ge[0], + ge[1], + ge[2], + ge[3], + ge[4], + ge[5]); + } +#endif + hits.col(i) << hhp->xGlobal(hit), hhp->yGlobal(hit), hhp->zGlobal(hit); + hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]; + } + brokenline::fastFit(hits, fast_fit); + + // no NaN here.... + assert(fast_fit(0) == fast_fit(0)); + assert(fast_fit(1) == fast_fit(1)); + assert(fast_fit(2) == fast_fit(2)); + assert(fast_fit(3) == fast_fit(3)); + } +} + +template +__global__ void kernel_BLFit(caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity, + double bField, + OutputSoA *results, + double *__restrict__ phits, + float *__restrict__ phits_ge, + double *__restrict__ pfast_fit, + uint32_t nHits, + uint32_t offset) { + assert(N <= nHits); + + assert(results); + assert(pfast_fit); + + // same as above... + + // look in bin for this hit multiplicity + auto local_start = blockIdx.x * blockDim.x + threadIdx.x; + for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt; + local_idx += gridDim.x * blockDim.x) { + auto tuple_idx = local_idx + offset; + if (tuple_idx >= tupleMultiplicity->size(nHits)) + break; + + // get it for the ntuple container (one to one to helix) + auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); + + riemannFit::Map3xNd hits(phits + local_idx); + riemannFit::Map4d fast_fit(pfast_fit + local_idx); + riemannFit::Map6xNf hits_ge(phits_ge + local_idx); + + brokenline::PreparedBrokenLineData data; + + brokenline::karimaki_circle_fit circle; + riemannFit::LineFit line; + + brokenline::prepareBrokenLineData(hits, fast_fit, bField, data); + brokenline::lineFit(hits_ge, fast_fit, bField, data, line); + brokenline::circleFit(hits, hits_ge, fast_fit, bField, data, circle); + + results->stateAtBS.copyFromCircle(circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid); + results->pt(tkid) = float(bField) / float(std::abs(circle.par(2))); + results->eta(tkid) = asinhf(line.par(0)); + results->chi2(tkid) = (circle.chi2 + line.chi2) / (2 * N - 5); + +#ifdef BROKENLINE_DEBUG + if (!(circle.chi2 >= 0) || !(line.chi2 >= 0)) + printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2); + printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n", + N, + nHits, + tkid, + circle.par(0), + circle.par(1), + circle.par(2)); + printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1)); + printf("kernelBLHits chi2 cov %f/%f %e,%e,%e,%e,%e\n", + circle.chi2, + line.chi2, + circle.cov(0, 0), + circle.cov(1, 1), + circle.cov(2, 2), + line.cov(0, 0), + line.cov(1, 1)); +#endif + } +} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/BuildFile.xml b/RecoPixelVertexing/PixelTriplets/plugins/BuildFile.xml index f76451675de59..3a54cd1134bc2 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/BuildFile.xml +++ b/RecoPixelVertexing/PixelTriplets/plugins/BuildFile.xml @@ -1,10 +1,14 @@ + + + + + + + - + - - - diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAConstants.h b/RecoPixelVertexing/PixelTriplets/plugins/CAConstants.h new file mode 100644 index 0000000000000..5342141d2c9e4 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAConstants.h @@ -0,0 +1,83 @@ +#ifndef RecoPixelVertexing_PixelTriplets_plugins_CAConstants_h +#define RecoPixelVertexing_PixelTriplets_plugins_CAConstants_h + +#include + +#include + +#include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" +#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" +#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HistoContainer.h" + +//#define ONLY_PHICUT + +// Cellular automaton constants +namespace caConstants { + + // constants +#ifdef ONLY_PHICUT + constexpr uint32_t maxCellNeighbors = 64; + constexpr uint32_t maxCellTracks = 64; + constexpr uint32_t maxNumberOfTuples = 48 * 1024; + constexpr uint32_t maxNumberOfDoublets = 2 * 1024 * 1024; + constexpr uint32_t maxCellsPerHit = 8 * 128; +#else // ONLY_PHICUT + constexpr uint32_t maxCellNeighbors = 36; + constexpr uint32_t maxCellTracks = 48; +#ifdef GPU_SMALL_EVENTS + // kept for testing and debugging + constexpr uint32_t maxNumberOfTuples = 3 * 1024; + constexpr uint32_t maxNumberOfDoublets = 128 * 1024; + constexpr uint32_t maxCellsPerHit = 128 / 2; +#else // GPU_SMALL_EVENTS + // tested on MC events with 55-75 pileup events + constexpr uint32_t maxNumberOfTuples = 24 * 1024; + constexpr uint32_t maxNumberOfDoublets = 512 * 1024; + constexpr uint32_t maxCellsPerHit = 128; +#endif // GPU_SMALL_EVENTS +#endif // ONLY_PHICUT + constexpr uint32_t maxNumOfActiveDoublets = maxNumberOfDoublets / 8; + constexpr uint32_t maxNumberOfQuadruplets = maxNumberOfTuples; + + constexpr uint32_t maxNumberOfLayerPairs = 20; + constexpr uint32_t maxNumberOfLayers = 10; + constexpr uint32_t maxTuples = maxNumberOfTuples; + + // Modules constants + constexpr uint32_t max_ladder_bpx0 = 12; + constexpr uint32_t first_ladder_bpx0 = 0; + constexpr float module_length_bpx0 = 6.7f; + constexpr float module_tolerance_bpx0 = 0.4f; // projection to cylinder is inaccurate on BPIX1 + constexpr uint32_t max_ladder_bpx4 = 64; + constexpr uint32_t first_ladder_bpx4 = 84; + constexpr float radius_even_ladder = 15.815f; + constexpr float radius_odd_ladder = 16.146f; + constexpr float module_length_bpx4 = 6.7f; + constexpr float module_tolerance_bpx4 = 0.2f; + constexpr float barrel_z_length = 26.f; + constexpr float forward_z_begin = 32.f; + + // Last indexes + constexpr uint32_t last_bpix1_detIndex = 96; + constexpr uint32_t last_barrel_detIndex = 1184; + + // types + using hindex_type = uint32_t; // FIXME from siPixelRecHitsHeterogeneousProduct + using tindex_type = uint16_t; // for tuples + + using CellNeighbors = cms::cuda::VecArray; + using CellTracks = cms::cuda::VecArray; + + using CellNeighborsVector = cms::cuda::SimpleVector; + using CellTracksVector = cms::cuda::SimpleVector; + + using OuterHitOfCell = cms::cuda::VecArray; + using TuplesContainer = cms::cuda::OneToManyAssoc; + using HitToTuple = + cms::cuda::OneToManyAssoc; // 3.5 should be enough + using TupleMultiplicity = cms::cuda::OneToManyAssoc; + +} // namespace caConstants + +#endif // RecoPixelVertexing_PixelTriplets_plugins_CAConstants_h diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletCUDA.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletCUDA.cc new file mode 100644 index 0000000000000..beba54c33f513 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletCUDA.cc @@ -0,0 +1,83 @@ +#include + +#include "CUDADataFormats/Common/interface/Product.h" +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/PluginManager/interface/ModuleDef.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/RunningAverage.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h" + +#include "CAHitNtupletGeneratorOnGPU.h" +#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" + +class CAHitNtupletCUDA : public edm::global::EDProducer<> { +public: + explicit CAHitNtupletCUDA(const edm::ParameterSet& iConfig); + ~CAHitNtupletCUDA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + bool onGPU_; + + edm::EDGetTokenT> tokenHitGPU_; + edm::EDPutTokenT> tokenTrackGPU_; + edm::EDGetTokenT tokenHitCPU_; + edm::EDPutTokenT tokenTrackCPU_; + + CAHitNtupletGeneratorOnGPU gpuAlgo_; +}; + +CAHitNtupletCUDA::CAHitNtupletCUDA(const edm::ParameterSet& iConfig) + : onGPU_(iConfig.getParameter("onGPU")), gpuAlgo_(iConfig, consumesCollector()) { + if (onGPU_) { + tokenHitGPU_ = + consumes>(iConfig.getParameter("pixelRecHitSrc")); + tokenTrackGPU_ = produces>(); + } else { + tokenHitCPU_ = consumes(iConfig.getParameter("pixelRecHitSrc")); + tokenTrackCPU_ = produces(); + } +} + +void CAHitNtupletCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("onGPU", true); + desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingCUDA")); + + CAHitNtupletGeneratorOnGPU::fillDescriptions(desc); + descriptions.add("caHitNtupletCUDA", desc); +} + +void CAHitNtupletCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& es) const { + auto bf = 1. / PixelRecoUtilities::fieldInInvGev(es); + + if (onGPU_) { + auto hHits = iEvent.getHandle(tokenHitGPU_); + + cms::cuda::ScopedContextProduce ctx{*hHits}; + auto const& hits = ctx.get(*hHits); + + ctx.emplace(iEvent, tokenTrackGPU_, gpuAlgo_.makeTuplesAsync(hits, bf, ctx.stream())); + } else { + auto const& hits = iEvent.get(tokenHitCPU_); + iEvent.emplace(tokenTrackCPU_, gpuAlgo_.makeTuples(hits, bf)); + } +} + +DEFINE_FWK_MODULE(CAHitNtupletCUDA); diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc new file mode 100644 index 0000000000000..041254e78aa36 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc @@ -0,0 +1,191 @@ +#include "RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h" + +template <> +void CAHitNtupletGeneratorKernelsCPU::printCounters(Counters const *counters) { + kernel_printCounters(counters); +} + +template <> +void CAHitNtupletGeneratorKernelsCPU::fillHitDetIndices(HitsView const *hv, TkSoA *tracks_d, cudaStream_t) { + kernel_fillHitDetIndices(&tracks_d->hitIndices, hv, &tracks_d->detIndices); +} + +template <> +void CAHitNtupletGeneratorKernelsCPU::buildDoublets(HitsOnCPU const &hh, cudaStream_t stream) { + auto nhits = hh.nHits(); + +#ifdef NTUPLE_DEBUG + std::cout << "building Doublets out of " << nhits << " Hits" << std::endl; +#endif + + // use "nhits" to heuristically dimension the workspace + + // no need to use the Traits allocations, since we know this is being compiled for the CPU + //device_isOuterHitOfCell_ = Traits::template make_unique(std::max(1U, nhits), stream); + device_isOuterHitOfCell_ = std::make_unique(std::max(1U, nhits)); + assert(device_isOuterHitOfCell_.get()); + + auto cellStorageSize = caConstants::maxNumOfActiveDoublets * sizeof(GPUCACell::CellNeighbors) + + caConstants::maxNumOfActiveDoublets * sizeof(GPUCACell::CellTracks); + // no need to use the Traits allocations, since we know this is being compiled for the CPU + //cellStorage_ = Traits::template make_unique(cellStorageSize, stream); + cellStorage_ = std::make_unique(cellStorageSize); + device_theCellNeighborsContainer_ = (GPUCACell::CellNeighbors *)cellStorage_.get(); + device_theCellTracksContainer_ = (GPUCACell::CellTracks *)(cellStorage_.get() + caConstants::maxNumOfActiveDoublets * + sizeof(GPUCACell::CellNeighbors)); + + gpuPixelDoublets::initDoublets(device_isOuterHitOfCell_.get(), + nhits, + device_theCellNeighbors_.get(), + device_theCellNeighborsContainer_, + device_theCellTracks_.get(), + device_theCellTracksContainer_); + + // no need to use the Traits allocations, since we know this is being compiled for the CPU + //device_theCells_ = Traits::template make_unique(params_.maxNumberOfDoublets_, stream); + device_theCells_ = std::make_unique(params_.maxNumberOfDoublets_); + if (0 == nhits) + return; // protect against empty events + + // take all layer pairs into account + auto nActualPairs = gpuPixelDoublets::nPairs; + if (not params_.includeJumpingForwardDoublets_) { + // exclude forward "jumping" layer pairs + nActualPairs = gpuPixelDoublets::nPairsForTriplets; + } + if (params_.minHitsPerNtuplet_ > 3) { + // for quadruplets, exclude all "jumping" layer pairs + nActualPairs = gpuPixelDoublets::nPairsForQuadruplets; + } + + assert(nActualPairs <= gpuPixelDoublets::nPairs); + gpuPixelDoublets::getDoubletsFromHisto(device_theCells_.get(), + device_nCells_, + device_theCellNeighbors_.get(), + device_theCellTracks_.get(), + hh.view(), + device_isOuterHitOfCell_.get(), + nActualPairs, + params_.idealConditions_, + params_.doClusterCut_, + params_.doZ0Cut_, + params_.doPtCut_, + params_.maxNumberOfDoublets_); +} + +template <> +void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) { + auto *tuples_d = &tracks_d->hitIndices; + auto *quality_d = tracks_d->qualityData(); + + assert(tuples_d && quality_d); + + // zero tuples + cms::cuda::launchZero(tuples_d, cudaStream); + + auto nhits = hh.nHits(); + assert(nhits <= pixelGPUConstants::maxNumberOfHits); + + // std::cout << "N hits " << nhits << std::endl; + // if (nhits<2) std::cout << "too few hits " << nhits << std::endl; + + // + // applying conbinatoric cleaning such as fishbone at this stage is too expensive + // + + kernel_connect(device_hitTuple_apc_, + device_hitToTuple_apc_, // needed only to be reset, ready for next kernel + hh.view(), + device_theCells_.get(), + device_nCells_, + device_theCellNeighbors_.get(), + device_isOuterHitOfCell_.get(), + params_.hardCurvCut_, + params_.ptmin_, + params_.CAThetaCutBarrel_, + params_.CAThetaCutForward_, + params_.dcaCutInnerTriplet_, + params_.dcaCutOuterTriplet_); + + if (nhits > 1 && params_.earlyFishbone_) { + gpuPixelDoublets::fishbone( + hh.view(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, false); + } + + kernel_find_ntuplets(hh.view(), + device_theCells_.get(), + device_nCells_, + device_theCellTracks_.get(), + tuples_d, + device_hitTuple_apc_, + quality_d, + params_.minHitsPerNtuplet_); + if (params_.doStats_) + kernel_mark_used(hh.view(), device_theCells_.get(), device_nCells_); + + cms::cuda::finalizeBulk(device_hitTuple_apc_, tuples_d); + + // remove duplicates (tracks that share a doublet) + kernel_earlyDuplicateRemover(device_theCells_.get(), device_nCells_, tuples_d, quality_d); + + kernel_countMultiplicity(tuples_d, quality_d, device_tupleMultiplicity_.get()); + cms::cuda::launchFinalize(device_tupleMultiplicity_.get(), cudaStream); + kernel_fillMultiplicity(tuples_d, quality_d, device_tupleMultiplicity_.get()); + + if (nhits > 1 && params_.lateFishbone_) { + gpuPixelDoublets::fishbone( + hh.view(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, true); + } + + if (params_.doStats_) { + kernel_checkOverflows(tuples_d, + device_tupleMultiplicity_.get(), + device_hitToTuple_.get(), + device_hitTuple_apc_, + device_theCells_.get(), + device_nCells_, + device_theCellNeighbors_.get(), + device_theCellTracks_.get(), + device_isOuterHitOfCell_.get(), + nhits, + params_.maxNumberOfDoublets_, + counters_); + } +} + +template <> +void CAHitNtupletGeneratorKernelsCPU::classifyTuples(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) { + auto const *tuples_d = &tracks_d->hitIndices; + auto *quality_d = tracks_d->qualityData(); + + // classify tracks based on kinematics + kernel_classifyTracks(tuples_d, tracks_d, params_.cuts_, quality_d); + + if (params_.lateFishbone_) { + // apply fishbone cleaning to good tracks + kernel_fishboneCleaner(device_theCells_.get(), device_nCells_, quality_d); + } + + // remove duplicates (tracks that share a doublet) + kernel_fastDuplicateRemover(device_theCells_.get(), device_nCells_, tuples_d, tracks_d); + + // fill hit->track "map" + kernel_countHitInTracks(tuples_d, quality_d, device_hitToTuple_.get()); + cms::cuda::launchFinalize(device_hitToTuple_.get(), cudaStream); + kernel_fillHitInTracks(tuples_d, quality_d, device_hitToTuple_.get()); + + // remove duplicates (tracks that share a hit) + kernel_tripletCleaner(hh.view(), tuples_d, tracks_d, quality_d, device_hitToTuple_.get()); + + if (params_.doStats_) { + // counters (add flag???) + kernel_doStatsForHitInTracks(device_hitToTuple_.get(), counters_); + kernel_doStatsForTracks(tuples_d, quality_d, counters_); + } + +#ifdef DUMP_GPU_TK_TUPLES + static std::atomic iev(0); + ++iev; + kernel_print_found_ntuplets(hh.view(), tuples_d, tracks_d, quality_d, device_hitToTuple_.get(), 100, iev); +#endif +} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu new file mode 100644 index 0000000000000..179e73f3f23f8 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu @@ -0,0 +1,311 @@ +#include "RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h" + +template <> +void CAHitNtupletGeneratorKernelsGPU::fillHitDetIndices(HitsView const *hv, TkSoA *tracks_d, cudaStream_t cudaStream) { + auto blockSize = 128; + auto numberOfBlocks = (HitContainer::capacity() + blockSize - 1) / blockSize; + + kernel_fillHitDetIndices<<>>( + &tracks_d->hitIndices, hv, &tracks_d->detIndices); + cudaCheck(cudaGetLastError()); +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif +} + +template <> +void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) { + // these are pointer on GPU! + auto *tuples_d = &tracks_d->hitIndices; + auto *quality_d = tracks_d->qualityData(); + + // zero tuples + cms::cuda::launchZero(tuples_d, cudaStream); + + auto nhits = hh.nHits(); + assert(nhits <= pixelGPUConstants::maxNumberOfHits); + + // std::cout << "N hits " << nhits << std::endl; + // if (nhits<2) std::cout << "too few hits " << nhits << std::endl; + + // + // applying conbinatoric cleaning such as fishbone at this stage is too expensive + // + + auto nthTot = 64; + auto stride = 4; + auto blockSize = nthTot / stride; + auto numberOfBlocks = nDoubletBlocks(blockSize); + auto rescale = numberOfBlocks / 65536; + blockSize *= (rescale + 1); + numberOfBlocks = nDoubletBlocks(blockSize); + assert(numberOfBlocks < 65536); + assert(blockSize > 0 && 0 == blockSize % 16); + dim3 blks(1, numberOfBlocks, 1); + dim3 thrs(stride, blockSize, 1); + + kernel_connect<<>>( + device_hitTuple_apc_, + device_hitToTuple_apc_, // needed only to be reset, ready for next kernel + hh.view(), + device_theCells_.get(), + device_nCells_, + device_theCellNeighbors_.get(), + device_isOuterHitOfCell_.get(), + params_.hardCurvCut_, + params_.ptmin_, + params_.CAThetaCutBarrel_, + params_.CAThetaCutForward_, + params_.dcaCutInnerTriplet_, + params_.dcaCutOuterTriplet_); + cudaCheck(cudaGetLastError()); + + if (nhits > 1 && params_.earlyFishbone_) { + auto nthTot = 128; + auto stride = 16; + auto blockSize = nthTot / stride; + auto numberOfBlocks = (nhits + blockSize - 1) / blockSize; + dim3 blks(1, numberOfBlocks, 1); + dim3 thrs(stride, blockSize, 1); + gpuPixelDoublets::fishbone<<>>( + hh.view(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, false); + cudaCheck(cudaGetLastError()); + } + + blockSize = 64; + numberOfBlocks = (3 * params_.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize; + kernel_find_ntuplets<<>>(hh.view(), + device_theCells_.get(), + device_nCells_, + device_theCellTracks_.get(), + tuples_d, + device_hitTuple_apc_, + quality_d, + params_.minHitsPerNtuplet_); + cudaCheck(cudaGetLastError()); + + if (params_.doStats_) + kernel_mark_used<<>>(hh.view(), device_theCells_.get(), device_nCells_); + cudaCheck(cudaGetLastError()); + +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + + blockSize = 128; + numberOfBlocks = (HitContainer::totbins() + blockSize - 1) / blockSize; + cms::cuda::finalizeBulk<<>>(device_hitTuple_apc_, tuples_d); + + // remove duplicates (tracks that share a doublet) + numberOfBlocks = nDoubletBlocks(blockSize); + kernel_earlyDuplicateRemover<<>>( + device_theCells_.get(), device_nCells_, tuples_d, quality_d); + cudaCheck(cudaGetLastError()); + + blockSize = 128; + numberOfBlocks = (3 * caConstants::maxTuples / 4 + blockSize - 1) / blockSize; + kernel_countMultiplicity<<>>( + tuples_d, quality_d, device_tupleMultiplicity_.get()); + cms::cuda::launchFinalize(device_tupleMultiplicity_.get(), cudaStream); + kernel_fillMultiplicity<<>>( + tuples_d, quality_d, device_tupleMultiplicity_.get()); + cudaCheck(cudaGetLastError()); + + if (nhits > 1 && params_.lateFishbone_) { + auto nthTot = 128; + auto stride = 16; + auto blockSize = nthTot / stride; + auto numberOfBlocks = (nhits + blockSize - 1) / blockSize; + dim3 blks(1, numberOfBlocks, 1); + dim3 thrs(stride, blockSize, 1); + gpuPixelDoublets::fishbone<<>>( + hh.view(), device_theCells_.get(), device_nCells_, device_isOuterHitOfCell_.get(), nhits, true); + cudaCheck(cudaGetLastError()); + } + +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + + // free space asap + // device_isOuterHitOfCell_.reset(); +} + +template <> +void CAHitNtupletGeneratorKernelsGPU::buildDoublets(HitsOnCPU const &hh, cudaStream_t stream) { + auto nhits = hh.nHits(); + +#ifdef NTUPLE_DEBUG + std::cout << "building Doublets out of " << nhits << " Hits" << std::endl; +#endif + +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + + // in principle we can use "nhits" to heuristically dimension the workspace... + device_isOuterHitOfCell_ = cms::cuda::make_device_unique(std::max(1U, nhits), stream); + assert(device_isOuterHitOfCell_.get()); + + cellStorage_ = cms::cuda::make_device_unique( + caConstants::maxNumOfActiveDoublets * sizeof(GPUCACell::CellNeighbors) + + caConstants::maxNumOfActiveDoublets * sizeof(GPUCACell::CellTracks), + stream); + device_theCellNeighborsContainer_ = (GPUCACell::CellNeighbors *)cellStorage_.get(); + device_theCellTracksContainer_ = (GPUCACell::CellTracks *)(cellStorage_.get() + caConstants::maxNumOfActiveDoublets * + sizeof(GPUCACell::CellNeighbors)); + + { + int threadsPerBlock = 128; + // at least one block! + int blocks = (std::max(1U, nhits) + threadsPerBlock - 1) / threadsPerBlock; + gpuPixelDoublets::initDoublets<<>>(device_isOuterHitOfCell_.get(), + nhits, + device_theCellNeighbors_.get(), + device_theCellNeighborsContainer_, + device_theCellTracks_.get(), + device_theCellTracksContainer_); + cudaCheck(cudaGetLastError()); + } + + device_theCells_ = cms::cuda::make_device_unique(params_.maxNumberOfDoublets_, stream); + +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + + if (0 == nhits) + return; // protect against empty events + + // take all layer pairs into account + auto nActualPairs = gpuPixelDoublets::nPairs; + if (not params_.includeJumpingForwardDoublets_) { + // exclude forward "jumping" layer pairs + nActualPairs = gpuPixelDoublets::nPairsForTriplets; + } + if (params_.minHitsPerNtuplet_ > 3) { + // for quadruplets, exclude all "jumping" layer pairs + nActualPairs = gpuPixelDoublets::nPairsForQuadruplets; + } + + assert(nActualPairs <= gpuPixelDoublets::nPairs); + int stride = 4; + int threadsPerBlock = gpuPixelDoublets::getDoubletsFromHistoMaxBlockSize / stride; + int blocks = (4 * nhits + threadsPerBlock - 1) / threadsPerBlock; + dim3 blks(1, blocks, 1); + dim3 thrs(stride, threadsPerBlock, 1); + gpuPixelDoublets::getDoubletsFromHisto<<>>(device_theCells_.get(), + device_nCells_, + device_theCellNeighbors_.get(), + device_theCellTracks_.get(), + hh.view(), + device_isOuterHitOfCell_.get(), + nActualPairs, + params_.idealConditions_, + params_.doClusterCut_, + params_.doZ0Cut_, + params_.doPtCut_, + params_.maxNumberOfDoublets_); + cudaCheck(cudaGetLastError()); + +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif +} + +template <> +void CAHitNtupletGeneratorKernelsGPU::classifyTuples(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) { + // these are pointer on GPU! + auto const *tuples_d = &tracks_d->hitIndices; + auto *quality_d = tracks_d->qualityData(); + + auto blockSize = 64; + + // classify tracks based on kinematics + auto numberOfBlocks = nQuadrupletBlocks(blockSize); + kernel_classifyTracks<<>>(tuples_d, tracks_d, params_.cuts_, quality_d); + cudaCheck(cudaGetLastError()); + + if (params_.lateFishbone_) { + // apply fishbone cleaning to good tracks + numberOfBlocks = nDoubletBlocks(blockSize); + kernel_fishboneCleaner<<>>( + device_theCells_.get(), device_nCells_, quality_d); + cudaCheck(cudaGetLastError()); + } + + // remove duplicates (tracks that share a doublet) + numberOfBlocks = nDoubletBlocks(blockSize); + kernel_fastDuplicateRemover<<>>( + device_theCells_.get(), device_nCells_, tuples_d, tracks_d); + cudaCheck(cudaGetLastError()); + + if (params_.minHitsPerNtuplet_ < 4 || params_.doStats_) { + // fill hit->track "map" + numberOfBlocks = nQuadrupletBlocks(blockSize); + kernel_countHitInTracks<<>>( + tuples_d, quality_d, device_hitToTuple_.get()); + cudaCheck(cudaGetLastError()); + cms::cuda::launchFinalize(device_hitToTuple_.get(), cudaStream); + cudaCheck(cudaGetLastError()); + kernel_fillHitInTracks<<>>(tuples_d, quality_d, device_hitToTuple_.get()); + cudaCheck(cudaGetLastError()); + } + if (params_.minHitsPerNtuplet_ < 4) { + // remove duplicates (tracks that share a hit) + numberOfBlocks = (HitToTuple::capacity() + blockSize - 1) / blockSize; + kernel_tripletCleaner<<>>( + hh.view(), tuples_d, tracks_d, quality_d, device_hitToTuple_.get()); + cudaCheck(cudaGetLastError()); + } + + if (params_.doStats_) { + auto nhits = hh.nHits(); + numberOfBlocks = (std::max(nhits, params_.maxNumberOfDoublets_) + blockSize - 1) / blockSize; + kernel_checkOverflows<<>>(tuples_d, + device_tupleMultiplicity_.get(), + device_hitToTuple_.get(), + device_hitTuple_apc_, + device_theCells_.get(), + device_nCells_, + device_theCellNeighbors_.get(), + device_theCellTracks_.get(), + device_isOuterHitOfCell_.get(), + nhits, + params_.maxNumberOfDoublets_, + counters_); + cudaCheck(cudaGetLastError()); + } + + if (params_.doStats_) { + // counters (add flag???) + numberOfBlocks = (HitToTuple::capacity() + blockSize - 1) / blockSize; + kernel_doStatsForHitInTracks<<>>(device_hitToTuple_.get(), counters_); + cudaCheck(cudaGetLastError()); + numberOfBlocks = (3 * caConstants::maxNumberOfQuadruplets / 4 + blockSize - 1) / blockSize; + kernel_doStatsForTracks<<>>(tuples_d, quality_d, counters_); + cudaCheck(cudaGetLastError()); + } +#ifdef GPU_DEBUG + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); +#endif + +#ifdef DUMP_GPU_TK_TUPLES + static std::atomic iev(0); + ++iev; + kernel_print_found_ntuplets<<<1, 32, 0, cudaStream>>>( + hh.view(), tuples_d, tracks_d, quality_d, device_hitToTuple_.get(), 100, iev); +#endif +} + +template <> +void CAHitNtupletGeneratorKernelsGPU::printCounters(Counters const *counters) { + kernel_printCounters<<<1, 1>>>(counters); +} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.h new file mode 100644 index 0000000000000..d1a9f3d13a67f --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.h @@ -0,0 +1,223 @@ +#ifndef RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h +#define RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h + +#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h" +#include "GPUCACell.h" + +// #define DUMP_GPU_TK_TUPLES + +namespace cAHitNtupletGenerator { + + // counters + struct Counters { + unsigned long long nEvents; + unsigned long long nHits; + unsigned long long nCells; + unsigned long long nTuples; + unsigned long long nFitTracks; + unsigned long long nGoodTracks; + unsigned long long nUsedHits; + unsigned long long nDupHits; + unsigned long long nKilledCells; + unsigned long long nEmptyCells; + unsigned long long nZeroTrackCells; + }; + + using HitsView = TrackingRecHit2DSOAView; + using HitsOnGPU = TrackingRecHit2DSOAView; + + using HitToTuple = caConstants::HitToTuple; + using TupleMultiplicity = caConstants::TupleMultiplicity; + + using Quality = pixelTrack::Quality; + using TkSoA = pixelTrack::TrackSoA; + using HitContainer = pixelTrack::HitContainer; + + struct QualityCuts { + // chi2 cut = chi2Scale * (chi2Coeff[0] + pT/GeV * (chi2Coeff[1] + pT/GeV * (chi2Coeff[2] + pT/GeV * chi2Coeff[3]))) + float chi2Coeff[4]; + float chi2MaxPt; // GeV + float chi2Scale; + + struct Region { + float maxTip; // cm + float minPt; // GeV + float maxZip; // cm + }; + + Region triplet; + Region quadruplet; + }; + + // params + struct Params { + Params(bool onGPU, + uint32_t minHitsPerNtuplet, + uint32_t maxNumberOfDoublets, + bool useRiemannFit, + bool fit5as4, + bool includeJumpingForwardDoublets, + bool earlyFishbone, + bool lateFishbone, + bool idealConditions, + bool doStats, + bool doClusterCut, + bool doZ0Cut, + bool doPtCut, + float ptmin, + float CAThetaCutBarrel, + float CAThetaCutForward, + float hardCurvCut, + float dcaCutInnerTriplet, + float dcaCutOuterTriplet, + QualityCuts const& cuts) + : onGPU_(onGPU), + minHitsPerNtuplet_(minHitsPerNtuplet), + maxNumberOfDoublets_(maxNumberOfDoublets), + useRiemannFit_(useRiemannFit), + fit5as4_(fit5as4), + includeJumpingForwardDoublets_(includeJumpingForwardDoublets), + earlyFishbone_(earlyFishbone), + lateFishbone_(lateFishbone), + idealConditions_(idealConditions), + doStats_(doStats), + doClusterCut_(doClusterCut), + doZ0Cut_(doZ0Cut), + doPtCut_(doPtCut), + ptmin_(ptmin), + CAThetaCutBarrel_(CAThetaCutBarrel), + CAThetaCutForward_(CAThetaCutForward), + hardCurvCut_(hardCurvCut), + dcaCutInnerTriplet_(dcaCutInnerTriplet), + dcaCutOuterTriplet_(dcaCutOuterTriplet), + cuts_(cuts) {} + + const bool onGPU_; + const uint32_t minHitsPerNtuplet_; + const uint32_t maxNumberOfDoublets_; + const bool useRiemannFit_; + const bool fit5as4_; + const bool includeJumpingForwardDoublets_; + const bool earlyFishbone_; + const bool lateFishbone_; + const bool idealConditions_; + const bool doStats_; + const bool doClusterCut_; + const bool doZ0Cut_; + const bool doPtCut_; + const float ptmin_; + const float CAThetaCutBarrel_; + const float CAThetaCutForward_; + const float hardCurvCut_; + const float dcaCutInnerTriplet_; + const float dcaCutOuterTriplet_; + + // quality cuts + QualityCuts cuts_{// polynomial coefficients for the pT-dependent chi2 cut + {0.68177776, 0.74609577, -0.08035491, 0.00315399}, + // max pT used to determine the chi2 cut + 10., + // chi2 scale factor: 30 for broken line fit, 45 for Riemann fit + 30., + // regional cuts for triplets + { + 0.3, // |Tip| < 0.3 cm + 0.5, // pT > 0.5 GeV + 12.0 // |Zip| < 12.0 cm + }, + // regional cuts for quadruplets + { + 0.5, // |Tip| < 0.5 cm + 0.3, // pT > 0.3 GeV + 12.0 // |Zip| < 12.0 cm + }}; + + }; // Params + +} // namespace cAHitNtupletGenerator + +template +class CAHitNtupletGeneratorKernels { +public: + using Traits = TTraits; + + using QualityCuts = cAHitNtupletGenerator::QualityCuts; + using Params = cAHitNtupletGenerator::Params; + using Counters = cAHitNtupletGenerator::Counters; + + template + using unique_ptr = typename Traits::template unique_ptr; + + using HitsView = TrackingRecHit2DSOAView; + using HitsOnGPU = TrackingRecHit2DSOAView; + using HitsOnCPU = TrackingRecHit2DHeterogeneous; + + using HitToTuple = caConstants::HitToTuple; + using TupleMultiplicity = caConstants::TupleMultiplicity; + + using Quality = pixelTrack::Quality; + using TkSoA = pixelTrack::TrackSoA; + using HitContainer = pixelTrack::HitContainer; + + CAHitNtupletGeneratorKernels(Params const& params) + : params_(params), paramsMaxDoubletes3Quarters_(3 * params.maxNumberOfDoublets_ / 4) {} + ~CAHitNtupletGeneratorKernels() = default; + + TupleMultiplicity const* tupleMultiplicity() const { return device_tupleMultiplicity_.get(); } + + void launchKernels(HitsOnCPU const& hh, TkSoA* tuples_d, cudaStream_t cudaStream); + + void classifyTuples(HitsOnCPU const& hh, TkSoA* tuples_d, cudaStream_t cudaStream); + + void fillHitDetIndices(HitsView const* hv, TkSoA* tuples_d, cudaStream_t cudaStream); + + void buildDoublets(HitsOnCPU const& hh, cudaStream_t stream); + void allocateOnGPU(cudaStream_t stream); + void cleanup(cudaStream_t cudaStream); + + static void printCounters(Counters const* counters); + void setCounters(Counters* counters) { counters_ = counters; } + +private: + Counters* counters_ = nullptr; + + // workspace + unique_ptr cellStorage_; + unique_ptr device_theCellNeighbors_; + caConstants::CellNeighbors* device_theCellNeighborsContainer_; + unique_ptr device_theCellTracks_; + caConstants::CellTracks* device_theCellTracksContainer_; + + unique_ptr device_theCells_; + unique_ptr device_isOuterHitOfCell_; + uint32_t* device_nCells_ = nullptr; + + unique_ptr device_hitToTuple_; + cms::cuda::AtomicPairCounter* device_hitToTuple_apc_ = nullptr; + + cms::cuda::AtomicPairCounter* device_hitTuple_apc_ = nullptr; + + unique_ptr device_tupleMultiplicity_; + + unique_ptr device_storage_; + // params + Params const& params_; + /// Intermediate result avoiding repeated computations. + const uint32_t paramsMaxDoubletes3Quarters_; + /// Compute the number of doublet blocks for block size + inline uint32_t nDoubletBlocks(uint32_t blockSize) { + // We want (3 * params_.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize, but first part is pre-computed. + return (paramsMaxDoubletes3Quarters_ + blockSize - 1) / blockSize; + } + + /// Compute the number of quadruplet blocks for block size + inline uint32_t nQuadrupletBlocks(uint32_t blockSize) { + // caConstants::maxNumberOfQuadruplets is a constexpr, so the compiler will pre compute the 3*max/4 + return (3 * caConstants::maxNumberOfQuadruplets / 4 + blockSize - 1) / blockSize; + } +}; + +using CAHitNtupletGeneratorKernelsGPU = CAHitNtupletGeneratorKernels; +using CAHitNtupletGeneratorKernelsCPU = CAHitNtupletGeneratorKernels; + +#endif // RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorKernels_h diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.cc new file mode 100644 index 0000000000000..96381673388ca --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.cc @@ -0,0 +1 @@ +#include "CAHitNtupletGeneratorKernelsAlloc.h" diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.cu b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.cu new file mode 100644 index 0000000000000..96381673388ca --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.cu @@ -0,0 +1 @@ +#include "CAHitNtupletGeneratorKernelsAlloc.h" diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.h new file mode 100644 index 0000000000000..1d19aa43d6e1b --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.h @@ -0,0 +1,35 @@ +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +#include "CAHitNtupletGeneratorKernels.h" + +template <> +#ifdef __CUDACC__ +void CAHitNtupletGeneratorKernelsGPU::allocateOnGPU(cudaStream_t stream) { +#else +void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU(cudaStream_t stream) { +#endif + ////////////////////////////////////////////////////////// + // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER) + ////////////////////////////////////////////////////////// + + device_theCellNeighbors_ = Traits::template make_unique(stream); + device_theCellTracks_ = Traits::template make_unique(stream); + + device_hitToTuple_ = Traits::template make_unique(stream); + + device_tupleMultiplicity_ = Traits::template make_unique(stream); + + device_storage_ = Traits::template make_unique(3, stream); + + device_hitTuple_apc_ = (cms::cuda::AtomicPairCounter*)device_storage_.get(); + device_hitToTuple_apc_ = (cms::cuda::AtomicPairCounter*)device_storage_.get() + 1; + device_nCells_ = (uint32_t*)(device_storage_.get() + 2); + + if constexpr (std::is_same::value) { + cudaCheck(cudaMemsetAsync(device_nCells_, 0, sizeof(uint32_t), stream)); + } else { + *device_nCells_ = 0; + } + cms::cuda::launchZero(device_tupleMultiplicity_.get(), stream); + cms::cuda::launchZero(device_hitToTuple_.get(), stream); // we may wish to keep it in the edm... +} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h new file mode 100644 index 0000000000000..7c0cec51b8057 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsImpl.h @@ -0,0 +1,593 @@ +// +// Original Author: Felice Pantaleo, CERN +// + +// #define NTUPLE_DEBUG + +#include +#include + +#include + +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" + +#include "CAConstants.h" +#include "CAHitNtupletGeneratorKernels.h" +#include "GPUCACell.h" +#include "gpuFishbone.h" +#include "gpuPixelDoublets.h" + +using HitsOnGPU = TrackingRecHit2DSOAView; +using HitsOnCPU = TrackingRecHit2DCUDA; + +using HitToTuple = caConstants::HitToTuple; +using TupleMultiplicity = caConstants::TupleMultiplicity; + +using Quality = pixelTrack::Quality; +using TkSoA = pixelTrack::TrackSoA; +using HitContainer = pixelTrack::HitContainer; + +__global__ void kernel_checkOverflows(HitContainer const *foundNtuplets, + caConstants::TupleMultiplicity const *tupleMultiplicity, + CAHitNtupletGeneratorKernelsGPU::HitToTuple const *hitToTuple, + cms::cuda::AtomicPairCounter *apc, + GPUCACell const *__restrict__ cells, + uint32_t const *__restrict__ nCells, + gpuPixelDoublets::CellNeighborsVector const *cellNeighbors, + gpuPixelDoublets::CellTracksVector const *cellTracks, + GPUCACell::OuterHitOfCell const *__restrict__ isOuterHitOfCell, + uint32_t nHits, + uint32_t maxNumberOfDoublets, + CAHitNtupletGeneratorKernelsGPU::Counters *counters) { + auto first = threadIdx.x + blockIdx.x * blockDim.x; + + auto &c = *counters; + // counters once per event + if (0 == first) { + atomicAdd(&c.nEvents, 1); + atomicAdd(&c.nHits, nHits); + atomicAdd(&c.nCells, *nCells); + atomicAdd(&c.nTuples, apc->get().m); + atomicAdd(&c.nFitTracks, tupleMultiplicity->size()); + } + +#ifdef NTUPLE_DEBUG + if (0 == first) { + printf("number of found cells %d, found tuples %d with total hits %d out of %d\n", + *nCells, + apc->get().m, + apc->get().n, + nHits); + if (apc->get().m < caConstants::maxNumberOfQuadruplets()) { + assert(foundNtuplets->size(apc->get().m) == 0); + assert(foundNtuplets->size() == apc->get().n); + } + } + + for (int idx = first, nt = foundNtuplets->nbins(); idx < nt; idx += gridDim.x * blockDim.x) { + if (foundNtuplets->size(idx) > 5) + printf("ERROR %d, %d\n", idx, foundNtuplets->size(idx)); + assert(foundNtuplets->size(idx) < 6); + for (auto ih = foundNtuplets->begin(idx); ih != foundNtuplets->end(idx); ++ih) + assert(*ih < nHits); + } +#endif + + if (0 == first) { + if (apc->get().m >= caConstants::maxNumberOfQuadruplets) + printf("Tuples overflow\n"); + if (*nCells >= maxNumberOfDoublets) + printf("Cells overflow\n"); + if (cellNeighbors && cellNeighbors->full()) + printf("cellNeighbors overflow\n"); + if (cellTracks && cellTracks->full()) + printf("cellTracks overflow\n"); + } + + for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) { + auto const &thisCell = cells[idx]; + if (thisCell.outerNeighbors().full()) //++tooManyNeighbors[thisCell.theLayerPairId]; + printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId()); + if (thisCell.tracks().full()) //++tooManyTracks[thisCell.theLayerPairId]; + printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId()); + if (thisCell.isKilled()) + atomicAdd(&c.nKilledCells, 1); + if (thisCell.unused()) + atomicAdd(&c.nEmptyCells, 1); + if (0 == hitToTuple->size(thisCell.inner_hit_id()) && 0 == hitToTuple->size(thisCell.outer_hit_id())) + atomicAdd(&c.nZeroTrackCells, 1); + } + + for (int idx = first, nt = nHits; idx < nt; idx += gridDim.x * blockDim.x) { + if (isOuterHitOfCell[idx].full()) // ++tooManyOuterHitOfCell; + printf("OuterHitOfCell overflow %d\n", idx); + } +} + +__global__ void kernel_fishboneCleaner(GPUCACell const *cells, uint32_t const *__restrict__ nCells, Quality *quality) { + constexpr auto bad = pixelTrack::Quality::bad; + + auto first = threadIdx.x + blockIdx.x * blockDim.x; + for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) { + auto const &thisCell = cells[idx]; + if (!thisCell.isKilled()) + continue; + + for (auto it : thisCell.tracks()) + quality[it] = bad; + } +} + +__global__ void kernel_earlyDuplicateRemover(GPUCACell const *cells, + uint32_t const *__restrict__ nCells, + HitContainer *foundNtuplets, + Quality *quality) { + // constexpr auto bad = trackQuality::bad; + constexpr auto dup = pixelTrack::Quality::dup; + // constexpr auto loose = trackQuality::loose; + + assert(nCells); + auto first = threadIdx.x + blockIdx.x * blockDim.x; + for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) { + auto const &thisCell = cells[idx]; + + if (thisCell.tracks().size() < 2) + continue; + //if (0==thisCell.theUsed) continue; + // if (thisCell.theDoubletId < 0) continue; + + uint32_t maxNh = 0; + + // find maxNh + for (auto it : thisCell.tracks()) { + auto nh = foundNtuplets->size(it); + maxNh = std::max(nh, maxNh); + } + + for (auto it : thisCell.tracks()) { + if (foundNtuplets->size(it) != maxNh) + quality[it] = dup; //no race: simple assignment of the same constant + } + } +} + +__global__ void kernel_fastDuplicateRemover(GPUCACell const *__restrict__ cells, + uint32_t const *__restrict__ nCells, + HitContainer const *__restrict__ foundNtuplets, + TkSoA *__restrict__ tracks) { + constexpr auto bad = pixelTrack::Quality::bad; + constexpr auto dup = pixelTrack::Quality::dup; + constexpr auto loose = pixelTrack::Quality::loose; + + assert(nCells); + + auto first = threadIdx.x + blockIdx.x * blockDim.x; + for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) { + auto const &thisCell = cells[idx]; + if (thisCell.tracks().size() < 2) + continue; + // if (thisCell.theDoubletId < 0) continue; + + float mc = 10000.f; + uint16_t im = 60000; + + auto score = [&](auto it) { + return std::abs(tracks->tip(it)); // tip + // return tracks->chi2(it); //chi2 + }; + + // find min score + for (auto it : thisCell.tracks()) { + if (tracks->quality(it) == loose && score(it) < mc) { + mc = score(it); + im = it; + } + } + // mark all other duplicates + for (auto it : thisCell.tracks()) { + if (tracks->quality(it) != bad && it != im) + tracks->quality(it) = dup; //no race: simple assignment of the same constant + } + } +} + +__global__ void kernel_connect(cms::cuda::AtomicPairCounter *apc1, + cms::cuda::AtomicPairCounter *apc2, // just to zero them, + GPUCACell::Hits const *__restrict__ hhp, + GPUCACell *cells, + uint32_t const *__restrict__ nCells, + gpuPixelDoublets::CellNeighborsVector *cellNeighbors, + GPUCACell::OuterHitOfCell const *__restrict__ isOuterHitOfCell, + float hardCurvCut, + float ptmin, + float CAThetaCutBarrel, + float CAThetaCutForward, + float dcaCutInnerTriplet, + float dcaCutOuterTriplet) { + auto const &hh = *hhp; + + auto firstCellIndex = threadIdx.y + blockIdx.y * blockDim.y; + auto first = threadIdx.x; + auto stride = blockDim.x; + + if (0 == (firstCellIndex + first)) { + (*apc1) = 0; + (*apc2) = 0; + } // ready for next kernel + + for (int idx = firstCellIndex, nt = (*nCells); idx < nt; idx += gridDim.y * blockDim.y) { + auto cellIndex = idx; + auto &thisCell = cells[idx]; + auto innerHitId = thisCell.inner_hit_id(); + int numberOfPossibleNeighbors = isOuterHitOfCell[innerHitId].size(); + auto vi = isOuterHitOfCell[innerHitId].data(); + + auto ri = thisCell.inner_r(hh); + auto zi = thisCell.inner_z(hh); + + auto ro = thisCell.outer_r(hh); + auto zo = thisCell.outer_z(hh); + auto isBarrel = thisCell.inner_detIndex(hh) < caConstants::last_barrel_detIndex; + + for (int j = first; j < numberOfPossibleNeighbors; j += stride) { + auto otherCell = __ldg(vi + j); + auto &oc = cells[otherCell]; + auto r1 = oc.inner_r(hh); + auto z1 = oc.inner_z(hh); + bool aligned = GPUCACell::areAlignedRZ( + r1, + z1, + ri, + zi, + ro, + zo, + ptmin, + isBarrel ? CAThetaCutBarrel : CAThetaCutForward); // 2.f*thetaCut); // FIXME tune cuts + if (aligned && thisCell.dcaCut(hh, + oc, + oc.inner_detIndex(hh) < caConstants::last_bpix1_detIndex ? dcaCutInnerTriplet + : dcaCutOuterTriplet, + hardCurvCut)) { // FIXME tune cuts + oc.addOuterNeighbor(cellIndex, *cellNeighbors); + thisCell.setUsedBit(1); + oc.setUsedBit(1); + } + } // loop on inner cells + } // loop on outer cells +} + +__global__ void kernel_find_ntuplets(GPUCACell::Hits const *__restrict__ hhp, + GPUCACell *__restrict__ cells, + uint32_t const *nCells, + gpuPixelDoublets::CellTracksVector *cellTracks, + HitContainer *foundNtuplets, + cms::cuda::AtomicPairCounter *apc, + Quality *__restrict__ quality, + unsigned int minHitsPerNtuplet) { + // recursive: not obvious to widen + auto const &hh = *hhp; + + auto first = threadIdx.x + blockIdx.x * blockDim.x; + for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) { + auto const &thisCell = cells[idx]; + if (thisCell.isKilled()) + continue; // cut by earlyFishbone + + auto pid = thisCell.layerPairId(); + auto doit = minHitsPerNtuplet > 3 ? pid < 3 : pid < 8 || pid > 12; + if (doit) { + GPUCACell::TmpTuple stack; + stack.reset(); + thisCell.find_ntuplets(hh, cells, *cellTracks, *foundNtuplets, *apc, quality, stack, minHitsPerNtuplet, pid < 3); + assert(stack.empty()); + // printf("in %d found quadruplets: %d\n", cellIndex, apc->get()); + } + } +} + +__global__ void kernel_mark_used(GPUCACell::Hits const *__restrict__ hhp, + GPUCACell *__restrict__ cells, + uint32_t const *nCells) { + auto first = threadIdx.x + blockIdx.x * blockDim.x; + for (int idx = first, nt = (*nCells); idx < nt; idx += gridDim.x * blockDim.x) { + auto &thisCell = cells[idx]; + if (!thisCell.tracks().empty()) + thisCell.setUsedBit(2); + } +} + +__global__ void kernel_countMultiplicity(HitContainer const *__restrict__ foundNtuplets, + Quality const *__restrict__ quality, + caConstants::TupleMultiplicity *tupleMultiplicity) { + auto first = blockIdx.x * blockDim.x + threadIdx.x; + for (int it = first, nt = foundNtuplets->nbins(); it < nt; it += gridDim.x * blockDim.x) { + auto nhits = foundNtuplets->size(it); + if (nhits < 3) + continue; + if (quality[it] == pixelTrack::Quality::dup) + continue; + assert(quality[it] == pixelTrack::Quality::bad); + if (nhits > 5) + printf("wrong mult %d %d\n", it, nhits); + assert(nhits < 8); + tupleMultiplicity->countDirect(nhits); + } +} + +__global__ void kernel_fillMultiplicity(HitContainer const *__restrict__ foundNtuplets, + Quality const *__restrict__ quality, + caConstants::TupleMultiplicity *tupleMultiplicity) { + auto first = blockIdx.x * blockDim.x + threadIdx.x; + for (int it = first, nt = foundNtuplets->nbins(); it < nt; it += gridDim.x * blockDim.x) { + auto nhits = foundNtuplets->size(it); + if (nhits < 3) + continue; + if (quality[it] == pixelTrack::Quality::dup) + continue; + assert(quality[it] == pixelTrack::Quality::bad); + if (nhits > 5) + printf("wrong mult %d %d\n", it, nhits); + assert(nhits < 8); + tupleMultiplicity->fillDirect(nhits, it); + } +} + +__global__ void kernel_classifyTracks(HitContainer const *__restrict__ tuples, + TkSoA const *__restrict__ tracks, + CAHitNtupletGeneratorKernelsGPU::QualityCuts cuts, + Quality *__restrict__ quality) { + int first = blockDim.x * blockIdx.x + threadIdx.x; + for (int it = first, nt = tuples->nbins(); it < nt; it += gridDim.x * blockDim.x) { + auto nhits = tuples->size(it); + if (nhits == 0) + break; // guard + + // if duplicate: not even fit + if (quality[it] == pixelTrack::Quality::dup) + continue; + + assert(quality[it] == pixelTrack::Quality::bad); + + // mark doublets as bad + if (nhits < 3) + continue; + + // if the fit has any invalid parameters, mark it as bad + bool isNaN = false; + for (int i = 0; i < 5; ++i) { + isNaN |= std::isnan(tracks->stateAtBS.state(it)(i)); + } + if (isNaN) { +#ifdef NTUPLE_DEBUG + printf("NaN in fit %d size %d chi2 %f\n", it, tuples->size(it), tracks->chi2(it)); +#endif + continue; + } + + // compute a pT-dependent chi2 cut + // default parameters: + // - chi2MaxPt = 10 GeV + // - chi2Coeff = { 0.68177776, 0.74609577, -0.08035491, 0.00315399 } + // - chi2Scale = 30 for broken line fit, 45 for Riemann fit + // (see CAHitNtupletGeneratorGPU.cc) + float pt = std::min(tracks->pt(it), cuts.chi2MaxPt); + float chi2Cut = cuts.chi2Scale * + (cuts.chi2Coeff[0] + pt * (cuts.chi2Coeff[1] + pt * (cuts.chi2Coeff[2] + pt * cuts.chi2Coeff[3]))); + // above number were for Quads not normalized so for the time being just multiple by ndof for Quads (triplets to be understood) + if (3.f * tracks->chi2(it) >= chi2Cut) { +#ifdef NTUPLE_DEBUG + printf("Bad fit %d size %d pt %f eta %f chi2 %f\n", + it, + tuples->size(it), + tracks->pt(it), + tracks->eta(it), + 3.f * tracks->chi2(it)); +#endif + continue; + } + + // impose "region cuts" based on the fit results (phi, Tip, pt, cotan(theta)), Zip) + // default cuts: + // - for triplets: |Tip| < 0.3 cm, pT > 0.5 GeV, |Zip| < 12.0 cm + // - for quadruplets: |Tip| < 0.5 cm, pT > 0.3 GeV, |Zip| < 12.0 cm + // (see CAHitNtupletGeneratorGPU.cc) + auto const ®ion = (nhits > 3) ? cuts.quadruplet : cuts.triplet; + bool isOk = (std::abs(tracks->tip(it)) < region.maxTip) and (tracks->pt(it) > region.minPt) and + (std::abs(tracks->zip(it)) < region.maxZip); + + if (isOk) + quality[it] = pixelTrack::Quality::loose; + } +} + +__global__ void kernel_doStatsForTracks(HitContainer const *__restrict__ tuples, + Quality const *__restrict__ quality, + CAHitNtupletGeneratorKernelsGPU::Counters *counters) { + int first = blockDim.x * blockIdx.x + threadIdx.x; + for (int idx = first, ntot = tuples->nbins(); idx < ntot; idx += gridDim.x * blockDim.x) { + if (tuples->size(idx) == 0) + break; //guard + if (quality[idx] != pixelTrack::Quality::loose) + continue; + atomicAdd(&(counters->nGoodTracks), 1); + } +} + +__global__ void kernel_countHitInTracks(HitContainer const *__restrict__ tuples, + Quality const *__restrict__ quality, + CAHitNtupletGeneratorKernelsGPU::HitToTuple *hitToTuple) { + int first = blockDim.x * blockIdx.x + threadIdx.x; + for (int idx = first, ntot = tuples->nbins(); idx < ntot; idx += gridDim.x * blockDim.x) { + if (tuples->size(idx) == 0) + break; // guard + if (quality[idx] != pixelTrack::Quality::loose) + continue; + for (auto h = tuples->begin(idx); h != tuples->end(idx); ++h) + hitToTuple->countDirect(*h); + } +} + +__global__ void kernel_fillHitInTracks(HitContainer const *__restrict__ tuples, + Quality const *__restrict__ quality, + CAHitNtupletGeneratorKernelsGPU::HitToTuple *hitToTuple) { + int first = blockDim.x * blockIdx.x + threadIdx.x; + for (int idx = first, ntot = tuples->nbins(); idx < ntot; idx += gridDim.x * blockDim.x) { + if (tuples->size(idx) == 0) + break; // guard + if (quality[idx] != pixelTrack::Quality::loose) + continue; + for (auto h = tuples->begin(idx); h != tuples->end(idx); ++h) + hitToTuple->fillDirect(*h, idx); + } +} + +__global__ void kernel_fillHitDetIndices(HitContainer const *__restrict__ tuples, + TrackingRecHit2DSOAView const *__restrict__ hhp, + HitContainer *__restrict__ hitDetIndices) { + int first = blockDim.x * blockIdx.x + threadIdx.x; + // copy offsets + for (int idx = first, ntot = tuples->totbins(); idx < ntot; idx += gridDim.x * blockDim.x) { + hitDetIndices->off[idx] = tuples->off[idx]; + } + // fill hit indices + auto const &hh = *hhp; + auto nhits = hh.nHits(); + for (int idx = first, ntot = tuples->size(); idx < ntot; idx += gridDim.x * blockDim.x) { + assert(tuples->bins[idx] < nhits); + hitDetIndices->bins[idx] = hh.detectorIndex(tuples->bins[idx]); + } +} + +__global__ void kernel_doStatsForHitInTracks(CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ hitToTuple, + CAHitNtupletGeneratorKernelsGPU::Counters *counters) { + auto &c = *counters; + int first = blockDim.x * blockIdx.x + threadIdx.x; + for (int idx = first, ntot = hitToTuple->nbins(); idx < ntot; idx += gridDim.x * blockDim.x) { + if (hitToTuple->size(idx) == 0) + continue; // SHALL NOT BE break + atomicAdd(&c.nUsedHits, 1); + if (hitToTuple->size(idx) > 1) + atomicAdd(&c.nDupHits, 1); + } +} + +__global__ void kernel_tripletCleaner(TrackingRecHit2DSOAView const *__restrict__ hhp, + HitContainer const *__restrict__ ptuples, + TkSoA const *__restrict__ ptracks, + Quality *__restrict__ quality, + CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ phitToTuple) { + constexpr auto bad = pixelTrack::Quality::bad; + constexpr auto dup = pixelTrack::Quality::dup; + // constexpr auto loose = trackQuality::loose; + + auto &hitToTuple = *phitToTuple; + auto const &foundNtuplets = *ptuples; + auto const &tracks = *ptracks; + + int first = blockDim.x * blockIdx.x + threadIdx.x; + for (int idx = first, ntot = hitToTuple.nbins(); idx < ntot; idx += gridDim.x * blockDim.x) { + if (hitToTuple.size(idx) < 2) + continue; + + float mc = 10000.f; + uint16_t im = 60000; + uint32_t maxNh = 0; + + // find maxNh + for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { + uint32_t nh = foundNtuplets.size(*it); + maxNh = std::max(nh, maxNh); + } + // kill all tracks shorter than maxHn (only triplets???) + for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { + uint32_t nh = foundNtuplets.size(*it); + if (maxNh != nh) + quality[*it] = dup; + } + + if (maxNh > 3) + continue; + // for triplets choose best tip! + for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { + auto const it = *ip; + if (quality[it] != bad && std::abs(tracks.tip(it)) < mc) { + mc = std::abs(tracks.tip(it)); + im = it; + } + } + // mark duplicates + for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { + auto const it = *ip; + if (quality[it] != bad && it != im) + quality[it] = dup; //no race: simple assignment of the same constant + } + } // loop over hits +} + +__global__ void kernel_print_found_ntuplets(TrackingRecHit2DSOAView const *__restrict__ hhp, + HitContainer const *__restrict__ ptuples, + TkSoA const *__restrict__ ptracks, + Quality const *__restrict__ quality, + CAHitNtupletGeneratorKernelsGPU::HitToTuple const *__restrict__ phitToTuple, + uint32_t maxPrint, + int iev) { + auto const &foundNtuplets = *ptuples; + auto const &tracks = *ptracks; + int first = blockDim.x * blockIdx.x + threadIdx.x; + for (int i = first, np = std::min(maxPrint, foundNtuplets.nbins()); i < np; i += blockDim.x * gridDim.x) { + auto nh = foundNtuplets.size(i); + if (nh < 3) + continue; + printf("TK: %d %d %d %f %f %f %f %f %f %f %d %d %d %d %d\n", + 10000 * iev + i, + int(quality[i]), + nh, + tracks.charge(i), + tracks.pt(i), + tracks.eta(i), + tracks.phi(i), + tracks.tip(i), + tracks.zip(i), + // asinhf(fit_results[i].par(3)), + tracks.chi2(i), + *foundNtuplets.begin(i), + *(foundNtuplets.begin(i) + 1), + *(foundNtuplets.begin(i) + 2), + nh > 3 ? int(*(foundNtuplets.begin(i) + 3)) : -1, + nh > 4 ? int(*(foundNtuplets.begin(i) + 4)) : -1); + } +} + +__global__ void kernel_printCounters(cAHitNtupletGenerator::Counters const *counters) { + auto const &c = *counters; + printf( + "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks | nGoodTracks | nUsedHits | nDupHits | " + "nKilledCells | " + "nEmptyCells | nZeroTrackCells ||\n"); + printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n", + c.nEvents, + c.nHits, + c.nCells, + c.nTuples, + c.nGoodTracks, + c.nFitTracks, + c.nUsedHits, + c.nDupHits, + c.nKilledCells, + c.nEmptyCells, + c.nZeroTrackCells); + printf("Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f||\n", + c.nEvents, + c.nHits / double(c.nEvents), + c.nCells / double(c.nEvents), + c.nTuples / double(c.nEvents), + c.nFitTracks / double(c.nEvents), + c.nGoodTracks / double(c.nEvents), + c.nUsedHits / double(c.nEvents), + c.nDupHits / double(c.nEvents), + c.nKilledCells / double(c.nEvents), + c.nEmptyCells / double(c.nCells), + c.nZeroTrackCells / double(c.nCells)); +} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc new file mode 100644 index 0000000000000..c2c7c2b869752 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc @@ -0,0 +1,229 @@ +// +// Original Author: Felice Pantaleo, CERN +// + +#include +#include +#include +#include + +#include "DataFormats/Common/interface/Handle.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/Utilities/interface/EDMException.h" +#include "FWCore/Utilities/interface/isFinite.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "TrackingTools/DetLayers/interface/BarrelDetLayer.h" + +#include "CAHitNtupletGeneratorOnGPU.h" + +namespace { + + template + T sqr(T x) { + return x * x; + } + + cAHitNtupletGenerator::QualityCuts makeQualityCuts(edm::ParameterSet const& pset) { + auto coeff = pset.getParameter>("chi2Coeff"); + if (coeff.size() != 4) { + throw edm::Exception(edm::errors::Configuration, + "CAHitNtupletGeneratorOnGPU.trackQualityCuts.chi2Coeff must have 4 elements"); + } + return cAHitNtupletGenerator::QualityCuts{// polynomial coefficients for the pT-dependent chi2 cut + {(float)coeff[0], (float)coeff[1], (float)coeff[2], (float)coeff[3]}, + // max pT used to determine the chi2 cut + (float)pset.getParameter("chi2MaxPt"), + // chi2 scale factor: 30 for broken line fit, 45 for Riemann fit + (float)pset.getParameter("chi2Scale"), + // regional cuts for triplets + {(float)pset.getParameter("tripletMaxTip"), + (float)pset.getParameter("tripletMinPt"), + (float)pset.getParameter("tripletMaxZip")}, + // regional cuts for quadruplets + {(float)pset.getParameter("quadrupletMaxTip"), + (float)pset.getParameter("quadrupletMinPt"), + (float)pset.getParameter("quadrupletMaxZip")}}; + } + +} // namespace + +using namespace std; + +CAHitNtupletGeneratorOnGPU::CAHitNtupletGeneratorOnGPU(const edm::ParameterSet& cfg, edm::ConsumesCollector& iC) + : m_params(cfg.getParameter("onGPU"), + cfg.getParameter("minHitsPerNtuplet"), + cfg.getParameter("maxNumberOfDoublets"), + cfg.getParameter("useRiemannFit"), + cfg.getParameter("fit5as4"), + cfg.getParameter("includeJumpingForwardDoublets"), + cfg.getParameter("earlyFishbone"), + cfg.getParameter("lateFishbone"), + cfg.getParameter("idealConditions"), + cfg.getParameter("fillStatistics"), + cfg.getParameter("doClusterCut"), + cfg.getParameter("doZ0Cut"), + cfg.getParameter("doPtCut"), + cfg.getParameter("ptmin"), + cfg.getParameter("CAThetaCutBarrel"), + cfg.getParameter("CAThetaCutForward"), + cfg.getParameter("hardCurvCut"), + cfg.getParameter("dcaCutInnerTriplet"), + cfg.getParameter("dcaCutOuterTriplet"), + makeQualityCuts(cfg.getParameterSet("trackQualityCuts"))) { +#ifdef DUMP_GPU_TK_TUPLES + printf("TK: %s %s % %s %s %s %s %s %s %s %s %s %s %s %s %s\n", + "tid", + "qual", + "nh", + "charge", + "pt", + "eta", + "phi", + "tip", + "zip", + "chi2", + "h1", + "h2", + "h3", + "h4", + "h5"); +#endif + + if (m_params.onGPU_) { + // allocate pinned host memory only if CUDA is available + edm::Service cs; + if (cs and cs->enabled()) { + cudaCheck(cudaMalloc(&m_counters, sizeof(Counters))); + cudaCheck(cudaMemset(m_counters, 0, sizeof(Counters))); + } + } else { + m_counters = new Counters(); + memset(m_counters, 0, sizeof(Counters)); + } +} + +CAHitNtupletGeneratorOnGPU::~CAHitNtupletGeneratorOnGPU() { + if (m_params.onGPU_) { + // print the gpu statistics and free pinned host memory only if CUDA is available + edm::Service cs; + if (cs and cs->enabled()) { + if (m_params.doStats_) { + // crash on multi-gpu processes + CAHitNtupletGeneratorKernelsGPU::printCounters(m_counters); + } + cudaFree(m_counters); + } + } else { + if (m_params.doStats_) { + CAHitNtupletGeneratorKernelsCPU::printCounters(m_counters); + } + delete m_counters; + } +} + +void CAHitNtupletGeneratorOnGPU::fillDescriptions(edm::ParameterSetDescription& desc) { + // 87 cm/GeV = 1/(3.8T * 0.3) + // take less than radius given by the hardPtCut and reject everything below + // auto hardCurvCut = 1.f/(0.35 * 87.f); + desc.add("ptmin", 0.9f)->setComment("Cut on minimum pt"); + desc.add("CAThetaCutBarrel", 0.002f)->setComment("Cut on RZ alignement for Barrel"); + desc.add("CAThetaCutForward", 0.003f)->setComment("Cut on RZ alignment for Forward"); + desc.add("hardCurvCut", 1.f / (0.35 * 87.f))->setComment("Cut on minimum curvature"); + desc.add("dcaCutInnerTriplet", 0.15f)->setComment("Cut on origin radius when the inner hit is on BPix1"); + desc.add("dcaCutOuterTriplet", 0.25f)->setComment("Cut on origin radius when the outer hit is on BPix1"); + desc.add("earlyFishbone", true); + desc.add("lateFishbone", false); + desc.add("idealConditions", true); + desc.add("fillStatistics", false); + desc.add("minHitsPerNtuplet", 4); + desc.add("maxNumberOfDoublets", caConstants::maxNumberOfDoublets); + desc.add("includeJumpingForwardDoublets", false); + desc.add("fit5as4", true); + desc.add("doClusterCut", true); + desc.add("doZ0Cut", true); + desc.add("doPtCut", true); + desc.add("useRiemannFit", false)->setComment("true for Riemann, false for BrokenLine"); + + edm::ParameterSetDescription trackQualityCuts; + trackQualityCuts.add("chi2MaxPt", 10.)->setComment("max pT used to determine the pT-dependent chi2 cut"); + trackQualityCuts.add>("chi2Coeff", {0.68177776, 0.74609577, -0.08035491, 0.00315399}) + ->setComment("Polynomial coefficients to derive the pT-dependent chi2 cut"); + trackQualityCuts.add("chi2Scale", 30.) + ->setComment( + "Factor to multiply the pT-dependent chi2 cut (currently: 30 for the broken line fit, 45 for the Riemann " + "fit)"); + trackQualityCuts.add("tripletMinPt", 0.5)->setComment("Min pT for triplets, in GeV"); + trackQualityCuts.add("tripletMaxTip", 0.3)->setComment("Max |Tip| for triplets, in cm"); + trackQualityCuts.add("tripletMaxZip", 12.)->setComment("Max |Zip| for triplets, in cm"); + trackQualityCuts.add("quadrupletMinPt", 0.3)->setComment("Min pT for quadruplets, in GeV"); + trackQualityCuts.add("quadrupletMaxTip", 0.5)->setComment("Max |Tip| for quadruplets, in cm"); + trackQualityCuts.add("quadrupletMaxZip", 12.)->setComment("Max |Zip| for quadruplets, in cm"); + desc.add("trackQualityCuts", trackQualityCuts) + ->setComment( + "Quality cuts based on the results of the track fit:\n - apply a pT-dependent chi2 cut;\n - apply \"region " + "cuts\" based on the fit results (pT, Tip, Zip)."); +} + +PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuplesAsync(TrackingRecHit2DCUDA const& hits_d, + float bfield, + cudaStream_t stream) const { + PixelTrackHeterogeneous tracks(cms::cuda::make_device_unique(stream)); + + auto* soa = tracks.get(); + + CAHitNtupletGeneratorKernelsGPU kernels(m_params); + kernels.setCounters(m_counters); + + kernels.allocateOnGPU(stream); + + kernels.buildDoublets(hits_d, stream); + kernels.launchKernels(hits_d, soa, stream); + kernels.fillHitDetIndices(hits_d.view(), soa, stream); // in principle needed only if Hits not "available" + + HelixFitOnGPU fitter(bfield, m_params.fit5as4_); + fitter.allocateOnGPU(&(soa->hitIndices), kernels.tupleMultiplicity(), soa); + if (m_params.useRiemannFit_) { + fitter.launchRiemannKernels(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets, stream); + } else { + fitter.launchBrokenLineKernels(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets, stream); + } + kernels.classifyTuples(hits_d, soa, stream); + + return tracks; +} + +PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuples(TrackingRecHit2DCPU const& hits_d, float bfield) const { + PixelTrackHeterogeneous tracks(std::make_unique()); + + auto* soa = tracks.get(); + assert(soa); + + CAHitNtupletGeneratorKernelsCPU kernels(m_params); + kernels.setCounters(m_counters); + kernels.allocateOnGPU(nullptr); + + kernels.buildDoublets(hits_d, nullptr); + kernels.launchKernels(hits_d, soa, nullptr); + kernels.fillHitDetIndices(hits_d.view(), soa, nullptr); // in principle needed only if Hits not "available" + + if (0 == hits_d.nHits()) + return tracks; + + // now fit + HelixFitOnGPU fitter(bfield, m_params.fit5as4_); + fitter.allocateOnGPU(&(soa->hitIndices), kernels.tupleMultiplicity(), soa); + + if (m_params.useRiemannFit_) { + fitter.launchRiemannKernelsOnCPU(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets); + } else { + fitter.launchBrokenLineKernelsOnCPU(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets); + } + + kernels.classifyTuples(hits_d, soa, nullptr); + + return tracks; +} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.h new file mode 100644 index 0000000000000..564a870f54796 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.h @@ -0,0 +1,65 @@ +#ifndef RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorOnGPU_h +#define RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorOnGPU_h + +#include +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h" + +#include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" + +#include "CAHitNtupletGeneratorKernels.h" +#include "HelixFitOnGPU.h" + +#include "GPUCACell.h" + +namespace edm { + class Event; + class EventSetup; + class ParameterSetDescription; +} // namespace edm + +class CAHitNtupletGeneratorOnGPU { +public: + using HitsOnGPU = TrackingRecHit2DSOAView; + using HitsOnCPU = TrackingRecHit2DCUDA; + using hindex_type = TrackingRecHit2DSOAView::hindex_type; + + using Quality = pixelTrack::Quality; + using OutputSoA = pixelTrack::TrackSoA; + using HitContainer = pixelTrack::HitContainer; + using Tuple = HitContainer; + + using QualityCuts = cAHitNtupletGenerator::QualityCuts; + using Params = cAHitNtupletGenerator::Params; + using Counters = cAHitNtupletGenerator::Counters; + +public: + CAHitNtupletGeneratorOnGPU(const edm::ParameterSet& cfg, edm::ConsumesCollector&& iC) + : CAHitNtupletGeneratorOnGPU(cfg, iC) {} + CAHitNtupletGeneratorOnGPU(const edm::ParameterSet& cfg, edm::ConsumesCollector& iC); + + ~CAHitNtupletGeneratorOnGPU(); + + static void fillDescriptions(edm::ParameterSetDescription& desc); + static const char* fillDescriptionsLabel() { return "caHitNtupletOnGPU"; } + + PixelTrackHeterogeneous makeTuplesAsync(TrackingRecHit2DGPU const& hits_d, float bfield, cudaStream_t stream) const; + + PixelTrackHeterogeneous makeTuples(TrackingRecHit2DCPU const& hits_d, float bfield) const; + +private: + void buildDoublets(HitsOnCPU const& hh, cudaStream_t stream) const; + + void hitNtuplets(HitsOnCPU const& hh, const edm::EventSetup& es, bool useRiemannFit, cudaStream_t cudaStream); + + void launchKernels(HitsOnCPU const& hh, bool useRiemannFit, cudaStream_t cudaStream) const; + + Params m_params; + + Counters* m_counters = nullptr; +}; + +#endif // RecoPixelVertexing_PixelTriplets_plugins_CAHitNtupletGeneratorOnGPU_h diff --git a/RecoPixelVertexing/PixelTriplets/plugins/GPUCACell.h b/RecoPixelVertexing/PixelTriplets/plugins/GPUCACell.h new file mode 100644 index 0000000000000..0fd514e26d223 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/GPUCACell.h @@ -0,0 +1,347 @@ +#ifndef RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h +#define RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h + +// +// Author: Felice Pantaleo, CERN +// + +// #define ONLY_TRIPLETS_IN_HOLE + +#include + +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" +#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" +#include "RecoPixelVertexing/PixelTriplets/interface/CircleEq.h" +#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h" +#include "CAConstants.h" + +class GPUCACell { +public: + using PtrAsInt = unsigned long long; + + static constexpr auto maxCellsPerHit = caConstants::maxCellsPerHit; + using OuterHitOfCell = caConstants::OuterHitOfCell; + using CellNeighbors = caConstants::CellNeighbors; + using CellTracks = caConstants::CellTracks; + using CellNeighborsVector = caConstants::CellNeighborsVector; + using CellTracksVector = caConstants::CellTracksVector; + + using Hits = TrackingRecHit2DSOAView; + using hindex_type = Hits::hindex_type; + + using TmpTuple = cms::cuda::VecArray; + + using HitContainer = pixelTrack::HitContainer; + using Quality = pixelTrack::Quality; + static constexpr auto bad = pixelTrack::Quality::bad; + + GPUCACell() = default; + + __device__ __forceinline__ void init(CellNeighborsVector& cellNeighbors, + CellTracksVector& cellTracks, + Hits const& hh, + int layerPairId, + int doubletId, + hindex_type innerHitId, + hindex_type outerHitId) { + theInnerHitId = innerHitId; + theOuterHitId = outerHitId; + theDoubletId_ = doubletId; + theLayerPairId_ = layerPairId; + theUsed_ = 0; + + // optimization that depends on access pattern + theInnerZ = hh.zGlobal(innerHitId); + theInnerR = hh.rGlobal(innerHitId); + + // link to default empty + theOuterNeighbors = &cellNeighbors[0]; + theTracks = &cellTracks[0]; + assert(outerNeighbors().empty()); + assert(tracks().empty()); + } + + __device__ __forceinline__ int addOuterNeighbor(CellNeighbors::value_t t, CellNeighborsVector& cellNeighbors) { + // use smart cache + if (outerNeighbors().empty()) { + auto i = cellNeighbors.extend(); // maybe wasted.... + if (i > 0) { + cellNeighbors[i].reset(); +#ifdef __CUDACC__ + auto zero = (PtrAsInt)(&cellNeighbors[0]); + atomicCAS((PtrAsInt*)(&theOuterNeighbors), + zero, + (PtrAsInt)(&cellNeighbors[i])); // if fails we cannot give "i" back... +#else + theOuterNeighbors = &cellNeighbors[i]; +#endif + } else + return -1; + } + __threadfence(); + return outerNeighbors().push_back(t); + } + + __device__ __forceinline__ int addTrack(CellTracks::value_t t, CellTracksVector& cellTracks) { + if (tracks().empty()) { + auto i = cellTracks.extend(); // maybe wasted.... + if (i > 0) { + cellTracks[i].reset(); +#ifdef __CUDACC__ + auto zero = (PtrAsInt)(&cellTracks[0]); + atomicCAS((PtrAsInt*)(&theTracks), zero, (PtrAsInt)(&cellTracks[i])); // if fails we cannot give "i" back... +#else + theTracks = &cellTracks[i]; +#endif + } else + return -1; + } + __threadfence(); + return tracks().push_back(t); + } + + __device__ __forceinline__ CellTracks& tracks() { return *theTracks; } + __device__ __forceinline__ CellTracks const& tracks() const { return *theTracks; } + __device__ __forceinline__ CellNeighbors& outerNeighbors() { return *theOuterNeighbors; } + __device__ __forceinline__ CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; } + __device__ __forceinline__ float inner_x(Hits const& hh) const { return hh.xGlobal(theInnerHitId); } + __device__ __forceinline__ float outer_x(Hits const& hh) const { return hh.xGlobal(theOuterHitId); } + __device__ __forceinline__ float inner_y(Hits const& hh) const { return hh.yGlobal(theInnerHitId); } + __device__ __forceinline__ float outer_y(Hits const& hh) const { return hh.yGlobal(theOuterHitId); } + __device__ __forceinline__ float inner_z(Hits const& hh) const { return theInnerZ; } + // { return hh.zGlobal(theInnerHitId); } // { return theInnerZ; } + __device__ __forceinline__ float outer_z(Hits const& hh) const { return hh.zGlobal(theOuterHitId); } + __device__ __forceinline__ float inner_r(Hits const& hh) const { return theInnerR; } + // { return hh.rGlobal(theInnerHitId); } // { return theInnerR; } + __device__ __forceinline__ float outer_r(Hits const& hh) const { return hh.rGlobal(theOuterHitId); } + + __device__ __forceinline__ auto inner_iphi(Hits const& hh) const { return hh.iphi(theInnerHitId); } + __device__ __forceinline__ auto outer_iphi(Hits const& hh) const { return hh.iphi(theOuterHitId); } + + __device__ __forceinline__ float inner_detIndex(Hits const& hh) const { return hh.detectorIndex(theInnerHitId); } + __device__ __forceinline__ float outer_detIndex(Hits const& hh) const { return hh.detectorIndex(theOuterHitId); } + + constexpr unsigned int inner_hit_id() const { return theInnerHitId; } + constexpr unsigned int outer_hit_id() const { return theOuterHitId; } + + __device__ void print_cell() const { + printf("printing cell: %d, on layerPair: %d, innerHitId: %d, outerHitId: %d \n", + theDoubletId_, + theLayerPairId_, + theInnerHitId, + theOuterHitId); + } + + __device__ bool check_alignment(Hits const& hh, + GPUCACell const& otherCell, + const float ptmin, + const float hardCurvCut, + const float caThetaCutBarrel, + const float caThetaCutForward, + const float dcaCutInnerTriplet, + const float dcaCutOuterTriplet) const { + // detIndex of the layerStart for the Phase1 Pixel Detector: + // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID] + // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856] + auto ri = inner_r(hh); + auto zi = inner_z(hh); + + auto ro = outer_r(hh); + auto zo = outer_z(hh); + + auto r1 = otherCell.inner_r(hh); + auto z1 = otherCell.inner_z(hh); + auto isBarrel = otherCell.outer_detIndex(hh) < caConstants::last_barrel_detIndex; + bool aligned = areAlignedRZ(r1, + z1, + ri, + zi, + ro, + zo, + ptmin, + isBarrel ? caThetaCutBarrel : caThetaCutForward); // 2.f*thetaCut); // FIXME tune cuts + return (aligned && dcaCut(hh, + otherCell, + otherCell.inner_detIndex(hh) < caConstants::last_bpix1_detIndex ? dcaCutInnerTriplet + : dcaCutOuterTriplet, + hardCurvCut)); // FIXME tune cuts + } + + __device__ __forceinline__ static bool areAlignedRZ( + float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) { + float radius_diff = std::abs(r1 - ro); + float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo); + + float pMin = ptmin * std::sqrt(distance_13_squared); // this needs to be divided by + // radius_diff later + + float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri)); + return tan_12_13_half_mul_distance_13_squared * pMin <= thetaCut * distance_13_squared * radius_diff; + } + + __device__ inline bool dcaCut(Hits const& hh, + GPUCACell const& otherCell, + const float region_origin_radius_plus_tolerance, + const float maxCurv) const { + auto x1 = otherCell.inner_x(hh); + auto y1 = otherCell.inner_y(hh); + + auto x2 = inner_x(hh); + auto y2 = inner_y(hh); + + auto x3 = outer_x(hh); + auto y3 = outer_y(hh); + + CircleEq eq(x1, y1, x2, y2, x3, y3); + + if (eq.curvature() > maxCurv) + return false; + + return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature()); + } + + __device__ __forceinline__ static bool dcaCutH(float x1, + float y1, + float x2, + float y2, + float x3, + float y3, + const float region_origin_radius_plus_tolerance, + const float maxCurv) { + CircleEq eq(x1, y1, x2, y2, x3, y3); + + if (eq.curvature() > maxCurv) + return false; + + return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature()); + } + + __device__ inline bool hole0(Hits const& hh, GPUCACell const& innerCell) const { + using caConstants::first_ladder_bpx0; + using caConstants::max_ladder_bpx0; + using caConstants::module_length_bpx0; + using caConstants::module_tolerance_bpx0; + int p = innerCell.inner_iphi(hh); + if (p < 0) + p += std::numeric_limits::max(); + p = (max_ladder_bpx0 * p) / std::numeric_limits::max(); + p %= max_ladder_bpx0; + auto il = first_ladder_bpx0 + p; + auto r0 = hh.averageGeometry().ladderR[il]; + auto ri = innerCell.inner_r(hh); + auto zi = innerCell.inner_z(hh); + auto ro = outer_r(hh); + auto zo = outer_z(hh); + auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri); + auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]); + auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0); + auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0); + return gap; + } + + __device__ inline bool hole4(Hits const& hh, GPUCACell const& innerCell) const { + using caConstants::first_ladder_bpx4; + using caConstants::max_ladder_bpx4; + using caConstants::module_length_bpx4; + using caConstants::module_tolerance_bpx4; + int p = outer_iphi(hh); + if (p < 0) + p += std::numeric_limits::max(); + p = (max_ladder_bpx4 * p) / std::numeric_limits::max(); + p %= max_ladder_bpx4; + auto il = first_ladder_bpx4 + p; + auto r4 = hh.averageGeometry().ladderR[il]; + auto ri = innerCell.inner_r(hh); + auto zi = innerCell.inner_z(hh); + auto ro = outer_r(hh); + auto zo = outer_z(hh); + auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri); + auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]); + auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4); + auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4); + auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0]; + auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1]; + return gap || holeP || holeN; + } + + // trying to free the track building process from hardcoded layers, leaving + // the visit of the graph based on the neighborhood connections between cells. + __device__ inline void find_ntuplets(Hits const& hh, + GPUCACell* __restrict__ cells, + CellTracksVector& cellTracks, + HitContainer& foundNtuplets, + cms::cuda::AtomicPairCounter& apc, + Quality* __restrict__ quality, + TmpTuple& tmpNtuplet, + const unsigned int minHitsPerNtuplet, + bool startAt0) const { + // the building process for a track ends if: + // it has no right neighbor + // it has no compatible neighbor + // the ntuplets is then saved if the number of hits it contains is greater + // than a threshold + + tmpNtuplet.push_back_unsafe(theDoubletId_); + assert(tmpNtuplet.size() <= 4); + + bool last = true; + for (unsigned int otherCell : outerNeighbors()) { + if (cells[otherCell].theDoubletId_ < 0) + continue; // killed by earlyFishbone + last = false; + cells[otherCell].find_ntuplets( + hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0); + } + if (last) { // if long enough save... + if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) { +#ifdef ONLY_TRIPLETS_IN_HOLE + // triplets accepted only pointing to the hole + if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) || + ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]]))) +#endif + { + hindex_type hits[6]; + auto nh = 0U; + for (auto c : tmpNtuplet) { + hits[nh++] = cells[c].theInnerHitId; + } + hits[nh] = theOuterHitId; + auto it = foundNtuplets.bulkFill(apc, hits, tmpNtuplet.size() + 1); + if (it >= 0) { // if negative is overflow.... + for (auto c : tmpNtuplet) + cells[c].addTrack(it, cellTracks); + quality[it] = bad; // initialize to bad + } + } + } + } + tmpNtuplet.pop_back(); + assert(tmpNtuplet.size() < 4); + } + + // Cell status management + __device__ __forceinline__ void kill() { theDoubletId_ = -1; } + __device__ __forceinline__ bool isKilled() const { return theDoubletId_ < 0; } + + __device__ __forceinline__ int16_t layerPairId() const { return theLayerPairId_; } + + __device__ __forceinline__ bool unused() const { return !theUsed_; } + __device__ __forceinline__ void setUsedBit(uint16_t bit) { theUsed_ |= bit; } + +private: + CellNeighbors* theOuterNeighbors; + CellTracks* theTracks; + + int32_t theDoubletId_; + int16_t theLayerPairId_; + uint16_t theUsed_; // tbd + + float theInnerZ; + float theInnerR; + hindex_type theInnerHitId; + hindex_type theOuterHitId; +}; + +#endif // RecoPixelVertexing_PixelTriplets_plugins_GPUCACell_h diff --git a/RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.cc b/RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.cc new file mode 100644 index 0000000000000..880bdb47dfb5c --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.cc @@ -0,0 +1,16 @@ +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HelixFitOnGPU.h" + +void HelixFitOnGPU::allocateOnGPU(Tuples const *tuples, + TupleMultiplicity const *tupleMultiplicity, + OutputSoA *helix_fit_results) { + tuples_ = tuples; + tupleMultiplicity_ = tupleMultiplicity; + outputSoa_ = helix_fit_results; + + assert(tuples_); + assert(tupleMultiplicity_); + assert(outputSoa_); +} + +void HelixFitOnGPU::deallocateOnGPU() {} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.h b/RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.h new file mode 100644 index 0000000000000..938994840f8c0 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/HelixFitOnGPU.h @@ -0,0 +1,68 @@ +#ifndef RecoPixelVertexing_PixelTriplets_plugins_HelixFitOnGPU_h +#define RecoPixelVertexing_PixelTriplets_plugins_HelixFitOnGPU_h + +#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h" +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/FitResult.h" + +#include "CAConstants.h" + +namespace riemannFit { + // in case of memory issue can be made smaller + constexpr uint32_t maxNumberOfConcurrentFits = caConstants::maxNumberOfTuples; + constexpr uint32_t stride = maxNumberOfConcurrentFits; + using Matrix3x4d = Eigen::Matrix; + using Map3x4d = Eigen::Map >; + using Matrix6x4f = Eigen::Matrix; + using Map6x4f = Eigen::Map >; + + // hits + template + using Matrix3xNd = Eigen::Matrix; + template + using Map3xNd = Eigen::Map, 0, Eigen::Stride<3 * stride, stride> >; + // errors + template + using Matrix6xNf = Eigen::Matrix; + template + using Map6xNf = Eigen::Map, 0, Eigen::Stride<6 * stride, stride> >; + // fast fit + using Map4d = Eigen::Map >; + +} // namespace riemannFit + +class HelixFitOnGPU { +public: + using HitsView = TrackingRecHit2DSOAView; + + using Tuples = pixelTrack::HitContainer; + using OutputSoA = pixelTrack::TrackSoA; + + using TupleMultiplicity = caConstants::TupleMultiplicity; + + explicit HelixFitOnGPU(float bf, bool fit5as4) : bField_(bf), fit5as4_(fit5as4) {} + ~HelixFitOnGPU() { deallocateOnGPU(); } + + void setBField(double bField) { bField_ = bField; } + void launchRiemannKernels(HitsView const *hv, uint32_t nhits, uint32_t maxNumberOfTuples, cudaStream_t cudaStream); + void launchBrokenLineKernels(HitsView const *hv, uint32_t nhits, uint32_t maxNumberOfTuples, cudaStream_t cudaStream); + + void launchRiemannKernelsOnCPU(HitsView const *hv, uint32_t nhits, uint32_t maxNumberOfTuples); + void launchBrokenLineKernelsOnCPU(HitsView const *hv, uint32_t nhits, uint32_t maxNumberOfTuples); + + void allocateOnGPU(Tuples const *tuples, TupleMultiplicity const *tupleMultiplicity, OutputSoA *outputSoA); + void deallocateOnGPU(); + +private: + static constexpr uint32_t maxNumberOfConcurrentFits_ = riemannFit::maxNumberOfConcurrentFits; + + // fowarded + Tuples const *tuples_ = nullptr; + TupleMultiplicity const *tupleMultiplicity_ = nullptr; + OutputSoA *outputSoa_; + float bField_; + + const bool fit5as4_; +}; + +#endif // RecoPixelVertexing_PixelTriplets_plugins_HelixFitOnGPU_h diff --git a/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cc b/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cc new file mode 100644 index 0000000000000..491dd0df2004f --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cc @@ -0,0 +1,113 @@ +#include "RiemannFitOnGPU.h" + +void HelixFitOnGPU::launchRiemannKernelsOnCPU(HitsView const *hv, uint32_t nhits, uint32_t maxNumberOfTuples) { + assert(tuples_); + + // Fit internals + auto hitsGPU = + std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double)); + auto hits_geGPU = + std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float)); + auto fast_fit_resultsGPU = + std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double)); + auto circle_fit_resultsGPU_holder = + std::make_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::CircleFit)); + riemannFit::CircleFit *circle_fit_resultsGPU = (riemannFit::CircleFit *)(circle_fit_resultsGPU_holder.get()); + + for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) { + // triplets + kernel_FastFit<3>( + tuples_, tupleMultiplicity_, 3, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); + + kernel_CircleFit<3>(tupleMultiplicity_, + 3, + bField_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU, + offset); + + kernel_LineFit<3>(tupleMultiplicity_, + 3, + bField_, + outputSoa_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU, + offset); + + // quads + kernel_FastFit<4>( + tuples_, tupleMultiplicity_, 4, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); + + kernel_CircleFit<4>(tupleMultiplicity_, + 4, + bField_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU, + offset); + + kernel_LineFit<4>(tupleMultiplicity_, + 4, + bField_, + outputSoa_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU, + offset); + + if (fit5as4_) { + // penta + kernel_FastFit<4>( + tuples_, tupleMultiplicity_, 5, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); + + kernel_CircleFit<4>(tupleMultiplicity_, + 5, + bField_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU, + offset); + + kernel_LineFit<4>(tupleMultiplicity_, + 5, + bField_, + outputSoa_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU, + offset); + + } else { + // penta all 5 + kernel_FastFit<5>( + tuples_, tupleMultiplicity_, 5, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); + + kernel_CircleFit<5>(tupleMultiplicity_, + 5, + bField_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU, + offset); + + kernel_LineFit<5>(tupleMultiplicity_, + 5, + bField_, + outputSoa_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU, + offset); + } + } +} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cu b/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cu new file mode 100644 index 0000000000000..90af2ac13730b --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cu @@ -0,0 +1,131 @@ +#include "RiemannFitOnGPU.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" + +void HelixFitOnGPU::launchRiemannKernels(HitsView const *hv, + uint32_t nhits, + uint32_t maxNumberOfTuples, + cudaStream_t stream) { + assert(tuples_); + + auto blockSize = 64; + auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize; + + // Fit internals + auto hitsGPU = cms::cuda::make_device_unique( + maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<4>) / sizeof(double), stream); + auto hits_geGPU = cms::cuda::make_device_unique( + maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6x4f) / sizeof(float), stream); + auto fast_fit_resultsGPU = cms::cuda::make_device_unique( + maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double), stream); + auto circle_fit_resultsGPU_holder = + cms::cuda::make_device_unique(maxNumberOfConcurrentFits_ * sizeof(riemannFit::CircleFit), stream); + riemannFit::CircleFit *circle_fit_resultsGPU_ = (riemannFit::CircleFit *)(circle_fit_resultsGPU_holder.get()); + + for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) { + // triplets + kernel_FastFit<3><<>>( + tuples_, tupleMultiplicity_, 3, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); + cudaCheck(cudaGetLastError()); + + kernel_CircleFit<3><<>>(tupleMultiplicity_, + 3, + bField_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU_, + offset); + cudaCheck(cudaGetLastError()); + + kernel_LineFit<3><<>>(tupleMultiplicity_, + 3, + bField_, + outputSoa_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU_, + offset); + cudaCheck(cudaGetLastError()); + + // quads + kernel_FastFit<4><<>>( + tuples_, tupleMultiplicity_, 4, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); + cudaCheck(cudaGetLastError()); + + kernel_CircleFit<4><<>>(tupleMultiplicity_, + 4, + bField_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU_, + offset); + cudaCheck(cudaGetLastError()); + + kernel_LineFit<4><<>>(tupleMultiplicity_, + 4, + bField_, + outputSoa_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU_, + offset); + cudaCheck(cudaGetLastError()); + + if (fit5as4_) { + // penta + kernel_FastFit<4><<>>( + tuples_, tupleMultiplicity_, 5, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); + cudaCheck(cudaGetLastError()); + + kernel_CircleFit<4><<>>(tupleMultiplicity_, + 5, + bField_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU_, + offset); + cudaCheck(cudaGetLastError()); + + kernel_LineFit<4><<>>(tupleMultiplicity_, + 5, + bField_, + outputSoa_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU_, + offset); + cudaCheck(cudaGetLastError()); + } else { + // penta all 5 + kernel_FastFit<5><<>>( + tuples_, tupleMultiplicity_, 5, hv, hitsGPU.get(), hits_geGPU.get(), fast_fit_resultsGPU.get(), offset); + cudaCheck(cudaGetLastError()); + + kernel_CircleFit<5><<>>(tupleMultiplicity_, + 5, + bField_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU_, + offset); + cudaCheck(cudaGetLastError()); + + kernel_LineFit<5><<>>(tupleMultiplicity_, + 5, + bField_, + outputSoa_, + hitsGPU.get(), + hits_geGPU.get(), + fast_fit_resultsGPU.get(), + circle_fit_resultsGPU_, + offset); + cudaCheck(cudaGetLastError()); + } + } +} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.h b/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.h new file mode 100644 index 0000000000000..5b661bc3be028 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.h @@ -0,0 +1,187 @@ +// +// Author: Felice Pantaleo, CERN +// + +#include + +#include + +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/RiemannFit.h" + +#include "HelixFitOnGPU.h" + +using HitsOnGPU = TrackingRecHit2DSOAView; +using Tuples = pixelTrack::HitContainer; +using OutputSoA = pixelTrack::TrackSoA; + +template +__global__ void kernel_FastFit(Tuples const *__restrict__ foundNtuplets, + caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity, + uint32_t nHits, + HitsOnGPU const *__restrict__ hhp, + double *__restrict__ phits, + float *__restrict__ phits_ge, + double *__restrict__ pfast_fit, + uint32_t offset) { + constexpr uint32_t hitsInFit = N; + + assert(hitsInFit <= nHits); + + assert(pfast_fit); + assert(foundNtuplets); + assert(tupleMultiplicity); + + // look in bin for this hit multiplicity + auto local_start = blockIdx.x * blockDim.x + threadIdx.x; + +#ifdef RIEMANN_DEBUG + if (0 == local_start) + printf("%d Ntuple of size %d for %d hits to fit\n", tupleMultiplicity->size(nHits), nHits, hitsInFit); +#endif + + for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt; + local_idx += gridDim.x * blockDim.x) { + auto tuple_idx = local_idx + offset; + if (tuple_idx >= tupleMultiplicity->size(nHits)) + break; + + // get it from the ntuple container (one to one to helix) + auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); + assert(tkid < foundNtuplets->nbins()); + + assert(foundNtuplets->size(tkid) == nHits); + + riemannFit::Map3xNd hits(phits + local_idx); + riemannFit::Map4d fast_fit(pfast_fit + local_idx); + riemannFit::Map6xNf hits_ge(phits_ge + local_idx); + + // Prepare data structure + auto const *hitId = foundNtuplets->begin(tkid); + for (unsigned int i = 0; i < hitsInFit; ++i) { + auto hit = hitId[i]; + // printf("Hit global: %f,%f,%f\n", hhp->xg_d[hit],hhp->yg_d[hit],hhp->zg_d[hit]); + float ge[6]; + hhp->cpeParams() + .detParams(hhp->detectorIndex(hit)) + .frame.toGlobal(hhp->xerrLocal(hit), 0, hhp->yerrLocal(hit), ge); + // printf("Error: %d: %f,%f,%f,%f,%f,%f\n",hhp->detInd_d[hit],ge[0],ge[1],ge[2],ge[3],ge[4],ge[5]); + + hits.col(i) << hhp->xGlobal(hit), hhp->yGlobal(hit), hhp->zGlobal(hit); + hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]; + } + riemannFit::fastFit(hits, fast_fit); + + // no NaN here.... + assert(fast_fit(0) == fast_fit(0)); + assert(fast_fit(1) == fast_fit(1)); + assert(fast_fit(2) == fast_fit(2)); + assert(fast_fit(3) == fast_fit(3)); + } +} + +template +__global__ void kernel_CircleFit(caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity, + uint32_t nHits, + double bField, + double *__restrict__ phits, + float *__restrict__ phits_ge, + double *__restrict__ pfast_fit_input, + riemannFit::CircleFit *circle_fit, + uint32_t offset) { + assert(circle_fit); + assert(N <= nHits); + + // same as above... + + // look in bin for this hit multiplicity + auto local_start = blockIdx.x * blockDim.x + threadIdx.x; + for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt; + local_idx += gridDim.x * blockDim.x) { + auto tuple_idx = local_idx + offset; + if (tuple_idx >= tupleMultiplicity->size(nHits)) + break; + + riemannFit::Map3xNd hits(phits + local_idx); + riemannFit::Map4d fast_fit(pfast_fit_input + local_idx); + riemannFit::Map6xNf hits_ge(phits_ge + local_idx); + + riemannFit::VectorNd rad = (hits.block(0, 0, 2, N).colwise().norm()); + + riemannFit::Matrix2Nd hits_cov = riemannFit::Matrix2Nd::Zero(); + riemannFit::loadCovariance2D(hits_ge, hits_cov); + + circle_fit[local_idx] = riemannFit::circleFit(hits.block(0, 0, 2, N), hits_cov, fast_fit, rad, bField, true); + +#ifdef RIEMANN_DEBUG +// auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); +// printf("kernelCircleFit circle.par(0,1,2): %d %f,%f,%f\n", tkid, +// circle_fit[local_idx].par(0), circle_fit[local_idx].par(1), circle_fit[local_idx].par(2)); +#endif + } +} + +template +__global__ void kernel_LineFit(caConstants::TupleMultiplicity const *__restrict__ tupleMultiplicity, + uint32_t nHits, + double bField, + OutputSoA *results, + double *__restrict__ phits, + float *__restrict__ phits_ge, + double *__restrict__ pfast_fit_input, + riemannFit::CircleFit *__restrict__ circle_fit, + uint32_t offset) { + assert(results); + assert(circle_fit); + assert(N <= nHits); + + // same as above... + + // look in bin for this hit multiplicity + auto local_start = (blockIdx.x * blockDim.x + threadIdx.x); + for (int local_idx = local_start, nt = riemannFit::maxNumberOfConcurrentFits; local_idx < nt; + local_idx += gridDim.x * blockDim.x) { + auto tuple_idx = local_idx + offset; + if (tuple_idx >= tupleMultiplicity->size(nHits)) + break; + + // get it for the ntuple container (one to one to helix) + auto tkid = *(tupleMultiplicity->begin(nHits) + tuple_idx); + + riemannFit::Map3xNd hits(phits + local_idx); + riemannFit::Map4d fast_fit(pfast_fit_input + local_idx); + riemannFit::Map6xNf hits_ge(phits_ge + local_idx); + + auto const &line_fit = riemannFit::lineFit(hits, hits_ge, circle_fit[local_idx], fast_fit, bField, true); + + riemannFit::fromCircleToPerigee(circle_fit[local_idx]); + + results->stateAtBS.copyFromCircle( + circle_fit[local_idx].par, circle_fit[local_idx].cov, line_fit.par, line_fit.cov, 1.f / float(bField), tkid); + results->pt(tkid) = bField / std::abs(circle_fit[local_idx].par(2)); + results->eta(tkid) = asinhf(line_fit.par(0)); + results->chi2(tkid) = (circle_fit[local_idx].chi2 + line_fit.chi2) / (2 * N - 5); + +#ifdef RIEMANN_DEBUG + printf("kernelLineFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n", + N, + nHits, + tkid, + circle_fit[local_idx].par(0), + circle_fit[local_idx].par(1), + circle_fit[local_idx].par(2)); + printf("kernelLineFit line.par(0,1): %d %f,%f\n", tkid, line_fit.par(0), line_fit.par(1)); + printf("kernelLineFit chi2 cov %f/%f %e,%e,%e,%e,%e\n", + circle_fit[local_idx].chi2, + line_fit.chi2, + circle_fit[local_idx].cov(0, 0), + circle_fit[local_idx].cov(1, 1), + circle_fit[local_idx].cov(2, 2), + line_fit.cov(0, 0), + line_fit.cov(1, 1)); +#endif + } +} diff --git a/RecoPixelVertexing/PixelTriplets/plugins/gpuFishbone.h b/RecoPixelVertexing/PixelTriplets/plugins/gpuFishbone.h new file mode 100644 index 0000000000000..09cd5c18e65ae --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/gpuFishbone.h @@ -0,0 +1,91 @@ +#ifndef RecoPixelVertexing_PixelTriplets_plugins_gpuFishbone_h +#define RecoPixelVertexing_PixelTriplets_plugins_gpuFishbone_h + +#include +#include +#include +#include +#include + +#include "DataFormats/Math/interface/approx_atan2.h" +#include "Geometry/TrackerGeometryBuilder/interface/phase1PixelTopology.h" +#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" + +#include "GPUCACell.h" + +namespace gpuPixelDoublets { + + // __device__ + // __forceinline__ + __global__ void fishbone(GPUCACell::Hits const* __restrict__ hhp, + GPUCACell* cells, + uint32_t const* __restrict__ nCells, + GPUCACell::OuterHitOfCell const* __restrict__ isOuterHitOfCell, + uint32_t nHits, + bool checkTrack) { + constexpr auto maxCellsPerHit = GPUCACell::maxCellsPerHit; + + auto const& hh = *hhp; + + // x run faster... + auto firstY = threadIdx.y + blockIdx.y * blockDim.y; + auto firstX = threadIdx.x; + + float x[maxCellsPerHit], y[maxCellsPerHit], z[maxCellsPerHit], n[maxCellsPerHit]; + uint16_t d[maxCellsPerHit]; // uint8_t l[maxCellsPerHit]; + uint32_t cc[maxCellsPerHit]; + + for (int idy = firstY, nt = nHits; idy < nt; idy += gridDim.y * blockDim.y) { + auto const& vc = isOuterHitOfCell[idy]; + auto size = vc.size(); + if (size < 2) + continue; + // if alligned kill one of the two. + // in principle one could try to relax the cut (only in r-z?) for jumping-doublets + auto const& c0 = cells[vc[0]]; + auto xo = c0.outer_x(hh); + auto yo = c0.outer_y(hh); + auto zo = c0.outer_z(hh); + auto sg = 0; + for (int32_t ic = 0; ic < size; ++ic) { + auto& ci = cells[vc[ic]]; + if (ci.unused()) + continue; // for triplets equivalent to next + if (checkTrack && ci.tracks().empty()) + continue; + cc[sg] = vc[ic]; + d[sg] = ci.inner_detIndex(hh); + x[sg] = ci.inner_x(hh) - xo; + y[sg] = ci.inner_y(hh) - yo; + z[sg] = ci.inner_z(hh) - zo; + n[sg] = x[sg] * x[sg] + y[sg] * y[sg] + z[sg] * z[sg]; + ++sg; + } + if (sg < 2) + continue; + // here we parallelize + for (int32_t ic = firstX; ic < sg - 1; ic += blockDim.x) { + auto& ci = cells[cc[ic]]; + for (auto jc = ic + 1; jc < sg; ++jc) { + auto& cj = cells[cc[jc]]; + // must be different detectors (in the same layer) + // if (d[ic]==d[jc]) continue; + // || l[ic]!=l[jc]) continue; + auto cos12 = x[ic] * x[jc] + y[ic] * y[jc] + z[ic] * z[jc]; + if (d[ic] != d[jc] && cos12 * cos12 >= 0.99999f * n[ic] * n[jc]) { + // alligned: kill farthest (prefer consecutive layers) + if (n[ic] > n[jc]) { + ci.kill(); + break; + } else { + cj.kill(); + } + } + } //cj + } // ci + } // hits + } +} // namespace gpuPixelDoublets + +#endif // RecoPixelVertexing_PixelTriplets_plugins_gpuFishbone_h diff --git a/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoublets.h b/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoublets.h new file mode 100644 index 0000000000000..4e2e241e92605 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoublets.h @@ -0,0 +1,132 @@ +#ifndef RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoublets_h +#define RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoublets_h + +#include "RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoubletsAlgos.h" + +#define CONSTANT_VAR __constant__ + +namespace gpuPixelDoublets { + + constexpr int nPairsForQuadruplets = 13; // quadruplets require hits in all layers + constexpr int nPairsForTriplets = nPairsForQuadruplets + 2; // include barrel "jumping" layer pairs + constexpr int nPairs = nPairsForTriplets + 4; // include forward "jumping" layer pairs + static_assert(nPairs <= caConstants::maxNumberOfLayerPairs); + + // start constants + // clang-format off + + CONSTANT_VAR const uint8_t layerPairs[2 * nPairs] = { + 0, 1, 0, 4, 0, 7, // BPIX1 (3) + 1, 2, 1, 4, 1, 7, // BPIX2 (6) + 4, 5, 7, 8, // FPIX1 (8) + 2, 3, 2, 4, 2, 7, 5, 6, 8, 9, // BPIX3 & FPIX2 (13) + 0, 2, 1, 3, // Jumping Barrel (15) + 0, 5, 0, 8, // Jumping Forward (BPIX1,FPIX2) + 4, 6, 7, 9 // Jumping Forward (19) + }; + + constexpr int16_t phi0p05 = 522; // round(521.52189...) = phi2short(0.05); + constexpr int16_t phi0p06 = 626; // round(625.82270...) = phi2short(0.06); + constexpr int16_t phi0p07 = 730; // round(730.12648...) = phi2short(0.07); + + CONSTANT_VAR const int16_t phicuts[nPairs]{phi0p05, + phi0p07, + phi0p07, + phi0p05, + phi0p06, + phi0p06, + phi0p05, + phi0p05, + phi0p06, + phi0p06, + phi0p06, + phi0p05, + phi0p05, + phi0p05, + phi0p05, + phi0p05, + phi0p05, + phi0p05, + phi0p05}; + // phi0p07, phi0p07, phi0p06,phi0p06, phi0p06,phi0p06}; // relaxed cuts + + CONSTANT_VAR float const minz[nPairs] = { + -20., 0., -30., -22., 10., -30., -70., -70., -22., 15., -30, -70., -70., -20., -22., 0, -30., -70., -70.}; + CONSTANT_VAR float const maxz[nPairs] = { + 20., 30., 0., 22., 30., -10., 70., 70., 22., 30., -15., 70., 70., 20., 22., 30., 0., 70., 70.}; + CONSTANT_VAR float const maxr[nPairs] = { + 20., 9., 9., 20., 7., 7., 5., 5., 20., 6., 6., 5., 5., 20., 20., 9., 9., 9., 9.}; + + // end constants + // clang-format on + + using CellNeighbors = caConstants::CellNeighbors; + using CellTracks = caConstants::CellTracks; + using CellNeighborsVector = caConstants::CellNeighborsVector; + using CellTracksVector = caConstants::CellTracksVector; + + __global__ void initDoublets(GPUCACell::OuterHitOfCell* isOuterHitOfCell, + int nHits, + CellNeighborsVector* cellNeighbors, + CellNeighbors* cellNeighborsContainer, + CellTracksVector* cellTracks, + CellTracks* cellTracksContainer) { + assert(isOuterHitOfCell); + int first = blockIdx.x * blockDim.x + threadIdx.x; + for (int i = first; i < nHits; i += gridDim.x * blockDim.x) + isOuterHitOfCell[i].reset(); + + if (0 == first) { + cellNeighbors->construct(caConstants::maxNumOfActiveDoublets, cellNeighborsContainer); + cellTracks->construct(caConstants::maxNumOfActiveDoublets, cellTracksContainer); + auto i = cellNeighbors->extend(); + assert(0 == i); + (*cellNeighbors)[0].reset(); + i = cellTracks->extend(); + assert(0 == i); + (*cellTracks)[0].reset(); + } + } + + constexpr auto getDoubletsFromHistoMaxBlockSize = 64; // for both x and y + constexpr auto getDoubletsFromHistoMinBlocksPerMP = 16; + + __global__ +#ifdef __CUDACC__ + __launch_bounds__(getDoubletsFromHistoMaxBlockSize, getDoubletsFromHistoMinBlocksPerMP) +#endif + void getDoubletsFromHisto(GPUCACell* cells, + uint32_t* nCells, + CellNeighborsVector* cellNeighbors, + CellTracksVector* cellTracks, + TrackingRecHit2DSOAView const* __restrict__ hhp, + GPUCACell::OuterHitOfCell* isOuterHitOfCell, + int nActualPairs, + bool ideal_cond, + bool doClusterCut, + bool doZ0Cut, + bool doPtCut, + uint32_t maxNumOfDoublets) { + auto const& __restrict__ hh = *hhp; + doubletsFromHisto(layerPairs, + nActualPairs, + cells, + nCells, + cellNeighbors, + cellTracks, + hh, + isOuterHitOfCell, + phicuts, + minz, + maxz, + maxr, + ideal_cond, + doClusterCut, + doZ0Cut, + doPtCut, + maxNumOfDoublets); + } + +} // namespace gpuPixelDoublets + +#endif // RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoublets_h diff --git a/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoubletsAlgos.h b/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoubletsAlgos.h new file mode 100644 index 0000000000000..a12dee0785b36 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/plugins/gpuPixelDoubletsAlgos.h @@ -0,0 +1,243 @@ +#ifndef RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h +#define RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h + +#include +#include +#include +#include +#include + +#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h" +#include "DataFormats/Math/interface/approx_atan2.h" +#include "HeterogeneousCore/CUDAUtilities/interface/VecArray.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" + +#include "CAConstants.h" +#include "GPUCACell.h" + +namespace gpuPixelDoublets { + + using CellNeighbors = caConstants::CellNeighbors; + using CellTracks = caConstants::CellTracks; + using CellNeighborsVector = caConstants::CellNeighborsVector; + using CellTracksVector = caConstants::CellTracksVector; + + __device__ __forceinline__ void doubletsFromHisto(uint8_t const* __restrict__ layerPairs, + uint32_t nPairs, + GPUCACell* cells, + uint32_t* nCells, + CellNeighborsVector* cellNeighbors, + CellTracksVector* cellTracks, + TrackingRecHit2DSOAView const& __restrict__ hh, + GPUCACell::OuterHitOfCell* isOuterHitOfCell, + int16_t const* __restrict__ phicuts, + float const* __restrict__ minz, + float const* __restrict__ maxz, + float const* __restrict__ maxr, + bool ideal_cond, + bool doClusterCut, + bool doZ0Cut, + bool doPtCut, + uint32_t maxNumOfDoublets) { + // ysize cuts (z in the barrel) times 8 + // these are used if doClusterCut is true + constexpr int minYsizeB1 = 36; + constexpr int minYsizeB2 = 28; + constexpr int maxDYsize12 = 28; + constexpr int maxDYsize = 20; + constexpr int maxDYPred = 20; + constexpr float dzdrFact = 8 * 0.0285 / 0.015; // from dz/dr to "DY" + + bool isOuterLadder = ideal_cond; + + using PhiBinner = TrackingRecHit2DSOAView::PhiBinner; + + auto const& __restrict__ phiBinner = hh.phiBinner(); + uint32_t const* __restrict__ offsets = hh.hitsLayerStart(); + assert(offsets); + + auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; }; + + // nPairsMax to be optimized later (originally was 64). + // If it should be much bigger, consider using a block-wide parallel prefix scan, + // e.g. see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html + const int nPairsMax = caConstants::maxNumberOfLayerPairs; + assert(nPairs <= nPairsMax); + __shared__ uint32_t innerLayerCumulativeSize[nPairsMax]; + __shared__ uint32_t ntot; + if (threadIdx.y == 0 && threadIdx.x == 0) { + innerLayerCumulativeSize[0] = layerSize(layerPairs[0]); + for (uint32_t i = 1; i < nPairs; ++i) { + innerLayerCumulativeSize[i] = innerLayerCumulativeSize[i - 1] + layerSize(layerPairs[2 * i]); + } + ntot = innerLayerCumulativeSize[nPairs - 1]; + } + __syncthreads(); + + // x runs faster + auto idy = blockIdx.y * blockDim.y + threadIdx.y; + auto first = threadIdx.x; + auto stride = blockDim.x; + + uint32_t pairLayerId = 0; // cannot go backward + for (auto j = idy; j < ntot; j += blockDim.y * gridDim.y) { + while (j >= innerLayerCumulativeSize[pairLayerId++]) + ; + --pairLayerId; // move to lower_bound ?? + + assert(pairLayerId < nPairs); + assert(j < innerLayerCumulativeSize[pairLayerId]); + assert(0 == pairLayerId || j >= innerLayerCumulativeSize[pairLayerId - 1]); + + uint8_t inner = layerPairs[2 * pairLayerId]; + uint8_t outer = layerPairs[2 * pairLayerId + 1]; + assert(outer > inner); + + auto hoff = PhiBinner::histOff(outer); + + auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1]; + i += offsets[inner]; + + // printf("Hit in Layer %d %d %d %d\n", i, inner, pairLayerId, j); + + assert(i >= offsets[inner]); + assert(i < offsets[inner + 1]); + + // found hit corresponding to our cuda thread, now do the job + auto mi = hh.detectorIndex(i); + if (mi > gpuClustering::maxNumModules) + continue; // invalid + + /* maybe clever, not effective when zoCut is on + auto bpos = (mi%8)/4; // if barrel is 1 for z>0 + auto fpos = (outer>3) & (outer<7); + if ( ((inner<3) & (outer>3)) && bpos!=fpos) continue; + */ + + auto mez = hh.zGlobal(i); + + if (mez < minz[pairLayerId] || mez > maxz[pairLayerId]) + continue; + + int16_t mes = -1; // make compiler happy + if (doClusterCut) { + // if ideal treat inner ladder as outer + if (inner == 0) + assert(mi < 96); + isOuterLadder = ideal_cond ? true : 0 == (mi / 8) % 2; // only for B1/B2/B3 B4 is opposite, FPIX:noclue... + + // in any case we always test mes>0 ... + mes = inner > 0 || isOuterLadder ? hh.clusterSizeY(i) : -1; + + if (inner == 0 && outer > 3) // B1 and F1 + if (mes > 0 && mes < minYsizeB1) + continue; // only long cluster (5*8) + if (inner == 1 && outer > 3) // B2 and F1 + if (mes > 0 && mes < minYsizeB2) + continue; + } + auto mep = hh.iphi(i); + auto mer = hh.rGlobal(i); + + // all cuts: true if fails + constexpr float z0cut = 12.f; // cm + constexpr float hardPtCut = 0.5f; // GeV + // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field) + constexpr float minRadius = hardPtCut * 87.78f; + constexpr float minRadius2T4 = 4.f * minRadius * minRadius; + auto ptcut = [&](int j, int16_t idphi) { + auto r2t4 = minRadius2T4; + auto ri = mer; + auto ro = hh.rGlobal(j); + auto dphi = short2phi(idphi); + return dphi * dphi * (r2t4 - ri * ro) > (ro - ri) * (ro - ri); + }; + auto z0cutoff = [&](int j) { + auto zo = hh.zGlobal(j); + auto ro = hh.rGlobal(j); + auto dr = ro - mer; + return dr > maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr; + }; + + auto zsizeCut = [&](int j) { + auto onlyBarrel = outer < 4; + auto so = hh.clusterSizeY(j); + auto dy = inner == 0 ? maxDYsize12 : maxDYsize; + // in the barrel cut on difference in size + // in the endcap on the prediction on the first layer (actually in the barrel only: happen to be safe for endcap as well) + // FIXME move pred cut to z0cutoff to optmize loading of and computaiton ... + auto zo = hh.zGlobal(j); + auto ro = hh.rGlobal(j); + return onlyBarrel ? mes > 0 && so > 0 && std::abs(so - mes) > dy + : (inner < 4) && mes > 0 && + std::abs(mes - int(std::abs((mez - zo) / (mer - ro)) * dzdrFact + 0.5f)) > maxDYPred; + }; + + auto iphicut = phicuts[pairLayerId]; + + auto kl = PhiBinner::bin(int16_t(mep - iphicut)); + auto kh = PhiBinner::bin(int16_t(mep + iphicut)); + auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); }; + +#ifdef GPU_DEBUG + int tot = 0; + int nmin = 0; + int tooMany = 0; +#endif + + auto khh = kh; + incr(khh); + for (auto kk = kl; kk != khh; incr(kk)) { +#ifdef GPU_DEBUG + if (kk != kl && kk != kh) + nmin += phiBinner.size(kk + hoff); +#endif + auto const* __restrict__ p = phiBinner.begin(kk + hoff); + auto const* __restrict__ e = phiBinner.end(kk + hoff); + p += first; + for (; p < e; p += stride) { + auto oi = __ldg(p); + assert(oi >= offsets[outer]); + assert(oi < offsets[outer + 1]); + auto mo = hh.detectorIndex(oi); + if (mo > gpuClustering::maxNumModules) + continue; // invalid + + if (doZ0Cut && z0cutoff(oi)) + continue; + + auto mop = hh.iphi(oi); + uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop))); + if (idphi > iphicut) + continue; + + if (doClusterCut && zsizeCut(oi)) + continue; + if (doPtCut && ptcut(oi, idphi)) + continue; + + auto ind = atomicAdd(nCells, 1); + if (ind >= maxNumOfDoublets) { + atomicSub(nCells, 1); + break; + } // move to SimpleVector?? + // int layerPairId, int doubletId, int innerHitId, int outerHitId) + cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, ind, i, oi); + isOuterHitOfCell[oi].push_back(ind); +#ifdef GPU_DEBUG + if (isOuterHitOfCell[oi].full()) + ++tooMany; + ++tot; +#endif + } + } +#ifdef GPU_DEBUG + if (tooMany > 0) + printf("OuterHitOfCell full for %d in layer %d/%d, %d,%d %d\n", i, inner, outer, nmin, tot, tooMany); +#endif + } // loop in block... + } + +} // namespace gpuPixelDoublets + +#endif // RecoPixelVertexing_PixelTriplets_plugins_gpuPixelDoubletsAlgos_h diff --git a/RecoPixelVertexing/PixelTriplets/python/caHitQuadrupletEDProducer_cfi.py b/RecoPixelVertexing/PixelTriplets/python/caHitQuadrupletEDProducer_cfi.py new file mode 100644 index 0000000000000..c72c07ae5a721 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/python/caHitQuadrupletEDProducer_cfi.py @@ -0,0 +1,4 @@ +import FWCore.ParameterSet.Config as cms +from RecoPixelVertexing.PixelTriplets.caHitQuadrupletDefaultEDProducer_cfi import caHitQuadrupletDefaultEDProducer as _caHitQuadrupletDefaultEDProducer + +caHitQuadrupletEDProducer = _caHitQuadrupletDefaultEDProducer.clone() diff --git a/RecoPixelVertexing/PixelTriplets/test/BuildFile.xml b/RecoPixelVertexing/PixelTriplets/test/BuildFile.xml index 6d6f1553b32f3..d480d7408b9e2 100644 --- a/RecoPixelVertexing/PixelTriplets/test/BuildFile.xml +++ b/RecoPixelVertexing/PixelTriplets/test/BuildFile.xml @@ -16,3 +16,14 @@ + + + + + + + + + + + diff --git a/RecoPixelVertexing/PixelTriplets/test/CAsizes_t.cpp b/RecoPixelVertexing/PixelTriplets/test/CAsizes_t.cpp new file mode 100644 index 0000000000000..5cf2e6526b860 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/test/CAsizes_t.cpp @@ -0,0 +1,25 @@ +#include "RecoPixelVertexing/PixelTriplets/plugins/GPUCACell.h" + +#include +#include + +template +void print() { + std::cout << "size of " << typeid(T).name() << ' ' << sizeof(T) << std::endl; +} + +int main() { + using namespace caConstants; + + print(); + print(); + print(); + print(); + print(); + print(); + print(); + + print(); + + return 0; +} diff --git a/RecoPixelVertexing/PixelTriplets/test/CircleEq_t.cpp b/RecoPixelVertexing/PixelTriplets/test/CircleEq_t.cpp new file mode 100644 index 0000000000000..504f9c144b284 --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/test/CircleEq_t.cpp @@ -0,0 +1,77 @@ +#include "RecoPixelVertexing/PixelTriplets/interface/CircleEq.h" +#include + +struct OriCircle { + using T = float; + + float radius = 0; + float x_center = 0; + float y_center = 0; + + constexpr OriCircle(T x1, T y1, T x2, T y2, T x3, T y3) { compute(x1, y1, x2, y2, x3, y3); } + + // dca to origin + constexpr T dca0() const { return std::sqrt(x_center * x_center + y_center * y_center) - radius; } + + // dca to given point + constexpr T dca(T x, T y) const { + x -= x_center; + y -= y_center; + return std::sqrt(x * x + y * y) - radius; + } + + constexpr void compute(T x1, T y1, T x2, T y2, T x3, T y3) { + auto det = (x1 - x2) * (y2 - y3) - (x2 - x3) * (y1 - y2); + + auto offset = x2 * x2 + y2 * y2; + + auto bc = (x1 * x1 + y1 * y1 - offset) * 0.5f; + + auto cd = (offset - x3 * x3 - y3 * y3) * 0.5f; + + auto idet = 1.f / det; + + x_center = (bc * (y2 - y3) - cd * (y1 - y2)) * idet; + y_center = (cd * (x1 - x2) - bc * (x2 - x3)) * idet; + + radius = std::sqrt((x2 - x_center) * (x2 - x_center) + (y2 - y_center) * (y2 - y_center)); + } +}; + +#include + +template +bool equal(T a, T b) { + // return float(a-b)==0; + return std::abs(float(a - b)) < std::abs(0.01f * a); +} + +int main() { + float r1 = 4, r2 = 8, r3 = 15; + for (float phi = -3; phi < 3.1; phi += 0.5) { + float x1 = r1 * cos(phi); + float x2 = r2 * cos(phi); + float y1 = r1 * sin(phi); + float y2 = r2 * sin(phi); + for (float phi3 = phi - 0.31; phi3 < phi + 0.31; phi3 += 0.05) { + float x3 = r3 * cos(phi3); + float y3 = r3 * sin(phi3); + + OriCircle ori(x1, y1, x2, y2, x3, y3); + CircleEq eq(x1, y1, x2, y2, x3, y3); + // std::cout << "r " << ori.radius <<' '<< eq.radius() << std::endl; + assert(equal(ori.radius, std::abs(eq.radius()))); + auto c = eq.center(); + auto dir = eq.cosdir(); + assert(equal(1.f, dir.first * dir.first + dir.second * dir.second)); + assert(equal(ori.x_center, c.first)); + assert(equal(ori.y_center, c.second)); + // std::cout << "dca " << ori.dca0() <<' '<< eq.radius()*eq.dca0() << std::endl; + assert(equal(std::abs(ori.dca0()), std::abs(eq.radius() * eq.dca0()))); + // std::cout << "dca " << ori.dca(1.,1.) <<' '<< eq.radius()*eq.dca(1.,1.) << std::endl; + assert(equal(std::abs(ori.dca(1., 1.)), std::abs(eq.radius() * eq.dca(1., 1.)))); + } + } + + return 0; +} diff --git a/RecoPixelVertexing/PixelTriplets/test/fastDPHI_t.cpp b/RecoPixelVertexing/PixelTriplets/test/fastDPHI_t.cpp new file mode 100644 index 0000000000000..8538970a196ff --- /dev/null +++ b/RecoPixelVertexing/PixelTriplets/test/fastDPHI_t.cpp @@ -0,0 +1,165 @@ +// this test documents the derivation of the fast deltaphi used in gpu doublet code.. +// +// +// +#include +#include +#include +#include + +/** +| 1) circle is parameterized as: | +| C*[(X-Xp)**2+(Y-Yp)**2] - 2*alpha*(X-Xp) - 2*beta*(Y-Yp) = 0 | +| Xp,Yp is a point on the track (Yp is at the center of the chamber); | +| C = 1/r0 is the curvature ( sign of C is charge of particle ); | +| alpha & beta are the direction cosines of the radial vector at Xp,Yp | +| i.e. alpha = C*(X0-Xp), | +| beta = C*(Y0-Yp), | +| where center of circle is at X0,Y0. | +| Alpha > 0 | +| Slope dy/dx of tangent at Xp,Yp is -alpha/beta. | +| 2) the z dimension of the helix is parameterized by gamma = dZ/dSperp | +| this is also the tangent of the pitch angle of the helix. | +| with this parameterization, (alpha,beta,gamma) rotate like a vector. | +| 3) For tracks going inward at (Xp,Yp), C, alpha, beta, and gamma change sign| +| +*/ + +template +class FastCircle { +public: + FastCircle() {} + FastCircle(T x1, T y1, T x2, T y2, T x3, T y3) { compute(x1, y1, x2, y2, x3, y3); } + + void compute(T x1, T y1, T x2, T y2, T x3, T y3); + + T m_xp; + T m_yp; + T m_c; + T m_alpha; + T m_beta; +}; + +template +void FastCircle::compute(T x1, T y1, T x2, T y2, T x3, T y3) { + bool flip = std::abs(x3 - x1) > std::abs(y3 - y1); + + auto x1p = x1 - x2; + auto y1p = y1 - y2; + auto d12 = x1p * x1p + y1p * y1p; + auto x3p = x3 - x2; + auto y3p = y3 - y2; + auto d32 = x3p * x3p + y3p * y3p; + + if (flip) { + std::swap(x1p, y1p); + std::swap(x3p, y3p); + } + + auto num = x1p * y3p - y1p * x3p; // num also gives correct sign for CT + auto det = d12 * y3p - d32 * y1p; + if (std::abs(det) == 0) { + // and why we flip???? + } + auto ct = num / det; + auto sn = det > 0 ? T(1.) : T(-1.); + auto st2 = (d12 * x3p - d32 * x1p) / det; + auto seq = T(1.) + st2 * st2; + auto al2 = sn / std::sqrt(seq); + auto be2 = -st2 * al2; + ct *= T(2.) * al2; + + if (flip) { + std::swap(x1p, y1p); + std::swap(al2, be2); + al2 = -al2; + be2 = -be2; + ct = -ct; + } + + m_xp = x1; + m_yp = y1; + m_c = ct; + m_alpha = al2 - ct * x1p; + m_beta = be2 - ct * y1p; +} + +// compute curvature given two points (and origin) +float fastDPHI(float ri, float ro, float dphi) { + /* + x3=0 y1=0 x1=0; + y3=ro + */ + + // auto x2 = ri*dphi; + // auto y2 = ri*(1.f-0.5f*dphi*dphi); + + /* + auto x1p = x1-x2; + auto y1p = y1-y2; + auto d12 = x1p*x1p + y1p*y1p; + auto x3p = x3-x2; + auto y3p = y3-y2; + auto d32 = x3p*x3p + y3p*y3p; + */ + + /* + auto x1p = -x2; + auto y1p = -y2; + auto d12 = ri*ri; + auto x3p = -x2; + auto y3p = ro-y2; + auto d32 = ri*ri + ro*ro - 2.f*ro*y2; + */ + + // auto rat = (ro -2.f*y2); + // auto det = ro - ri - (ro - 2.f*ri -0.5f*ro)*dphi*dphi; + + //auto det2 = (ro-ri)*(ro-ri) -2.*(ro-ri)*(ro - 2.f*ri -0.5f*ro)*dphi*dphi; + // auto seq = det2 + dphi*dphi*(ro-2.f*ri)*(ro-2.f*ri); // *rat2; + // auto seq = (ro-ri)*(ro-ri) + dphi*dphi*ri*ro; + + // and little by little simplifing and removing higher over terms + // we get + auto r2 = (ro - ri) * (ro - ri) / (dphi * dphi) + ri * ro; + + // d2 = (ro-ri)*(ro-ri)/(4.f*r2 -ri*ro); + // return -2.f*dphi/std::sqrt(seq); + + return -1.f / std::sqrt(r2 / 4.f); +} + +#include + +template +bool equal(T a, T b) { + // return float(a-b)==0; + return std::abs(float(a - b)) < std::abs(0.01f * a); +} + +int n = 0; +void go(float ri, float ro, float dphi, bool print = false) { + ++n; + float x3 = 0.f, y3 = ro; + float x2 = ri * sin(dphi); + float y2 = ri * cos(dphi); + + FastCircle c(0, 0, x2, y2, x3, y3); + + auto cc = fastDPHI(ri, ro, dphi); + if (print) + std::cout << c.m_c << ' ' << cc << std::endl; + assert(equal(c.m_c, cc)); +} + +int main() { + go(4., 7., 0.1, true); + + for (float r1 = 2; r1 < 15; r1 += 1) + for (float dr = 0.5; dr < 10; dr += 0.5) + for (float dphi = 0.02; dphi < 0.2; dphi += 0.2) + go(r1, r1 + dr, dphi); + + std::cout << "done " << n << std::endl; + return 0; +}; diff --git a/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py b/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py new file mode 100644 index 0000000000000..24774bbda649c --- /dev/null +++ b/RecoTracker/Configuration/python/customizePixelOnlyForProfiling.py @@ -0,0 +1,59 @@ +import FWCore.ParameterSet.Config as cms + +# Customise the Pixel-only reconstruction to run on GPU +# +# Run the unpacker, clustering, ntuplets, track fit and vertex reconstruction on GPU. +def customizePixelOnlyForProfilingGPUOnly(process): + + process.consumer = cms.EDAnalyzer("GenericConsumer", + eventProducts = cms.untracked.vstring('caHitNtupletCUDA', 'pixelVertexCUDA') + ) + + process.consume_step = cms.EndPath(process.consumer) + + process.schedule = cms.Schedule(process.raw2digi_step, process.reconstruction_step, process.consume_step) + + return process + + +# Customise the Pixel-only reconstruction to run on GPU, and copy the data to the host +# +# Run the unpacker, clustering, ntuplets, track fit and vertex reconstruction on GPU, +# and copy all the products to the host in SoA format. +# +# The same customisation can be also used on the SoA CPU workflow, running up to the +# tracks and vertices on the CPU in SoA format, without conversion to legacy format. +def customizePixelOnlyForProfilingGPUWithHostCopy(process): + + #? process.siPixelRecHitSoAFromLegacy.convertToLegacy = False + + process.consumer = cms.EDAnalyzer("GenericConsumer", + eventProducts = cms.untracked.vstring('pixelTrackSoA', 'pixelVertexSoA') + ) + + process.consume_step = cms.EndPath(process.consumer) + + process.schedule = cms.Schedule(process.raw2digi_step, process.reconstruction_step, process.consume_step) + + return process + + +# Customise the Pixel-only reconstruction to run on GPU, copy the data to the host, +# and convert to legacy format +# +# Run the unpacker, clustering, ntuplets, track fit and vertex reconstruction on GPU; +# copy all the products to the host in SoA format; and convert them to legacy format. +# +# The same customisation can be also used on the CPU workflow, running up to the +# tracks and vertices on the CPU. +def customizePixelOnlyForProfiling(process): + + process.consumer = cms.EDAnalyzer("GenericConsumer", + eventProducts = cms.untracked.vstring('pixelTracks', 'pixelVertices') + ) + + process.consume_step = cms.EndPath(process.consumer) + + process.schedule = cms.Schedule(process.raw2digi_step, process.reconstruction_step, process.consume_step) + + return process diff --git a/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml b/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml index 4dc0bfafbb439..65c849c69bbdf 100644 --- a/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml +++ b/RecoTracker/TkSeedGenerator/plugins/BuildFile.xml @@ -1,19 +1,15 @@ - + - - - - - - + + @@ -26,14 +22,21 @@ + + + + + + + @@ -44,5 +47,4 @@ - diff --git a/RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc b/RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc new file mode 100644 index 0000000000000..0e5823fc46c46 --- /dev/null +++ b/RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc @@ -0,0 +1,170 @@ +#include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h" +#include "DataFormats/BeamSpot/interface/BeamSpot.h" +#include "DataFormats/GeometrySurface/interface/Plane.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "DataFormats/TrackingRecHit/interface/InvalidTrackingRecHit.h" +#include "DataFormats/TrajectorySeed/interface/TrajectorySeedCollection.h" +#include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" +#include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/Framework/interface/ESHandle.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/PluginManager/interface/ModuleDef.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/CommonDetUnit/interface/GeomDet.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "RecoPixelVertexing/PixelTrackFitting/interface/FitUtils.h" +#include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h" +#include "TrackingTools/MaterialEffects/interface/PropagatorWithMaterial.h" +#include "TrackingTools/Records/interface/TrackingComponentsRecord.h" +#include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h" +#include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h" +#include "TrackingTools/TrajectoryState/interface/TrajectoryStateTransform.h" + +/* + produces seeds directly from cuda produced tuples +*/ +class SeedProducerFromSoA : public edm::global::EDProducer<> { +public: + explicit SeedProducerFromSoA(const edm::ParameterSet& iConfig); + ~SeedProducerFromSoA() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + // Event data tokens + const edm::EDGetTokenT tBeamSpot_; + const edm::EDGetTokenT tokenTrack_; + // Event setup tokens + const edm::ESGetToken idealMagneticFieldToken_; + const edm::ESGetToken trackerDigiGeometryToken_; + const edm::ESGetToken trackerPropagatorToken_; + int32_t minNumberOfHits_; +}; + +SeedProducerFromSoA::SeedProducerFromSoA(const edm::ParameterSet& iConfig) + : tBeamSpot_(consumes(iConfig.getParameter("beamSpot"))), + tokenTrack_(consumes(iConfig.getParameter("src"))), + idealMagneticFieldToken_(esConsumes()), + trackerDigiGeometryToken_(esConsumes()), + trackerPropagatorToken_(esConsumes(edm::ESInputTag("PropagatorWithMaterial"))), + minNumberOfHits_(iConfig.getParameter("minNumberOfHits")) + +{ + produces(); +} + +void SeedProducerFromSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); + desc.add("src", edm::InputTag("pixelTrackSoA")); + desc.add("minNumberOfHits", 0); + + descriptions.addWithDefaultLabel(desc); +} + +void SeedProducerFromSoA::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + // std::cout << "Converting gpu helix to trajectory seed" << std::endl; + auto result = std::make_unique(); + + auto const& fieldESH = iSetup.getHandle(idealMagneticFieldToken_); + auto const& tracker = iSetup.getHandle(trackerDigiGeometryToken_); + auto const& dus = tracker->detUnits(); + + auto const& propagatorHandle = iSetup.getHandle(trackerPropagatorToken_); + const Propagator* propagator = &(*propagatorHandle); + + const auto& bsh = iEvent.get(tBeamSpot_); + // std::cout << "beamspot " << bsh.x0() << ' ' << bsh.y0() << ' ' << bsh.z0() << std::endl; + GlobalPoint bs(bsh.x0(), bsh.y0(), bsh.z0()); + + const auto& tsoa = *(iEvent.get(tokenTrack_)); + + auto const* quality = tsoa.qualityData(); + auto const& fit = tsoa.stateAtBS; + auto const& detIndices = tsoa.detIndices; + auto maxTracks = tsoa.stride(); + + int32_t nt = 0; + for (int32_t it = 0; it < maxTracks; ++it) { + auto nHits = tsoa.nHits(it); + if (nHits == 0) + break; // this is a guard: maybe we need to move to nTracks... + + auto q = quality[it]; + if (q != pixelTrack::Quality::loose) + continue; // FIXME + if (nHits < minNumberOfHits_) + continue; + ++nt; + + // fill hits with invalid just to hold the detId + auto b = detIndices.begin(it); + edm::OwnVector hits; + for (int iHit = 0; iHit < nHits; ++iHit) { + auto const* det = dus[*(b + iHit)]; + // FIXME at some point get a proper type ... + hits.push_back(new InvalidTrackingRecHit(*det, TrackingRecHit::bad)); + } + + // mind: this values are respect the beamspot! + + float phi = tsoa.phi(it); + + riemannFit::Vector5d ipar, opar; + riemannFit::Matrix5d icov, ocov; + fit.copyToDense(ipar, icov, it); + riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); + + LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); + AlgebraicSymMatrix55 m; + for (int i = 0; i < 5; ++i) + for (int j = i; j < 5; ++j) + m(i, j) = ocov(i, j); + + float sp = std::sin(phi); + float cp = std::cos(phi); + Surface::RotationType rot(sp, -cp, 0, 0, 0, -1.f, cp, sp, 0); + + Plane impPointPlane(bs, rot); + GlobalTrajectoryParameters gp(impPointPlane.toGlobal(lpar.position()), + impPointPlane.toGlobal(lpar.momentum()), + lpar.charge(), + fieldESH.product()); + + JacobianLocalToCurvilinear jl2c(impPointPlane, lpar, *fieldESH.product()); + + AlgebraicSymMatrix55 mo = ROOT::Math::Similarity(jl2c.jacobian(), m); + + FreeTrajectoryState fts(gp, CurvilinearTrajectoryError(mo)); + + auto const& lastHit = hits.back(); + + TrajectoryStateOnSurface outerState = propagator->propagate(fts, *lastHit.surface()); + + if (!outerState.isValid()) { + edm::LogError("SeedFromGPU") << " was trying to create a seed from:\n" + << fts << "\n propagating to: " << lastHit.geographicalId().rawId(); + continue; + } + + auto const& pTraj = trajectoryStateTransform::persistentState(outerState, lastHit.geographicalId().rawId()); + + result->emplace_back(pTraj, hits, alongMomentum); + } + + iEvent.put(std::move(result)); +} + +DEFINE_FWK_MODULE(SeedProducerFromSoA); diff --git a/Validation/RecoTrack/python/PostProcessorTracker_cfi.py b/Validation/RecoTrack/python/PostProcessorTracker_cfi.py index df73b303d5061..54fa0364fe239 100644 --- a/Validation/RecoTrack/python/PostProcessorTracker_cfi.py +++ b/Validation/RecoTrack/python/PostProcessorTracker_cfi.py @@ -350,9 +350,9 @@ def _addNoFlow(module): postProcessorTrackTrackingOnly = postProcessorTrack.clone() -postProcessorTrackTrackingOnly.subDirs.extend(["Tracking/TrackBHadron/*","Tracking/TrackSeeding/*", "Tracking/PixelTrack/*"]) +postProcessorTrackTrackingOnly.subDirs.extend(["Tracking/TrackBHadron/*", "Tracking/TrackSeeding/*", "Tracking/PixelTrack/*", "Tracking/PixelTrackFromPV/*", "Tracking/PixelTrackFromPVAllTP/*", "Tracking/PixelTrackBHadron/*"]) postProcessorTrackSummaryTrackingOnly = postProcessorTrackSummary.clone() -postProcessorTrackSummaryTrackingOnly.subDirs.extend(["Tracking/TrackBHadron","Tracking/TrackSeeding", "Tracking/PixelTrack"]) +postProcessorTrackSummaryTrackingOnly.subDirs.extend(["Tracking/TrackBHadron", "Tracking/TrackSeeding", "Tracking/PixelTrack", "Tracking/PixelTrackFromPV", "Tracking/PixelTrackFromPVAllTP", "Tracking/PixelTrackBHadron"]) postProcessorTrackSequenceTrackingOnly = cms.Sequence( postProcessorTrackTrackingOnly+ diff --git a/Validation/RecoTrack/python/TrackValidation_cff.py b/Validation/RecoTrack/python/TrackValidation_cff.py index 9882bf2a9ca7f..1fa5484c8bf5c 100644 --- a/Validation/RecoTrack/python/TrackValidation_cff.py +++ b/Validation/RecoTrack/python/TrackValidation_cff.py @@ -974,9 +974,17 @@ def _uniqueFirstLayers(layerList): trackAssociation = "trackingParticlePixelTrackAsssociation" ) +_pixelTracksCustom = dict( + src = "pixelTracks", + vertexTag = "pixelVertices", +) +pixelTracksPt09 = generalTracksPt09.clone(quality = ["undefQuality"], **_pixelTracksCustom) +pixelTracksFromPV = generalTracksFromPV.clone(quality = "undefQuality", **_pixelTracksCustom) +pixelTracksFromPVPt09 = pixelTracksPt09.clone(src = "pixelTracksFromPV") + trackValidatorPixelTrackingOnly = trackValidator.clone( dirName = "Tracking/PixelTrack/", - label = ["pixelTracks"], + label = ["pixelTracks", "pixelTracksPt09"], doResolutionPlotsForLabels = [], trackCollectionForDrCalculation = "pixelTracks", associators = ["trackingParticlePixelTrackAsssociation"], @@ -985,13 +993,54 @@ def _uniqueFirstLayers(layerList): dodEdxPlots = False, cores = cms.InputTag(""), ) +trackValidatorFromPVPixelTrackingOnly = trackValidatorPixelTrackingOnly.clone( + dirName = "Tracking/PixelTrackFromPV/", + label = ["pixelTracksFromPV", "pixelTracksFromPVPt09"], + label_tp_effic = "trackingParticlesSignal", + label_tp_fake = "trackingParticlesSignal", + label_tp_effic_refvector = True, + label_tp_fake_refvector = True, + trackCollectionForDrCalculation = "pixelTracksFromPV", + doPlotsOnlyForTruePV = True, + doPVAssociationPlots = False, + doResolutionPlotsForLabels = ["disabled"], +) +trackValidatorFromPVAllTPPixelTrackingOnly = trackValidatorFromPVPixelTrackingOnly.clone( + dirName = "Tracking/PixelTrackFromPVAllTP/", + label_tp_effic = trackValidatorPixelTrackingOnly.label_tp_effic.value(), + label_tp_fake = trackValidatorPixelTrackingOnly.label_tp_fake.value(), + label_tp_effic_refvector = False, + label_tp_fake_refvector = False, + doSimPlots = False, + doSimTrackPlots = False, +) +trackValidatorBHadronPixelTrackingOnly = trackValidatorPixelTrackingOnly.clone( + dirName = "Tracking/PixelTrackBHadron/", + label_tp_effic = "trackingParticlesBHadron", + label_tp_effic_refvector = True, + doSimPlots = True, + doRecoTrackPlots = False, # Fake rate is defined wrt. all TPs, and that is already included in trackValidator + dodEdxPlots = False, +) tracksValidationTruthPixelTrackingOnly = tracksValidationTruth.copy() tracksValidationTruthPixelTrackingOnly.replace(trackingParticleRecoTrackAsssociation, trackingParticlePixelTrackAsssociation) tracksValidationTruthPixelTrackingOnly.replace(VertexAssociatorByPositionAndTracks, PixelVertexAssociatorByPositionAndTracks) +tracksValidationTruthPixelTrackingOnly.add(trackingParticlesBHadron) + +tracksPreValidationPixelTrackingOnly = cms.Task( + tracksValidationTruthPixelTrackingOnly, + trackingParticlesSignal, + pixelTracksPt09, + pixelTracksFromPV, + pixelTracksFromPVPt09, +) tracksValidationPixelTrackingOnly = cms.Sequence( - trackValidatorPixelTrackingOnly, - tracksValidationTruthPixelTrackingOnly + trackValidatorPixelTrackingOnly + + trackValidatorFromPVPixelTrackingOnly + + trackValidatorFromPVAllTPPixelTrackingOnly + + trackValidatorBHadronPixelTrackingOnly, + tracksPreValidationPixelTrackingOnly ) diff --git a/Validation/RecoTrack/python/plotting/html.py b/Validation/RecoTrack/python/plotting/html.py index a9fed5cc12975..3985f8edc9abf 100644 --- a/Validation/RecoTrack/python/plotting/html.py +++ b/Validation/RecoTrack/python/plotting/html.py @@ -63,8 +63,14 @@ def _allToHP(s): return s.replace("All", "High purity") def _allToBTV(s): return s.replace("All", "BTV-like") +def _allPtCut(s): + return s.replace("All tracks", "Tracks pT > 0.9 GeV") def _ptCut(s): return s.replace("Tracks", "Tracks pT > 0.9 GeV").replace("tracks", "tracks pT > 0.9 GeV") +def _allToPixel(s): + return s.replace("All", "Pixel") +def _toPixel(s): + return s.replace("Tracks", "Pixel tracks") _trackQualityNameOrder = collections.OrderedDict([ ("seeding_seeds", "Seeds"), ("seeding_seedsa", "Seeds A"), @@ -75,8 +81,8 @@ def _ptCut(s): ("building_", "Built tracks"), ("", _allName), ("highPurity", _allToHP(_allName)), - ("Pt09", "Tracks pT > 0.9 GeV"), - ("highPurityPt09", "High purity tracks pT > 0.9 GeV"), + ("Pt09", _allPtCut(_allName)), + ("highPurityPt09", _ptCut(_allToHP(_allName))), ("ByOriginalAlgo", _toOriAlgo(_allName)), ("highPurityByOriginalAlgo", _toOriAlgo(_toHP(_allName))), ("ByAlgoMask", _toAlgoMask(_allName)), @@ -120,6 +126,15 @@ def _ptCut(s): ("displaced_highPurityByOriginalAlgo", _toOriAlgo(_allToHP(_displacedName))), ("displaced_ByAlgoMask", _toAlgoMask(_displacedName)), ("displaced_highPurityByAlgoMask", _toAlgoMask(_allToHP(_displacedName))), + # Pixel tracks + ("pixel_", _allToPixel(_allName)), + ("pixel_Pt09", _ptCut(_allToPixel(_allName))), + ("pixelFromPV_", _toPixel(_fromPVName)), + ("pixelFromPV_Pt09", _ptCut(_toPixel(_fromPVName))), + ("pixelFromPVAllTP_", _toPixel(_fromPVAllTPName)), + ("pixelFromPVAllTP_Pt09", _ptCut(_toPixel(_fromPVAllTPName))), + ("pixelbhadron_", _allToPixel(_bhadronName)), + ("pixelbhadron_Pt09", _ptCut(_allToPixel(_bhadronName))), ]) _trackAlgoName = { @@ -134,6 +149,7 @@ def _ptCut(s): "iter7" : "Iterative Step 7", "iter9" : "Iterative Step 9", "iter10": "Iterative Step 10", + "pixel": "Pixel tracks", } _trackAlgoOrder = [ @@ -169,6 +185,7 @@ def _ptCut(s): 'iter7', 'iter9', 'iter10', + "pixel", ] _pageNameMap = { @@ -186,10 +203,10 @@ def _ptCut(s): # These are for the summary page ("seeding_seeds", "Seeds"), ("building", "Built tracks"), - ("", "All tracks"), - ("Pt09", "All tracks (pT>0.9 GeV)"), - ("highPurity", "High purity tracks"), - ("highPurityPt09", "High purity tracks (pT>0.9 GeV)"), + ("", _allName), + ("Pt09", _allPtCut(_allName)), + ("highPurity", _allToHP(_allName)), + ("highPurityPt09", _ptCut(_allToHP(_allName))), ("tpPtLess09", _tpPtLess09Name), ("tpPtLess09_highPurity", _allToHP(_tpPtLess09Name)), ("tpEtaGreater2p7", _tpEtaGreater2p7Name), @@ -209,7 +226,14 @@ def _ptCut(s): ("displaced", _displacedName), ("displaced_highPurity", _allToHP(_displacedName)), # Pixel tracks - ("pixel", "Pixel tracks"), + ("pixel", _allToPixel(_allName)), + ("pixelPt09", _ptCut(_allToPixel(_allName))), + ("pixelFromPV", _toPixel(_fromPVName)), + ("pixelFromPVPt09", _ptCut(_toPixel(_fromPVName))), + ("pixelFromPVAllTP", _toPixel(_fromPVAllTPName)), + ("pixelFromPVAllTPPt09", _ptCut(_toPixel(_fromPVAllTPName))), + ("pixelbhadron", _allToPixel(_bhadronName)), + ("pixelbhadronPt09", _ptCut(_allToPixel(_bhadronName))), # These are for vertices ("genvertex", "Gen vertices"), ("pixelVertices", "Pixel vertices"), @@ -233,6 +257,7 @@ def _ptCut(s): _fromPVAllTP2Legend = "Tracks from reco PV (another method), fake rate numerator contains all TrackingParticles (separates fake tracks from pileup tracks)" _fromPVAllTPPt2Legend = "Tracks (pT > 0.9 GeV) from reco PV (another method), fake rate numerator contains all TrackingParticles (separates fake tracks from pileup tracks)" _bhadronLegend = "All tracks, efficiency denominator contains only TrackingParticles from B-hadron decays" +_bhadronPtLegend = "Tracks (pT > 0.9 GeV), efficiency denominator contains only TrackingParticles from B-hadron decays" def _sectionNameLegend(): return { @@ -258,6 +283,12 @@ def _sectionNameLegend(): "bhadron_": _bhadronLegend, "bhadron_highPurity": _allToHP(_bhadronLegend), "bhadron_btvLike": _bhadronLegend.replace("All tracks", _btvLegend), + "pixelFromPV_": _fromPVLegend, + "pixelFromPV_Pt09": _fromPVPtLegend, + "pixelFromPVAllTP_": _fromPVAllTPLegend, + "pixelFromPVAllTP_Pt09": _fromPVAllTPPtLegend, + "pixelbhadron_": _bhadronLegend, + "pixelbhadron_Pt09": _bhadronPtLegend, } class Table: @@ -701,7 +732,7 @@ def __init__(self, sample, title, fastVsFull, pileupComparison): self._timingPage = PageSet(*params) self._pfPages = PageSet(*params) self._hltPages = PageSet(*params, dqmSubFolderTranslatedToSectionName=lambda algoQuality: algoQuality[0]) - self._pixelPages = PageSet(*params, dqmSubFolderTranslatedToSectionName=lambda algoQuality: algoQuality[0]) + self._pixelPages = TrackingPageSet(*params) self._otherPages = PageSet(*params) self._purposePageMap = { diff --git a/Validation/RecoTrack/python/plotting/trackingPlots.py b/Validation/RecoTrack/python/plotting/trackingPlots.py index 7f5d84738ee81..fda80114d7e8f 100644 --- a/Validation/RecoTrack/python/plotting/trackingPlots.py +++ b/Validation/RecoTrack/python/plotting/trackingPlots.py @@ -615,6 +615,8 @@ def _mapCollectionToAlgoQuality(collName): prefixes = ["cutsreco", "cutsrecofrompv", "cutsrecofrompv2", "cutsrecofrompvalltp", "cutsrecoetagreater2p7"] if collNameLow in ["general", "generalfrompv", "generaletagreater2p7"]+prefixes: algo = "ootb" + elif collNameLow in ["pixel", "pixelfrompv", "pixelfrompvalltp"]: + algo = "pixel" else: def testColl(coll): for pfx in prefixes: @@ -939,6 +941,7 @@ class HighPurityPt09: pass class BTVLike: pass class AK4PFJets: pass class Pixel: pass + class PixelPt09: pass def __init__(self, section, collection=GeneralTracks): self._collection = collection @@ -981,6 +984,8 @@ def _getN(hname): return _getAlgoQuality(data, "ak4PFJets", "") elif self._collection == TrackingSummaryTable.Pixel: return _getAlgoQuality(data, "pixel", "") + elif self._collection == TrackingSummaryTable.PixelPt09: + return _getAlgoQuality(data, "pixel", "Pt09") else: raise Exception("Collection not recognized, %s" % str(self._collection)) def _formatOrNone(num, func): @@ -1354,11 +1359,21 @@ def _appendTrackingPlots(lastDirName, name, algoPlots, onlyForPileup=False, only _appendTrackingPlots("TrackBHadron", "bhadron", _simBasedPlots+_recoBasedPlots, onlyForBHadron=True) _appendTrackingPlots("TrackDisplaced", "displaced", _simBasedPlots+_recoBasedPlots) # Pixel tracks -_common = dict(purpose=PlotPurpose.Pixel, page="pixel") -plotter.append("pixelTrack", _trackingFolders("PixelTrack"), TrackingPlotFolder(*(_simBasedPlots+_recoBasedPlots), **_common)) -plotterExt.append("pixelTrack", _trackingFolders("PixelTrack"), TrackingPlotFolder(*_extendedPlots, **_common)) -plotter.append("pixelTrack_summary", _trackingFolders("PixelTrack"), PlotFolder(_summaryRaw, _summaryRawN, loopSubFolders=False, purpose=PlotPurpose.TrackingSummary, page="summary", section="pixel")) -plotter.appendTable("pixelTrack_summary", _trackingFolders("PixelTrack"), TrackingSummaryTable(section="pixel", collection=TrackingSummaryTable.Pixel)) +def _appendPixelTrackingPlots(lastDirName, name): + _common = dict(purpose=PlotPurpose.Pixel, page="pixel") + _folders = _trackingFolders(lastDirName) + + plotter.append(name, _folders, TrackingPlotFolder(*(_simBasedPlots+_recoBasedPlots), **_common)) + plotterExt.append(name, _folders, TrackingPlotFolder(*_extendedPlots, **_common)) + + plotter.append(name+"_summary", _folders, PlotFolder(_summaryRaw, _summaryRawN, loopSubFolders=False, purpose=PlotPurpose.TrackingSummary, page="summary", section=name)) + plotter.append(name+"_summary", _folders, PlotFolder(_summaryRaw, _summaryRawN, loopSubFolders=False, purpose=PlotPurpose.TrackingSummary, page="summary", section=name+"Pt09")) + plotter.appendTable(name+"_summary", _folders, TrackingSummaryTable(section=name, collection=TrackingSummaryTable.Pixel)) + plotter.appendTable(name+"_summary", _folders, TrackingSummaryTable(section=name+"Pt09", collection=TrackingSummaryTable.PixelPt09)) +_appendPixelTrackingPlots("PixelTrack", "pixel") +_appendPixelTrackingPlots("PixelTrackFromPV", "pixelFromPV") +_appendPixelTrackingPlots("PixelTrackFromPVAllTP", "pixelFromPVAllTP") +_appendPixelTrackingPlots("PixelTrackBHadron", "pixelbhadron") # MiniAOD