diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py index 42961bc4f8b83..9bc35cd0a4503 100644 --- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py +++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py @@ -1978,7 +1978,7 @@ def setup_(self, step, stepName, stepDict, k, properties): else: stepDict[stepName][k] = merge([stepDict[step][k]]) def condition(self, fragment, stepList, key, hasHarvest): - return fragment=="TTbar_14TeV" and 'Run4' in key + return (fragment=="TTbar_14TeV" or fragment=="SingleMuPt15Eta0p_0p4") and 'Run4' in key upgradeWFs['NGTScouting'] = UpgradeWorkflow_NGTScouting( steps = [ 'Reco', @@ -2012,6 +2012,19 @@ def condition(self, fragment, stepList, key, hasHarvest): '-s':'HARVESTING:@hltValidation' } +upgradeWFs['NGTScoutingAlpaka'] = deepcopy(upgradeWFs['NGTScouting']) +upgradeWFs['NGTScoutingAlpaka'].suffix = '_NGTScoutingAlpaka' +upgradeWFs['NGTScoutingAlpaka'].offset = 0.771 +upgradeWFs['NGTScoutingAlpaka'].step2 = { + '-s':'DIGI:pdigi_valid,L1TrackTrigger,L1,L1P2GT,DIGI2RAW,HLT:NGTScouting,VALIDATION:@hltValidation', + '--procModifiers': 'alpaka', + '--datatier':'GEN-SIM-DIGI-RAW,DQMIO', + '--eventcontent':'FEVTDEBUGHLT,DQMIO' +} +upgradeWFs['NGTScoutingAlpaka'].step3 = { + '-s':'HARVESTING:@hltValidation' +} + class UpgradeWorkflow_L1Complete(UpgradeWorkflow): def setup_(self, step, stepName, stepDict, k, properties): if 'Digi' in step and 'NoHLT' not in step: diff --git a/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h b/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h index 810b58042557f..d2db8b10f5e88 100644 --- a/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h +++ b/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h @@ -23,7 +23,8 @@ namespace pixelClustering { constexpr uint16_t clusterThresholdPhase2OtherLayers = 4000; constexpr uint32_t maxNumDigis = 3 * 256 * 1024; // @PU=200 µ=530k σ=50k this is >4σ away - constexpr uint16_t maxNumModules = 5000; // This is an upperlimit taking into account D110 has 4000 modules + constexpr uint16_t maxNumModules = + 5000 + 2872; // This is an upperlimit taking into account D110 has 4000 modules + 3 layer of OT for CA Extension constexpr int32_t maxNumClustersPerModules = maxHitsInModule(); constexpr uint16_t invalidModuleId = std::numeric_limits::max() - 1; diff --git a/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h b/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h index 8d9a65f69d67c..ffcac69e26f68 100644 --- a/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h +++ b/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h @@ -1,6 +1,8 @@ #ifndef DataFormats_TrackSoA_interface_alpaka_TrackUtilities_h #define DataFormats_TrackSoA_interface_alpaka_TrackUtilities_h +// MRMR #define NTUPLE_FIT_DEBUG // MRMR + #include #include #include diff --git a/Geometry/CommonTopologies/interface/SimplePixelTopology.h b/Geometry/CommonTopologies/interface/SimplePixelTopology.h index d775302a993f5..be68e240bf3ce 100644 --- a/Geometry/CommonTopologies/interface/SimplePixelTopology.h +++ b/Geometry/CommonTopologies/interface/SimplePixelTopology.h @@ -9,8 +9,8 @@ namespace pixelTopology { constexpr auto maxNumberOfLadders = 160; - constexpr uint8_t maxLayers = 28; - constexpr uint8_t maxPairs = 64; + constexpr uint8_t maxLayers = 28 + 3; // CA Extension to 3 OT barrel layers + constexpr uint8_t maxPairs = 64 + 64; // CA // TODO // Once CUDA is dropped this could be wrapped in #ifdef CA_TRIPLETS_HOLE @@ -216,7 +216,7 @@ namespace phase2PixelTopology { using pixelTopology::phi0p07; using pixelTopology::phi0p09; - constexpr uint32_t numberOfLayers = 28; + constexpr uint32_t numberOfLayers = 28 + 3; // CA Extension with 3 barrel layers from OT constexpr int nPairs = 23 + 6 + 14 + 8 + 4; // include far forward layer pairs constexpr uint16_t numberOfModules = 4000; @@ -440,6 +440,10 @@ namespace pixelTopology { static constexpr inline uint16_t localY(uint16_t py) { return py; } }; + struct Phase2OT : public Phase2 { + static constexpr uint32_t numberOfLayers = 28 + 3; //OT Barrel Extension + }; + struct Phase1 { // types using hindex_type = uint32_t; // FIXME from siPixelRecHitsHeterogeneousProduct diff --git a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelRecHitsExtendedSoA_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelRecHitsExtendedSoA_cfi.py new file mode 100644 index 0000000000000..d103e80b83597 --- /dev/null +++ b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelRecHitsExtendedSoA_cfi.py @@ -0,0 +1,5 @@ +import FWCore.ParameterSet.Config as cms + +from RecoLocalTracker.SiPixelRecHits.SiPixelRecHitExtendedAlpaka_alpaka import SiPixelRecHitExtendedAlpaka_alpaka as _SiPixelRecHitExtendedAlpaka_alpaka + +hltPhase2PixelRecHitsExtendedSoA = _SiPixelRecHitExtendedAlpaka_alpaka() diff --git a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracksSoA_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracksSoA_cfi.py index 6baca6635d7b8..c3eba495604f5 100644 --- a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracksSoA_cfi.py +++ b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracksSoA_cfi.py @@ -1,26 +1,29 @@ import FWCore.ParameterSet.Config as cms -hltPhase2PixelTracksSoA = cms.EDProducer('CAHitNtupletAlpakaPhase2@alpaka', - pixelRecHitSrc = cms.InputTag('hltPhase2SiPixelRecHitsSoA'), +removeOT = False + +hltPhase2PixelTracksSoA = cms.EDProducer('CAHitNtupletAlpakaPhase2OT@alpaka', + pixelRecHitSrc = cms.InputTag('hltPhase2PixelRecHitsExtendedSoA'), ptmin = cms.double(0.9), - hardCurvCut = cms.double(0.0328407225), +# hardCurvCut = cms.double(0.0328407225), + hardCurvCut = cms.double(0.01425), # corresponds to 800 MeV in 3.8T. earlyFishbone = cms.bool(True), lateFishbone = cms.bool(False), fillStatistics = cms.bool(False), - minHitsPerNtuplet = cms.uint32(4), - maxNumberOfDoublets = cms.string(str(5*512*1024)), - maxNumberOfTuples = cms.string(str(32*1024)), - cellPtCut = cms.double(0.85), - cellZ0Cut = cms.double(7.5), + minHitsPerNtuplet = cms.uint32(5), + maxNumberOfDoublets = cms.string(str(15*512*1024)), + maxNumberOfTuples = cms.string(str(2*60*1024)), + cellPtCut = cms.double(0.85), # Corresponds to 1 GeV * this cut, i.e., 850 MeV, as minimum p_t + cellZ0Cut = cms.double(12.5), # it's half the BS width! It has nothing to do with the sample!! minYsizeB1 = cms.int32(25), minYsizeB2 = cms.int32(15), maxDYsize12 = cms.int32(12), maxDYsize = cms.int32(10), maxDYPred = cms.int32(20), - avgHitsPerTrack = cms.double(7.0), - avgCellsPerHit = cms.double(6), - avgCellsPerCell = cms.double(0.151), - avgTracksPerCell = cms.double(0.040), + avgHitsPerTrack = cms.double(10.0), + avgCellsPerHit = cms.double(25), + avgCellsPerCell = cms.double(5), + avgTracksPerCell = cms.double(5), minHitsForSharingCut = cms.uint32(10), fitNas4 = cms.bool(False), useRiemannFit = cms.bool(False), @@ -30,19 +33,582 @@ trackQualityCuts = cms.PSet( maxChi2 = cms.double(5.0), minPt = cms.double(0.9), - maxTip = cms.double(0.3), - maxZip = cms.double(12.), + maxTip = cms.double(2.5), + maxZip = cms.double(12), ), geometry = cms.PSet( - caDCACuts = cms.vdouble(0.15, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25), - caThetaCuts = cms.vdouble(0.002, 0.002, 0.002, 0.002, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003), - startingPairs = cms.vuint32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), - pairGraph = cms.vuint32(0, 1, 0, 4, 0, 16, 1, 2, 1, 4, 1, 16, 2, 3, 2, 4, 2, 16, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 0, 2, 0, 5, 0, 17, 0, 6, 0, 18, 1, 3, 1, 5, 1, 17, 1, 6, 1, 18, 11, 12, 12, 13, 13, 14, 14, 15, 23, 24, 24, 25, 25, 26, 26, 27, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12, 16, 18, 17, 19, 18, 20, 19, 21, 20, 22, 21, 23, 22, 24), - phiCuts = cms.vint32(522, 522, 522, 626, 730, 730, 626, 730, 730, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 522, 522, 522, 522, 522, 522, 522, 522), - minZ = cms.vdouble(-16, 4, -22, -17, 6, -22, -18, 11, -22, 23, 30, 39, 50, 65, 82, 109, -28, -35, -44, -55, -70, -87, -113, -16, 7, -22, 11, -22, -17, 9, -22, 13, -22, 137, 173, 199, 229, -142, -177, -203, -233, 23, 30, 39, 50, 65, 82, 109, -28, -35, -44, -55, -70, -87, -113), - maxZ = cms.vdouble(17, 22, -4, 17, 22, -6, 18, 22, -11, 28, 35, 44, 55, 70, 87, 113, -23, -30, -39, -50, -65, -82, -109, 17, 22, -7, 22, -10, 17, 22, -9, 22, -13, 142, 177, 203, 233, -137, -173, -199, -229, 28, 35, 44, 55, 70, 87, 113, -23, -30, -39, -50, -65, -82, -109), - maxR = cms.vdouble(5, 5, 5, 7, 8, 8, 7, 7, 7, 6, 6, 6, 6, 5, 6, 5, 6, 6, 6, 6, 5, 6, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 6, 5, 5, 5, 6, 5, 5, 5, 9, 9, 9, 8, 8, 8, 11, 9, 9, 9, 8, 8, 8, 11) + # This cut also uses the hardCurvCut parameters inside the + # Kernel_connect "function". This is used to cut connections that have + # either a too low p_t or that do not intersect the BS+tolerance + # region. Internally, this cut is compared against the circle.dca0() in + # natural units divided by circle.curvature(), where circle is the + # circle passing through the 3 points of the triplet under + # investigation. Therefore the cut represent the compatibility of the + # circle in the transverse plane and the units are meant to be cm. + caDCACuts = cms.vdouble( + 0.15, # 0 + 0.25, # 1 + 0.20, # 2 + 0.20, # End PXB # 3 + 0.25, # 4 + 0.25, # 5 + 0.25, # 6 + 0.25, # 7 + 0.25, # 8 + 0.25, # 9 + 0.25, # 10 + 0.25, # 11 + 0.25, # 12 + 0.25, # 13 + 0.25, # 14 + 0.25, # End PXFWD+ # 15 + 0.25, # 16 + 0.25, # 17 + 0.25, # 18 + 0.25, # 19 + 0.25, # 20 + 0.25, # 21 + 0.25, # 22 + 0.25, # 23 + 0.25, # 24 + 0.25, # 25 + 0.25, # 26 + 0.25, # End PXFWD- # 27 + 0.10, # 28 + 0.10, # 29 + 0.25), # End of OT PinPS # 30 + # caThetaCut is used in the areAlignedRZ function to check if two + # sibling cell are compatible in the R-Z plane. In that same function, + # we also use ptmin variable. The caThetaCut is assigned to the SoA of + # the layers, and is percolated into this compatibility function via + # the SoA itself. + caThetaCuts = cms.vdouble( + 0.002, # 0 + 0.002, # 1 + 0.002, # 2 + 0.002, # 3 + 0.003, # 4 + 0.003, # 5 + 0.003, # 6 + 0.003, # 7 + 0.003, # 8 + 0.003, # 9 + 0.003, # 10 + 0.003, # 11 + 0.003, # 12 + 0.003, # 13 + 0.003, # 14 + 0.003, # 15 + 0.003, # 16 + 0.003, # 17 + 0.003, # 18 + 0.003, # 19 + 0.003, # 20 + 0.003, # 21 + 0.003, # 22 + 0.003, # 23 + 0.003, # 24 + 0.003, # 25 + 0.003, # 26 + 0.003, # 26 + 0.003, # 27 + 0.003, # 29 + 0.003), # 30 + startingPairs = cms.vint32( + 0, # PXB0-1 + 1, # PXB0-4 + 2, # PXB0-16 + 3, # PXB1-2 + 4, # PXB1-4 + 5, # PXB1-16 + 6, # PXB2-3 + 7, # PXB2-4 + 8, # PXB2-16 + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28), +# 30, +# 31), +# 29, +# 30, +# 31, +# 32), + pairGraph = cms.vint32( + 0, 1, # 0 + 0, 4, # 1 + 0, 16, # 2 + 1, 2, # 3 + 1, 4, # 4 + 1, 16, # 5 + 2, 3, # 6 + 2, 4, # 7 + 2, 16, # 8 + 4, 5, # 9 + 5, 6, # 10 + 6, 7, # 11 + 7, 8, # 12 + 8, 9, # 13 + 9, 10, # 14 + 10, 11, # 15 + 16, 17, # 16 + 17, 18, # 17 + 18, 19, # 18 + 19, 20, # 19 + 20, 21, # 20 + 21, 22, # 21 + 22, 23, # 22 + 0, 2, # 23 + 0, 5, # 24 + 0, 17, # 25 + 0, 6, # 26 + 0, 18, # 27 + 1, 3, # 28 +# 2, 5, # 29 + 1, 5, # 30 + 1, 17, # 31 +# 1, 18, # last starting pair # 32 + 11, 12, # 33 + 12, 13, # 34 + 13, 14, # 35 + 14, 15, # 36 + 23, 24, # 37 + 24, 25, # 38 + 25, 26, # 39 + 26, 27, # 40 + 4, 6, # 41 + 5, 7, # 42 + 6, 8, # 43 + 7, 9, # 44 + 8, 10, # 45 + 9, 11, # 46 + 10, 12, # 47 + 16, 18, # 48 + 17, 19, # 49 + 18, 20, # 50 + 19, 21, # 51 + 20, 22, # 52 + 21, 23, # 53 + 22, 24, # 54 + 2, 28, # 55 + 3, 28, # 56 +# 3, 29, # 57 + 28, 29, # 58 +# 28, 30, # 59 + 29, 30, # 60 + 4, 28, # 61 + 5, 28, # 62 + 6, 28, # 63 +# 7, 28, # 64 from top 0 # 64 +# 8, 28, # 65 +# 9, 28, # 66 + 16, 28, # 67 + 17, 28, # 68 + 18, 28, # 69 +# 19, 28, # 70 +# 20, 28, # 71 +# 21, 28, # 72 +# 4, 29, # 73 +# 5, 29, # 74 +# 6, 29, # 75 +# 7, 29, # 76 +# 8, 29, # 77 +# 16, 29, # 78 +# 17, 29, # 79 +# 18, 29, # 80 +# 19, 29, # 81 +# 20, 29 # 82 + ), + phiCuts = cms.vint32( + 522, # 0 + 522, # 1 + 522, # 2 + 626, # 3 + 730, # 4 + 730, # 5 + 626, # 6 + 730, # 7 + 730, # 8 + 522, # 9 + 522, # 10 + 522, # 11 + 522, # 12 + 522, # 13 + 522, # 14 + 522, # 15 + 522, # 16 + 522, # 17 + 522, # 18 + 522, # 19 + 522, # 20 + 522, # 21 + 522, # 22 + 522, # 23 + 522, # 24 + 522, # 25 + 522, # 26 + 522, # 27 + 522, # 28 +# 730, # 29 + 730, # 30 + 730, # 31 +# 730, # 32 + 730, # 33 + 730, # 34 + 730, # 35 + 730, # 36 + 730, # 37 + 730, # 38 + 730, # 39 + 730, # 40 + 730, # 41 + 730, # 42 + 730, # 43 + 730, # 44 + 730, # 45 + 730, # 46 + 1000, # 47 + 1000, # 48 + 1000, # 49 + 1000, # 50 + 1000, # 51 + 1000, # 52 + 1000, # 53 + 1000, # 54 + 1000, # 55 + 1000, # 56 +# 1000, # 57 + 1000, # 58 +# 1000, # 59 + 1000, # 60 + 1000, # 61 + 1000, # 62 + 1000, # 63 +# 1000, # 64 # 64 from top 0 +# 1000, # 65 +# 1000, # 66 + 1000, # 67 + 1000, # 68 + 1000, # 69 +# 1000, # 70 +# 1000, # 71 +# 1000, # 72 +# 1000, # 73 +# 1000, # 74 +# 1000, # 75 +# 1000, # 76 +# 1000, # 77 +# 1000, # 78 +# 1000, # 79 +# 1000, # 80 +# 1000, # 81 +# 1000 # 82 + ), + # minZ and maxZ are the limits in Z for the inner cell of a doublets in + # order to be able to make a doublet with the other layer. + minZ = cms.vdouble( + -19, # 0 + 4, # 1 + -22, # 2 + -18, # 3 + 8, # 4 + -22, # 5 + -22, # 6 + 11, # 7 + -22, # 8 + 23, # 9 + 30, # 10 + 39, # 11 + 50, # 12 + 65, # 13 + 82, # 14 + 109, # 15 + -28, # 16 + -35, # 17 + -44, # 18 + -55, # 19 + -70, # 20 + -87, # 21 + -113, # 22 + -19, # 23 + 7, # 24 + -22, # 25 + 11, # 26 + -22, # 27 + -19, # 28 +# 9, # 29 + 7, # 30 + -22, # 31 +# -22, # 32 + 137, # 33 + 173, # 34 + 199, # 35 + 229, # 36 + -142, # 37 + -177, # 38 + -203, # 39 + -233, # 40 + 23, # 41 + 30, # 42 + 39, # 43 + 50, # 44 + 65, # 45 + 82, # 46 + 109, # 47 + -28, # 48 + -35, # 49 + -44, # 50 + -55, # 51 + -70, # 52 + -87, # 53 + -113, # 54 + -20, # 55 + -20, # 56 +# -40, # 57 + -1200, # 58 +# -40, # 59 + -1200, # 60 + 23, # 61 + 30, # 62 + 39, # 63 +# 50, # 64 # 64 from top 0 +# -1000, # 65 +# -1000, # 66 + -28, # 67 + -35, # 68 + -44, # 69 +# -55, # 70 +# -1000, # 71 +# -1000, # 72 +# -1000, # 73 +# -1000, # 74 +# -1000, # 75 +# -1000, # 76 +# -1000, # 77 +# -1000, # 78 +# -1000, # 79 +# -1000, # 80 +# -1000, # 81 +# -1000 # 82 + ), + maxZ = cms.vdouble( + 19, # 0 + 22, # 1 + -4, # 2 + 18, # 3 + 22, # 4 + -8, # 5 + 22, # 6 + 22, # 7 + -11, # 8 + 28, # 9 + 35, # 10 + 44, # 11 + 55, # 12 + 70, # 13 + 87, # 14 + 113, # 15 + -23, # 16 + -30, # 17 + -39, # 18 + -50, # 19 + -65, # 20 + -82, # 21 + -109, # 22 + 19, # 23 + 22, # 24 + -7, # 25 + 22, # 26 + -11, # 27 + 19, # 28 +# 22, # 29 + 22, # 30 + -7, # 31 +# -13, # 32 + 142, # 33 + 177, # 34 + 203, # 35 + 233, # 36 + -137, # 37 + -173, # 38 + -199, # 39 + -229, # 40 + 28, # 41 + 35, # 42 + 44, # 43 + 55, # 44 + 70, # 45 + 87, # 46 + 113, # 47 + -23, # 48 + -30, # 49 + -39, # 50 + -50, # 51 + -65, # 52 + -82, # 53 + -109, # 54 + 20, # 55 + 20, # 56 +# 40, # 57 + 1200, # 58 +# 40, # 59 + 1200, # 60 + 28, # 61 + 35, # 62 + 44, # 63 +# 55, # 64 64 gtom top 0 +# 1000, # 65 +# 1000, # 66 + -23, # 67 + -30, # 68 + -39, # 69 +# -50, # 70 +# 1000, # 71 +# 1000, # 72 +# 1000, # 73 +# 1000, # 74 +# 1000, # 75 +# 1000, # 76 +# 1000, # 77 +# 1000, # 78 +# 1000, # 79 +# 1000, # 80 +# 1000, # 81 +# 1000 # 82 + ), + maxR = cms.vdouble( + 5, # 0 + 5, # 1 + 5, # 2 + 7, # 3 + 8, # 4 + 8, # 5 + 7, # 6 + 7, # 7 + 7, # 8 + 6, # 9 + 6, # 10 + 6, # 11 + 6, # 12 + 5, # 13 + 6, # 14 + 5, # 15 + 6, # 16 + 6, # 17 + 6, # 18 + 6, # 19 + 5, # 20 + 6, # 21 + 5, # 22 + 10, # 23 + 10, # 24 + 10, # 25 + 5, # 26 + 5, # 27 + 10, # 28 +# 10, # 29 + 10, # 30 + 8, # 31 +# 8, # 32 + 6, # 33 + 5, # 34 + 5, # 35 + 5, # 36 + 6, # 37 + 5, # 38 + 5, # 39 + 5, # 40 + 9, # 41 + 9, # 42 + 9, # 43 + 8, # 44 + 8, # 45 + 8, # 46 + 11, # 47 + 9, # 48 + 9, # 49 + 9, # 50 + 8, # 51 + 8, # 52 + 8, # 53 + 11, # 54 + 60, # 55 + 60, # 56 +# 60, # 57 + 60, # 58 +# 60, # 59 + 60, # 60 + 60, # 61 + 60, # 62 + 60, # 63 +# 60, # 64 64 from top 0 +# 60, # 65 +# 60, # 66 + 60, # 67 + 60, # 68 + 60,# 69 +# 60, # 70 +# 60, # 71 +# 60, # 72 +# 60, # 73 +# 60, # 74 +# 60, # 75 +# 60, # 76 +# 60, # 77 +# 60, # 78 +# 60, # 79 +# 60, # 80 +# 60, # 81 +# 60 # 82 + ) ), # autoselect the alpaka backend alpaka = cms.untracked.PSet(backend = cms.untracked.string('')) ) + + +def exclude_layers(hltPhase2PixelTracksSoA, layers_to_exclude): + keep_indices = [] + num_pairs = len(hltPhase2PixelTracksSoA.geometry.pairGraph) // 2 + for i in range(num_pairs): + a = hltPhase2PixelTracksSoA.geometry.pairGraph[2*i] + b = hltPhase2PixelTracksSoA.geometry.pairGraph[2*i + 1] + if a not in layers_to_exclude and b not in layers_to_exclude: + keep_indices.append(i) + # Now update in place + # For pairGraph, build the new flat list from kept pairs + new_pairGraph = [] + for i in keep_indices: + new_pairGraph.extend([hltPhase2PixelTracksSoA.geometry.pairGraph[2*i], hltPhase2PixelTracksSoA.geometry.pairGraph[2*i+1]]) + + hltPhase2PixelTracksSoA.geometry.pairGraph[:] = new_pairGraph + # Update all other lists in place + hltPhase2PixelTracksSoA.geometry.phiCuts[:] = [hltPhase2PixelTracksSoA.geometry.phiCuts[i] for i in keep_indices] + hltPhase2PixelTracksSoA.geometry.minZ[:] = [hltPhase2PixelTracksSoA.geometry.minZ[i] for i in keep_indices] + hltPhase2PixelTracksSoA.geometry.maxZ[:] = [hltPhase2PixelTracksSoA.geometry.maxZ[i] for i in keep_indices] + hltPhase2PixelTracksSoA.geometry.maxR[:] = [hltPhase2PixelTracksSoA.geometry.maxR[i] for i in keep_indices] + + + +if removeOT: + ot_layers_ = [28, 29, 30] + exclude_layers(hltPhase2PixelTracksSoA, layers_to_exclude=ot_layers_) + +#print("Using {} pair connections: {}".format(len(hltPhase2PixelTracksSoA.geometry.pairGraph), hltPhase2PixelTracksSoA.geometry.pairGraph)) + diff --git a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracks_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracks_cfi.py index b06d6b0fcdf9d..dbe639e7492c1 100644 --- a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracks_cfi.py +++ b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracks_cfi.py @@ -15,6 +15,8 @@ minNumberOfHits = cms.int32(0), minQuality = cms.string('tight'), pixelRecHitLegacySrc = cms.InputTag("hltSiPixelRecHits"), - trackSrc = cms.InputTag("hltPhase2PixelTracksSoA") + trackSrc = cms.InputTag("hltPhase2PixelTracksSoA"), + outerTrackerRecHitSrc = cms.InputTag("hltSiPhase2RecHits"), + useOTExtension = cms.bool(True) ) alpaka.toReplaceWith(hltPhase2PixelTracks, _hltPhase2PixelTracks) diff --git a/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTOtLocalRecoSequence_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTOtLocalRecoSequence_cfi.py index 74591b451f439..ce93aee2f2de7 100644 --- a/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTOtLocalRecoSequence_cfi.py +++ b/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTOtLocalRecoSequence_cfi.py @@ -1,5 +1,14 @@ import FWCore.ParameterSet.Config as cms from ..modules.hltMeasurementTrackerEvent_cfi import * +from ..modules.hltSiPhase2RecHits_cfi import * + +from Configuration.ProcessModifiers.alpaka_cff import alpaka HLTOtLocalRecoSequence = cms.Sequence(hltMeasurementTrackerEvent) + +HLTOtLocalRecoSequenceWithHits_ = cms.Sequence(hltMeasurementTrackerEvent + +hltSiPhase2RecHits + ) + +alpaka.toReplaceWith(HLTOtLocalRecoSequence, HLTOtLocalRecoSequenceWithHits_) diff --git a/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTPhase2PixelTracksSequence_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTPhase2PixelTracksSequence_cfi.py index 9277f2dadeace..987cd92fc7a99 100644 --- a/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTPhase2PixelTracksSequence_cfi.py +++ b/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTPhase2PixelTracksSequence_cfi.py @@ -7,6 +7,8 @@ from ..modules.hltPhase2PixelTracksHitDoublets_cfi import * from ..modules.hltPhase2PixelTracksHitSeeds_cfi import * from ..modules.hltPhase2PixelTracksSeedLayers_cfi import * +from ..modules.hltPhase2PixelRecHitsExtendedSoA_cfi import * +from RecoLocalTracker.Phase2TrackerRecHits.phase2OTRecHitsSoAConverter_cfi import * HLTPhase2PixelTracksSequence = cms.Sequence(hltPhase2PixelTracksSeedLayers+hltPhase2PixelTracksAndHighPtStepTrackingRegions+hltPhase2PixelTracksHitDoublets+hltPhase2PixelTracksHitSeeds+hltPhase2PixelFitterByHelixProjections+hltPhase2PixelTrackFilterByKinematics+hltPhase2PixelTracks) from ..sequences.HLTBeamSpotSequence_cfi import HLTBeamSpotSequence @@ -16,6 +18,8 @@ +hltPhase2PixelTracksAndHighPtStepTrackingRegions # needed by highPtTripletStep iteration +hltPhase2PixelFitterByHelixProjections # needed by tracker muons +hltPhase2PixelTrackFilterByKinematics # needed by tracker muons + +phase2OTRecHitsSoAConverter + +hltPhase2PixelRecHitsExtendedSoA +hltPhase2PixelTracksSoA +hltPhase2PixelTracks ) diff --git a/HLTrigger/Configuration/python/customizeHLTforCMSSW.py b/HLTrigger/Configuration/python/customizeHLTforCMSSW.py index 884840d6ef234..e4a64ca25e152 100644 --- a/HLTrigger/Configuration/python/customizeHLTforCMSSW.py +++ b/HLTrigger/Configuration/python/customizeHLTforCMSSW.py @@ -182,6 +182,26 @@ def customizeHLTfor47611(process): return process +def customizeHLTfor47630(process): + attributes_to_remove = [ + 'connectionRetrialPeriod', + 'connectionRetrialTimeOut', + 'connectionTimeOut', + 'enableConnectionSharing', + 'enablePoolAutomaticCleanUp', + 'enableReadOnlySessionOnUpdateConnection', + 'idleConnectionCleanupPeriod' + ] + + for mod in modules_by_type(process, "PoolDBESSource"): + if hasattr(mod, 'DBParameters'): + pset = getattr(mod,'DBParameters') + for attr in attributes_to_remove: + if hasattr(pset, attr): + delattr(mod.DBParameters, attr) + + return process + # CMSSW version specific customizations def customizeHLTforCMSSW(process, menuType="GRun"): diff --git a/RecoLocalTracker/Phase2TrackerRecHits/plugins/BuildFile.xml b/RecoLocalTracker/Phase2TrackerRecHits/plugins/BuildFile.xml index 5ec8e56667ecf..5addf06f89d9b 100644 --- a/RecoLocalTracker/Phase2TrackerRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/Phase2TrackerRecHits/plugins/BuildFile.xml @@ -6,3 +6,14 @@ + + + + + + + + + + + diff --git a/RecoLocalTracker/Phase2TrackerRecHits/plugins/alpaka/Phase2OTRecHitsSoAConverter.cc b/RecoLocalTracker/Phase2TrackerRecHits/plugins/alpaka/Phase2OTRecHitsSoAConverter.cc new file mode 100644 index 0000000000000..4f498bc7d52ab --- /dev/null +++ b/RecoLocalTracker/Phase2TrackerRecHits/plugins/alpaka/Phase2OTRecHitsSoAConverter.cc @@ -0,0 +1,266 @@ +#include +#include +#include +#include "DataFormats/BeamSpot/interface/BeamSpot.h" +#include "DataFormats/GeometryVector/interface/GlobalPoint.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" + +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/SiStripDetId/interface/StripSubdetector.h" +#include "DataFormats/TrackerRecHit2D/interface/Phase2TrackerRecHit1D.h" +#include "DataFormats/Math/interface/approx_atan2.h" + +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/EDProducer.h" + +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" + +#include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h" + +#include "Geometry/TrackerGeometryBuilder/interface/StripGeomDetUnit.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class Phase2OTRecHitsSoAConverter : public stream::EDProducer<> { + using Hits = reco::TrackingRecHitsSoACollection; + using HitsHost = ::reco::TrackingRecHitHost; + using HMSstorage = typename std::vector; + + public: + explicit Phase2OTRecHitsSoAConverter(const edm::ParameterSet& iConfig); + ~Phase2OTRecHitsSoAConverter() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + void beginRun(edm::Run const& run, edm::EventSetup const& setup) override; + + private: + void produce(device::Event& iEvent, const device::EventSetup& es) override; + + const edm::ESGetToken geomToken_; + const edm::ESGetToken geomTokenRun_; + const edm::EDGetTokenT recHitToken_; + const edm::EDGetTokenT<::reco::BeamSpot> beamSpotToken_; + const edm::EDGetTokenT pixelHitsSoA_; + + const device::EDPutToken stripSoADevice_; + const edm::EDPutTokenT hitModuleStart_; + + int modulesInPixel_; + std::unordered_map detIdIsP_; + std::vector orderedModules_; + std::unordered_map moduleIndexToOffset_; + std::map detIdToIndex_; + }; + + Phase2OTRecHitsSoAConverter::Phase2OTRecHitsSoAConverter(const edm::ParameterSet& iConfig) + : stream::EDProducer<>(iConfig), + geomToken_(esConsumes()), + geomTokenRun_(esConsumes()), + recHitToken_{consumes(iConfig.getParameter("otRecHitSource"))}, + beamSpotToken_(consumes<::reco::BeamSpot>(iConfig.getParameter("beamSpot"))), + pixelHitsSoA_{consumes(iConfig.getParameter("pixelRecHitSoASource"))}, + stripSoADevice_{produces()}, + hitModuleStart_{produces()}, + modulesInPixel_(0) {} + + void Phase2OTRecHitsSoAConverter::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("pixelRecHitSoASource", edm::InputTag("hltPhase2SiPixelRecHitsSoA")); + desc.add("otRecHitSource", edm::InputTag("hltSiPhase2RecHits")); + desc.add("beamSpot", edm::InputTag("hltOnlineBeamSpot")); + + descriptions.addWithDefaultLabel(desc); + } + + void Phase2OTRecHitsSoAConverter::beginRun(edm::Run const &iRun, edm::EventSetup const& iSetup) { + const auto& trackerGeometry = &iSetup.getData(geomTokenRun_); + auto isPinPSinOTBarrel = [&](DetId detId) { + // std::cout << (int)trackerGeometry->getDetectorType(detId) << " " << (trackerGeometry->getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PSP) << "\n"; + // std::cout << (int)detId.subdetId() << " " << (detId.subdetId() == StripSubdetector::TOB) << std::endl; + // Select only P-hits from the OT barrel + return (trackerGeometry->getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PSP && + detId.subdetId() == StripSubdetector::TOB); + }; + auto isPh2Pixel = [&](DetId detId) { + return (trackerGeometry->getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PXB || + trackerGeometry->getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PXB3D || + trackerGeometry->getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PXF || + trackerGeometry->getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PXF3D); + }; + + auto const& detUnits = trackerGeometry->detUnits(); + + for (auto& detUnit : detUnits) { + DetId detId(detUnit->geographicalId()); + detIdIsP_[detId.rawId()] = isPinPSinOTBarrel(detId); + if (isPh2Pixel(detId)) + modulesInPixel_++; + if (detIdIsP_[detId.rawId()]) { + detIdToIndex_[detUnit->geographicalId()] = detUnit->index(); + moduleIndexToOffset_[detUnit->index()] = orderedModules_.size(); + orderedModules_.push_back(detUnit->index()); + // std::cout << "Inserted " << detUnit->index() << " " << p_modulesInPSInOTBarrel.size() << " on layer " << int((detId.rawId() >> 20) & 0xF) << std::endl; + } + } + } + + //https://github.com/cms-sw/cmssw/blob/3f06ef32d66bd2a7fa04e411fa4db4845193bd3c/RecoTracker/MkFit/plugins/convertHits.h + + void Phase2OTRecHitsSoAConverter::produce(device::Event& iEvent, device::EventSetup const& iSetup) { + auto queue = iEvent.queue(); + auto& bs = iEvent.get(beamSpotToken_); + const auto& trackerGeometry = &iSetup.getData(geomToken_); + auto const& stripHits = iEvent.get(recHitToken_); + + auto const& pixelHitsHost = iEvent.get(pixelHitsSoA_); + int nPixelHits = pixelHitsHost.view().metadata().size(); + + // Count strip hits and active strip modules + const int nStripHits = stripHits.data().size(); + const int activeStripModules = stripHits.size(); + + // Count the number of P hits in the OT to dimension the SoA + int PHitsInOTBarrel = 0; + for (const auto& detSet : stripHits) { + for (const auto& recHit : detSet) { + DetId detId(recHit.geographicalId()); + if (detIdIsP_[detId.rawId()]) + PHitsInOTBarrel++; + } + } + //std::cout << "Tot number of modules in Pixels " << modulesInPixel_ << std::endl; + //std::cout << "Tot number of p_modulesInPSInOTBarrel: " << orderedModules_.size() << std::endl; + //std::cout << "Number of strip (active) modules: " << activeStripModules << std::endl; + //std::cout << "Number of strip hits: " << nStripHits << std::endl; + //std::cout << "Total hits of PinOTBarrel: " << PHitsInOTBarrel << std::endl; + + HitsHost stripHitsHost(queue, PHitsInOTBarrel, orderedModules_.size()); + auto& stripHitsModuleView = stripHitsHost.view<::reco::HitModuleSoA>(); + + std::vector counterOfHitsPerModule(orderedModules_.size(), 0); + assert(orderedModules_.size()); + for (const auto& detSet : stripHits) { + auto firstHit = detSet.begin(); + auto detId = firstHit->rawId(); + auto index = detIdToIndex_[detId]; + int offset = 0; + if (detIdIsP_[detId]) { + offset = moduleIndexToOffset_[index]; + for (const auto& recHit : detSet) { + counterOfHitsPerModule[offset]++; + } + } + } +#if 0 + int modId = 0; + for (auto c : counterOfHitsPerModule) { + std::cout << "On module " << modId << " we have " << c << " hits." << std::endl; + modId++; + } +#endif + + std::vector cumulativeHitPerModule(counterOfHitsPerModule.size()); + std::partial_sum(counterOfHitsPerModule.begin(), counterOfHitsPerModule.end(), cumulativeHitPerModule.begin()); + // Create new vector with first element as 0, then shifted contents from counterOfHitsPerModule + std::vector shifted(cumulativeHitPerModule.size(), 0); + stripHitsModuleView[0].moduleStart() = nPixelHits; + // std::cout << "Module start: 0 with hits: " << stripHitsModuleView[0].moduleStart() << std::endl; + for (size_t i = 1; i < cumulativeHitPerModule.size(); ++i) { + shifted[i] = cumulativeHitPerModule[i - 1]; + stripHitsModuleView[i].moduleStart() = cumulativeHitPerModule[i - 1] + nPixelHits; + // std::cout << "Module start: " << i << " with hits: " << stripHitsModuleView[i].moduleStart() << std::endl; + } + + for (const auto& detSet : stripHits) { + auto firstHit = detSet.begin(); + auto detId = firstHit->rawId(); + auto det = trackerGeometry->idToDet(detId); + auto index = detIdToIndex_[detId]; + int offset = 0; + if (detIdIsP_[detId]) { + offset = moduleIndexToOffset_[index]; + for (const auto& recHit : detSet) { + // Select only P-hits from the OT barrel + if (detIdIsP_[detId]) { + int idx = shifted[offset]++; + assert(idx < PHitsInOTBarrel); + stripHitsHost.view()[idx].xLocal() = recHit.localPosition().x(); + stripHitsHost.view()[idx].yLocal() = recHit.localPosition().y(); + stripHitsHost.view()[idx].xerrLocal() = recHit.localPositionError().xx(); + stripHitsHost.view()[idx].yerrLocal() = recHit.localPositionError().yy(); + //std::cout << "Local (x, y) with (xx, yy) --> (" << recHit.localPosition().x() << ", " << recHit.localPosition().y() << ") with (" << recHit.localPositionError().xx() << ", " << recHit.localPositionError().yy() << ")" << std::endl; + auto globalPosition = det->toGlobal(recHit.localPosition()); + double gx = globalPosition.x() - bs.x0(); + double gy = globalPosition.y() - bs.y0(); + double gz = globalPosition.z() - bs.z0(); + //std::cout << "Global (x, y, z) --> (" << globalPosition.x() << ", " << globalPosition.y() << ", " << globalPosition.z() << ")" << std::endl; + //std::cout << "Corrected Global (x, y, z) --> (" << gx << ", " << gy << ", " << gz << ")" << std::endl; + // std::cout << gx << std::endl; + stripHitsHost.view()[idx].xGlobal() = gx; + stripHitsHost.view()[idx].yGlobal() = gy; + stripHitsHost.view()[idx].zGlobal() = gz; + stripHitsHost.view()[idx].rGlobal() = sqrt(gx * gx + gy * gy); + stripHitsHost.view()[idx].iphi() = unsafe_atan2s<7>(gy, gx); + stripHitsHost.view()[idx].chargeAndStatus().charge = 0; + stripHitsHost.view()[idx].chargeAndStatus().status = {0, 0, 0, 0, 0}; + stripHitsHost.view()[idx].clusterSizeX() = -1; + stripHitsHost.view()[idx].clusterSizeY() = -1; + stripHitsHost.view()[idx].detectorIndex() = modulesInPixel_ + offset; + } + } + } + } + stripHitsModuleView[orderedModules_.size()].moduleStart() = + cumulativeHitPerModule[orderedModules_.size() - 1] + nPixelHits; + + //std::cout << "DONE" << std::endl; +#if 0 + int current = 0; + for (int h = 0; h < stripHitsHost.view().metadata().size(); ++h) { + auto idx = stripHitsHost.view()[h].detectorIndex(); + std::cout << h << " detectorIndexInSoA: " << idx << std::endl; + assert(idx>=current); + current = idx; + } + for (int h = 0; h < stripHitsModuleView.metadata().size(); ++h) { + std::cout << h << " -> " << stripHitsModuleView[h].moduleStart() << std::endl; + } +#endif + + auto moduleStartView = cms::alpakatools::make_host_view(stripHitsModuleView.moduleStart(), + stripHitsModuleView.metadata().size()); + HMSstorage moduleStartVec(stripHitsModuleView.metadata().size()); + + // Put in the event the hit module start vector. + // Now, this could be avoided having the Host Hit SoA + // consumed by the downstream module (converters to legacy formats). + // But this is the common practice at the moment + // also for legacy data formats. + alpaka::memcpy(queue, moduleStartVec, moduleStartView); + iEvent.emplace(hitModuleStart_, std::move(moduleStartVec)); + + Hits stripHitsDevice(queue, stripHitsHost.view().metadata().size(), stripHitsHost.nModules()); + alpaka::memcpy(queue, stripHitsDevice.buffer(), stripHitsHost.buffer()); + stripHitsDevice.updateFromDevice(queue); + + // Would be useful to have a way to prompt a special CopyToDevice for EDProducers + iEvent.emplace(stripSoADevice_, std::move(stripHitsDevice)); + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(Phase2OTRecHitsSoAConverter); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitExtendedAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitExtendedAlpaka.cc new file mode 100644 index 0000000000000..08b413339a502 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitExtendedAlpaka.cc @@ -0,0 +1,253 @@ +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/BeamSpot/interface/alpaka/BeamSpotDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/global/EDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h" + +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h" + +#include "PixelRecHitKernel.h" + +#include + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class SiPixelRecHitExtendedAlpaka : public global::EDProducer<> { + public: + explicit SiPixelRecHitExtendedAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelRecHitExtendedAlpaka() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + private: + void produce(edm::StreamID streamID, device::Event& iEvent, const device::EventSetup& iSetup) const override; + + const device::EDGetToken pixelRecHitToken_; + const device::EDGetToken trackerRecHitToken_; + + const device::EDPutToken outputRecHitsSoAToken_; + }; + + SiPixelRecHitExtendedAlpaka::SiPixelRecHitExtendedAlpaka(const edm::ParameterSet& iConfig) + : EDProducer(iConfig), + pixelRecHitToken_(consumes(iConfig.getParameter("pixelRecHitsSoA"))), + trackerRecHitToken_(consumes(iConfig.getParameter("trackerRecHitsSoA"))), + outputRecHitsSoAToken_(produces()) {} + + void SiPixelRecHitExtendedAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("pixelRecHitsSoA", edm::InputTag("hltPhase2SiPixelRecHitsSoA")); + desc.add("trackerRecHitsSoA", edm::InputTag("phase2OTRecHitsSoAConverter")); + + descriptions.addWithDefaultLabel(desc); + } + + void SiPixelRecHitExtendedAlpaka::produce(edm::StreamID streamID, + device::Event& iEvent, + const device::EventSetup& es) const { + // get both Pixel and Tracker recHits + auto queue = iEvent.queue(); + const auto& pixelRecHitsSoA = iEvent.get(pixelRecHitToken_); + const auto& otRecHitsSoA = iEvent.get(trackerRecHitToken_); + //std::cout << "----------------- Merging Pixel and Tracker RecHits -----------------" << std::endl; + const int nPixelHits = pixelRecHitsSoA.nHits(); + //std::cout << "Number of Pixel recHits: " << nPixelHits << std::endl; + const int nTrackerHits = otRecHitsSoA.nHits(); + //std::cout << "Number of Tracker recHits: " << nTrackerHits << std::endl; + const int nTotHits = nPixelHits + nTrackerHits; + //std::cout << "Number of Pixel modules: " << pixelRecHitsSoA.nModules() << std::endl; + //std::cout << "Number of Tracker modules: " << otRecHitsSoA.nModules() << std::endl; + const int nTotModules = pixelRecHitsSoA.nModules() + otRecHitsSoA.nModules(); + + auto outputSoA = reco::TrackingRecHitsSoACollection(queue, nTotHits, nTotModules); + //std::cout << "Total number of recHits: " << outputSoA.nHits() << std::endl; + + // copy all columns from pixelRecHitsSoA and otRecHitsSoA to outputSoA + // xLocal + auto xLocalOutputPixel = cms::alpakatools::make_device_view(queue, outputSoA.view().xLocal(), nPixelHits); + auto xLocalOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().xLocal() + nPixelHits, nTrackerHits); + auto xLocalPixel = cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().xLocal(), nPixelHits); + auto xLocalTracker = cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().xLocal(), nTrackerHits); + alpaka::memcpy(queue, xLocalOutputPixel, xLocalPixel); + alpaka::memcpy(queue, xLocalOutputTracker, xLocalTracker); + + // yLocal + auto yLocalOutputPixel = cms::alpakatools::make_device_view(queue, outputSoA.view().yLocal(), nPixelHits); + auto yLocalOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().yLocal() + nPixelHits, nTrackerHits); + auto yLocalPixel = cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().yLocal(), nPixelHits); + auto yLocalTracker = cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().yLocal(), nTrackerHits); + alpaka::memcpy(queue, yLocalOutputPixel, yLocalPixel); + alpaka::memcpy(queue, yLocalOutputTracker, yLocalTracker); + + // xerrLocal + auto xerrLocalOutputPixel = cms::alpakatools::make_device_view(queue, outputSoA.view().xerrLocal(), nPixelHits); + auto xerrLocalOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().xerrLocal() + nPixelHits, nTrackerHits); + auto xerrLocalPixel = cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().xerrLocal(), nPixelHits); + auto xerrLocalTracker = cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().xerrLocal(), nTrackerHits); + alpaka::memcpy(queue, xerrLocalOutputPixel, xerrLocalPixel); + alpaka::memcpy(queue, xerrLocalOutputTracker, xerrLocalTracker); + + // yerrLocal + auto yerrLocalOutputPixel = cms::alpakatools::make_device_view(queue, outputSoA.view().yerrLocal(), nPixelHits); + auto yerrLocalOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().yerrLocal() + nPixelHits, nTrackerHits); + auto yerrLocalPixel = cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().yerrLocal(), nPixelHits); + auto yerrLocalTracker = cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().yerrLocal(), nTrackerHits); + alpaka::memcpy(queue, yerrLocalOutputPixel, yerrLocalPixel); + alpaka::memcpy(queue, yerrLocalOutputTracker, yerrLocalTracker); + + // xGlobal + auto xGlobalOutputPixel = cms::alpakatools::make_device_view(queue, outputSoA.view().xGlobal(), nPixelHits); + auto xGlobalOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().xGlobal() + nPixelHits, nTrackerHits); + auto xGlobalPixel = cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().xGlobal(), nPixelHits); + auto xGlobalTracker = cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().xGlobal(), nTrackerHits); + alpaka::memcpy(queue, xGlobalOutputPixel, xGlobalPixel); + alpaka::memcpy(queue, xGlobalOutputTracker, xGlobalTracker); + + // yGlobal + auto yGlobalOutputPixel = cms::alpakatools::make_device_view(queue, outputSoA.view().yGlobal(), nPixelHits); + auto yGlobalOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().yGlobal() + nPixelHits, nTrackerHits); + auto yGlobalPixel = cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().yGlobal(), nPixelHits); + auto yGlobalTracker = cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().yGlobal(), nTrackerHits); + alpaka::memcpy(queue, yGlobalOutputPixel, yGlobalPixel); + alpaka::memcpy(queue, yGlobalOutputTracker, yGlobalTracker); + + // zGlobal + auto zGlobalOutputPixel = cms::alpakatools::make_device_view(queue, outputSoA.view().zGlobal(), nPixelHits); + auto zGlobalOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().zGlobal() + nPixelHits, nTrackerHits); + auto zGlobalPixel = cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().zGlobal(), nPixelHits); + auto zGlobalTracker = cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().zGlobal(), nTrackerHits); + alpaka::memcpy(queue, zGlobalOutputPixel, zGlobalPixel); + alpaka::memcpy(queue, zGlobalOutputTracker, zGlobalTracker); + + // rGlobal + auto rGlobalOutputPixel = cms::alpakatools::make_device_view(queue, outputSoA.view().rGlobal(), nPixelHits); + auto rGlobalOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().rGlobal() + nPixelHits, nTrackerHits); + auto rGlobalPixel = cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().rGlobal(), nPixelHits); + auto rGlobalTracker = cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().rGlobal(), nTrackerHits); + alpaka::memcpy(queue, rGlobalOutputPixel, rGlobalPixel); + alpaka::memcpy(queue, rGlobalOutputTracker, rGlobalTracker); + + // iphi + auto iphiOutputPixel = cms::alpakatools::make_device_view(queue, outputSoA.view().iphi(), nPixelHits); + auto iphiOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().iphi() + nPixelHits, nTrackerHits); + auto iphiPixel = cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().iphi(), nPixelHits); + auto iphiTracker = cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().iphi(), nTrackerHits); + alpaka::memcpy(queue, iphiOutputPixel, iphiPixel); + alpaka::memcpy(queue, iphiOutputTracker, iphiTracker); + + // chargeAndStatus + auto chargeAndStatusOutputPixel = + cms::alpakatools::make_device_view(queue, outputSoA.view().chargeAndStatus(), nPixelHits); + auto chargeAndStatusOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().chargeAndStatus() + nPixelHits, nTrackerHits); + auto chargeAndStatusPixel = + cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().chargeAndStatus(), nPixelHits); + auto chargeAndStatusTracker = + cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().chargeAndStatus(), nTrackerHits); + alpaka::memcpy(queue, chargeAndStatusOutputPixel, chargeAndStatusPixel); + alpaka::memcpy(queue, chargeAndStatusOutputTracker, chargeAndStatusTracker); + + // clusterSizeX + auto clusterSizeXOutputPixel = + cms::alpakatools::make_device_view(queue, outputSoA.view().clusterSizeX(), nPixelHits); + auto clusterSizeXOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().clusterSizeX() + nPixelHits, nTrackerHits); + auto clusterSizeXPixel = + cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().clusterSizeX(), nPixelHits); + auto clusterSizeXTracker = + cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().clusterSizeX(), nTrackerHits); + alpaka::memcpy(queue, clusterSizeXOutputPixel, clusterSizeXPixel); + alpaka::memcpy(queue, clusterSizeXOutputTracker, clusterSizeXTracker); + + // clusterSizeY + auto clusterSizeYOutputPixel = + cms::alpakatools::make_device_view(queue, outputSoA.view().clusterSizeY(), nPixelHits); + auto clusterSizeYOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().clusterSizeY() + nPixelHits, nTrackerHits); + auto clusterSizeYPixel = + cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().clusterSizeY(), nPixelHits); + auto clusterSizeYTracker = + cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().clusterSizeY(), nTrackerHits); + alpaka::memcpy(queue, clusterSizeYOutputPixel, clusterSizeYPixel); + alpaka::memcpy(queue, clusterSizeYOutputTracker, clusterSizeYTracker); + + // detectorIndex + auto detectorIndexOutputPixel = + cms::alpakatools::make_device_view(queue, outputSoA.view().detectorIndex(), nPixelHits); + auto detectorIndexOutputTracker = + cms::alpakatools::make_device_view(queue, outputSoA.view().detectorIndex() + nPixelHits, nTrackerHits); + auto detectorIndexPixel = + cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().detectorIndex(), nPixelHits); + auto detectorIndexTracker = + cms::alpakatools::make_device_view(queue, otRecHitsSoA.view().detectorIndex(), nTrackerHits); + alpaka::memcpy(queue, detectorIndexOutputPixel, detectorIndexPixel); + alpaka::memcpy(queue, detectorIndexOutputTracker, detectorIndexTracker); + + auto offsetBPIX2Output = cms::alpakatools::make_device_view(queue, outputSoA.view().offsetBPIX2()); + auto offsetBPIX2Pixel = cms::alpakatools::make_device_view(queue, pixelRecHitsSoA.view().offsetBPIX2()); + alpaka::memcpy(queue, offsetBPIX2Output, offsetBPIX2Pixel); + + // copy the moduleStart from pixelRecHitsSoA and otRecHitsSoA to outputSoA + const int nPixelModules = pixelRecHitsSoA.nModules(); + const int nTrackerModules = otRecHitsSoA.nModules() + 1; + // size of the copy nPixelModules + nTrackerModules + 1 to account for the last "hidden" + // element of the SoA, keeping track of the cumulative sum of all the hits in the previous + // modules (thus one more than the number of modules is required to account for the hits + // in the last tracker module) see also DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h + + auto hitModuleStartOutputPixel = + cms::alpakatools::make_device_view(queue, outputSoA.view<::reco::HitModuleSoA>().moduleStart(), nPixelModules); + auto hitModuleStartOutputTracker = cms::alpakatools::make_device_view( + queue, outputSoA.view<::reco::HitModuleSoA>().moduleStart() + nPixelModules, nTrackerModules); + + const auto hitModuleStartPixel = cms::alpakatools::make_device_view( + queue, pixelRecHitsSoA.view<::reco::HitModuleSoA>().moduleStart(), nPixelModules); + const auto hitModuleStartTracker = cms::alpakatools::make_device_view( + queue, otRecHitsSoA.view<::reco::HitModuleSoA>().moduleStart(), nTrackerModules); + + alpaka::memcpy(queue, hitModuleStartOutputPixel, hitModuleStartPixel); + alpaka::memcpy(queue, hitModuleStartOutputTracker, hitModuleStartTracker); + + outputSoA.updateFromDevice(queue); + // emplace the merged SoA in the event + iEvent.emplace(outputRecHitsSoAToken_, std::move(outputSoA)); + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(SiPixelRecHitExtendedAlpaka); \ No newline at end of file diff --git a/RecoTracker/PixelSeeding/interface/CACoupleDevice.h b/RecoTracker/PixelSeeding/interface/CACoupleDevice.h new file mode 100644 index 0000000000000..88f46f427615c --- /dev/null +++ b/RecoTracker/PixelSeeding/interface/CACoupleDevice.h @@ -0,0 +1,17 @@ +#ifndef RecoTracker_PixelSeeding_interface_CACoupleDevice_H +#define RecoTracker_PixelSeeding_interface_CACoupleDevice_H + +#include + +#include + +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "RecoTracker/PixelSeeding/interface/CACoupleSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace caStructures { + template + using CACoupleDevice = PortableDeviceCollection; +} + +#endif // RecoTracker_PixelSeeding_interface_CACoupleDevice_H diff --git a/RecoTracker/PixelSeeding/interface/CACoupleHost.h b/RecoTracker/PixelSeeding/interface/CACoupleHost.h new file mode 100644 index 0000000000000..1d8a3c20affd1 --- /dev/null +++ b/RecoTracker/PixelSeeding/interface/CACoupleHost.h @@ -0,0 +1,15 @@ +#ifndef RecoTracker_PixelSeeding_interface_CACoupleHost_h +#define RecoTracker_PixelSeeding_interface_CACoupleHost_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "RecoTracker/PixelSeeding/interface/CACoupleSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace caStructures { + using CACoupleHost = PortableHostCollection; +} +#endif // RecoTracker_PixelSeeding_interface_CACoupleHost_h diff --git a/RecoTracker/PixelSeeding/interface/CACoupleSoA.h b/RecoTracker/PixelSeeding/interface/CACoupleSoA.h new file mode 100644 index 0000000000000..fd7f20dcd76cd --- /dev/null +++ b/RecoTracker/PixelSeeding/interface/CACoupleSoA.h @@ -0,0 +1,20 @@ +#ifndef RecoTracker_PixelSeeding_interface_CACoupleSoA_h +#define RecoTracker_PixelSeeding_interface_CACoupleSoA_h + +#include + +#include + +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +namespace caStructures { + + GENERATE_SOA_LAYOUT(CACoupleLayout, SOA_COLUMN(uint32_t, inner), SOA_COLUMN(uint32_t, outer)) + + using CACoupleSoA = CACoupleLayout<>; + using CACoupleSoAView = CACoupleSoA::View; + using CACoupleSoAConstView = CACoupleSoA::ConstView; + +} // namespace caStructures + +#endif // RecoTracker_PixelSeeding_interface_CACoupleSoA_h diff --git a/RecoTracker/PixelSeeding/interface/alpaka/CACoupleSoACollection.h b/RecoTracker/PixelSeeding/interface/alpaka/CACoupleSoACollection.h new file mode 100644 index 0000000000000..d368fb136c302 --- /dev/null +++ b/RecoTracker/PixelSeeding/interface/alpaka/CACoupleSoACollection.h @@ -0,0 +1,26 @@ +#ifndef RecoTracker_PixelSeeding_interface_CACoupleSoACollection_h +#define RecoTracker_PixelSeeding_interface_CACoupleSoACollection_h + +#include + +#include + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "RecoTracker/PixelSeeding/interface/CACoupleDevice.h" +#include "RecoTracker/PixelSeeding/interface/CACoupleHost.h" +#include "RecoTracker/PixelSeeding/interface/CACoupleSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using ::caStructures::CACoupleDevice; + using ::caStructures::CACoupleHost; + using CACoupleSoACollection = + std::conditional_t, CACoupleHost, CACoupleDevice>; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(CACoupleSoACollection, ::caStructures::CACoupleHost); + +#endif // RecoTracker_PixelSeeding_interface_CACoupleSoACollection_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc index 60317b3da3fd6..df93a309f9ba4 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc @@ -1,6 +1,6 @@ -// #define BROKENLINE_DEBUG -// #define BL_DUMP_HITS -// #define GPU_DEBUG +// MRMR #define BROKENLINE_DEBUG // MRMR +// MRMR #define BL_DUMP_HITS // MRMR +// MRMR #define GPU_DEBUG // MRMR #include #include @@ -136,8 +136,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { #ifdef BL_DUMP_HITS bool dump = foundNtuplets->size(tkid) == 5; + bool dump = foundNtuplets->size(tkid) >= 4; if (dump) { - printf("Track id %d %d Hit %d on %d\nGlobal: hits.col(%d) << %f,%f,%f\n", + printf("Track local_id %d tkid: %d Hit %d on det: %d\nGlobal: hits.col(%d) << x: %f,y: %f, r(%f),z: %f\n", local_idx, tkid, hit, @@ -145,7 +146,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { i, hh[hit].xGlobal(), hh[hit].yGlobal(), + sqrt(hh[hit].xGlobal()*hh[hit].xGlobal()+hh[hit].yGlobal()*hh[hit].yGlobal()), hh[hit].zGlobal()); + printf("Local Error(%d): x2: %e, x[um]: %e, y2: %e, y[um]: %e\n", i, hh[hit].xerrLocal(), 1.e4*sqrt(hh[hit].xerrLocal()), hh[hit].yerrLocal(), 1.e4*sqrt(hh[hit].yerrLocal())); + printf("Error: hits_ge.col(%d) x[um]: %e, y[um]: %e, z[um]: %e\n", i, 1.e4*sqrt(ge[0]), 1.e4*sqrt(ge[2]), 1.e4*sqrt(ge[5])); printf("Error: hits_ge.col(%d) << %e,%e,%e,%e,%e,%e\n", i, ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]); } #endif @@ -154,6 +158,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]; } brokenline::fastFit(acc, hits, fast_fit); +#if 0 + printf("Fast Fit: %f, %f, %f, %f\n", fast_fit(0), fast_fit(1), fast_fit(2), fast_fit(3)); +#endif #ifdef BROKENLINE_DEBUG // any NaN value should cause the track to be rejected at a later stage @@ -218,7 +225,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { #ifdef BROKENLINE_DEBUG if (!(circle.chi2 >= 0) || !(line.chi2 >= 0)) printf("kernelBLFit failed! %f/%f\n", circle.chi2, line.chi2); - printf("kernelBLFit size %d for %d hits circle.par(0,1,2): %d %f,%f,%f\n", + printf("kernelBLFit size %d for %d hits of tkid %d circle.par(0,1,2): %f,%f,%f\n", N, N, tkid, @@ -226,7 +233,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { circle.par(1), circle.par(2)); printf("kernelBLHits line.par(0,1): %d %f,%f\n", tkid, line.par(0), line.par(1)); - printf("kernelBLHits chi2 cov %f/%f %e,%e,%e,%e,%e\n", + printf("kernelBLHits chi2_circle: %f chi2_line: %f, cov(0-3)_circle: %e, %e, %e cov(1-2)_line %e,%e\n", circle.chi2, line.chi2, circle.cov(0, 0), @@ -418,6 +425,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template class HelixFit; template class HelixFit; + template class HelixFit; template class HelixFit; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h b/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h index 3c4fcf506e139..eabbc8a8d0a36 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h @@ -1,9 +1,7 @@ #ifndef RecoTracker_PixelSeeding_plugins_alpaka_CACell_h #define RecoTracker_PixelSeeding_plugins_alpaka_CACell_h -// #define GPU_DEBUG -// #define CA_DEBUG -// #define CA_WARNINGS +// #define ONLY_TRIPLETS_IN_HOLE #include #include @@ -19,137 +17,157 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "RecoTracker/PixelSeeding/interface/CircleEq.h" #include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" -#include "RecoTracker/PixelSeeding/interface/CAPairSoA.h" +#include "RecoTracker/PixelSeeding/interface/CACoupleSoA.h" #include "CAStructures.h" +#include "CASimpleCell.h" namespace ALPAKA_ACCELERATOR_NAMESPACE { using namespace ::caStructures; template - class CACell { + class CACellT : public CASimpleCell { public: - ALPAKA_FN_ACC ALPAKA_FN_INLINE void init(const HitsConstView& hh, + using typename CASimpleCell::StatusBit; + using typename CASimpleCell::Quality; + using typename CASimpleCell::HitContainer; + using typename CASimpleCell::TmpTuple; + + using PtrAsInt = unsigned long long; + + using OuterHitOfCellContainer = OuterHitOfCellContainerT; + using OuterHitOfCell = OuterHitOfCellT; + using CellNeighbors = CellNeighborsT; + using CellTracks = CellTracksT; + using CellNeighborsVector = CellNeighborsVectorT; + using CellTracksVector = CellTracksVectorT; + + CACellT() = default; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void init(CellNeighborsVector& cellNeighbors, + CellTracksVector& cellTracks, + const HitsConstView& hh, int layerPairId, uint8_t theInnerLayer, uint8_t theOuterLayer, hindex_type innerHitId, hindex_type outerHitId) { - theInnerHitId_ = innerHitId; - theOuterHitId_ = outerHitId; - theLayerPairId_ = layerPairId; - theInnerLayer_ = theInnerLayer; - theOuterLayer_ = theOuterLayer; - theStatus_ = 0; - theFishboneId_ = invalidHitId; + this->theInnerHitId = innerHitId; + this->theOuterHitId = outerHitId; + this->theLayerPairId_ = layerPairId; + this->theInnerLayer_ = theInnerLayer; + this->theOuterLayer_ = theOuterLayer; + this->theStatus_ = 0; + this->theFishboneId = this->invalidHitId; // optimization that depends on access pattern - theInnerZ_ = hh[innerHitId].zGlobal(); - theInnerR_ = hh[innerHitId].rGlobal(); + this->theInnerZ = hh[innerHitId].zGlobal(); + this->theInnerR = hh[innerHitId].rGlobal(); + + // link to default empty + theOuterNeighbors = &cellNeighbors[0]; + theTracks = &cellTracks[0]; + ALPAKA_ASSERT_ACC(outerNeighbors().empty()); + ALPAKA_ASSERT_ACC(tracks().empty()); } - using hindex_type = ::caStructures::hindex_type; - using tindex_type = ::caStructures::tindex_type; - - static constexpr auto invalidHitId = std::numeric_limits::max(); - - using TmpTuple = cms::alpakatools::VecArray; - using HitContainer = caStructures::SequentialContainer; - using CellToCell = caStructures::GenericContainer; - using CellToTracks = caStructures::GenericContainer; - using CAPairSoAView = caStructures::CAPairSoAView; - - using Quality = ::pixelTrack::Quality; - static constexpr auto bad = ::pixelTrack::Quality::bad; - - enum class StatusBit : uint16_t { kUsed = 1, kInTrack = 2, kKilled = 1 << 15 }; - - CACell() = default; - - constexpr unsigned int inner_hit_id() const { return theInnerHitId_; } - constexpr unsigned int outer_hit_id() const { return theOuterHitId_; } - - ALPAKA_FN_ACC ALPAKA_FN_INLINE void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); } - - ALPAKA_FN_ACC ALPAKA_FN_INLINE int16_t layerPairId() const { return theLayerPairId_; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE int16_t innerLayer() const { return theInnerLayer_; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE int16_t outerLayer() const { return theOuterLayer_; } - - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); } - - ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_x(const HitsConstView& hh) const { return hh[theInnerHitId_].xGlobal(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_x(const HitsConstView& hh) const { return hh[theOuterHitId_].xGlobal(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_y(const HitsConstView& hh) const { return hh[theInnerHitId_].yGlobal(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_y(const HitsConstView& hh) const { return hh[theOuterHitId_].yGlobal(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_z(const HitsConstView& hh) const { return theInnerZ_; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_z(const HitsConstView& hh) const { return hh[theOuterHitId_].zGlobal(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_r(const HitsConstView& hh) const { return theInnerR_; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_r(const HitsConstView& hh) const { return hh[theOuterHitId_].rGlobal(); } - - ALPAKA_FN_ACC ALPAKA_FN_INLINE auto inner_iphi(const HitsConstView& hh) const { return hh[theInnerHitId_].iphi(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE auto outer_iphi(const HitsConstView& hh) const { return hh[theOuterHitId_].iphi(); } - - ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_detIndex(const HitsConstView& hh) const { - return hh[theInnerHitId_].detectorIndex(); - } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_detIndex(const HitsConstView& hh) const { - return hh[theOuterHitId_].detectorIndex(); + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) int addOuterNeighbor( + const TAcc& acc, typename TrackerTraits::cindex_type t, CellNeighborsVector& cellNeighbors) { + // use smart cache + if (outerNeighbors().empty()) { + auto i = cellNeighbors.extend(acc); // maybe wasted.... + if (i > 0) { + cellNeighbors[i].reset(); + alpaka::mem_fence(acc, alpaka::memory_scope::Grid{}); +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + theOuterNeighbors = &cellNeighbors[i]; +#else + auto zero = (PtrAsInt)(&cellNeighbors[0]); + alpaka::atomicCas(acc, + (PtrAsInt*)(&theOuterNeighbors), + zero, + (PtrAsInt)(&cellNeighbors[i]), + alpaka::hierarchy::Blocks{}); // if fails we cannot give "i" back... +#endif + } else + return -1; + } + alpaka::mem_fence(acc, alpaka::memory_scope::Grid{}); + return outerNeighbors().push_back(acc, t); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE auto fishboneId() const { return theFishboneId_; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hasFishbone() const { return theFishboneId_ != invalidHitId; } - - ALPAKA_FN_ACC void print_cell() const { - printf("printing cell: on layerPair: %d, innerLayer: %d, outerLayer: %d, innerHitId: %d, outerHitId: %d \n", - theLayerPairId_, - theInnerLayer_, - theOuterLayer_, - theInnerHitId_, - theOuterHitId_); - } + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) int addTrack(TAcc const& acc, + tindex_type t, + CellTracksVector& cellTracks) { + if (tracks().empty()) { + auto i = cellTracks.extend(acc); // maybe wasted.... + if (i > 0) { + cellTracks[i].reset(); + alpaka::mem_fence(acc, alpaka::memory_scope::Grid{}); +#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + theTracks = &cellTracks[i]; +#else + auto zero = (PtrAsInt)(&cellTracks[0]); + alpaka::atomicCas(acc, + (PtrAsInt*)(&theTracks), + zero, + (PtrAsInt)(&cellTracks[i]), + alpaka::hierarchy::Blocks{}); // if fails we cannot give "i" back... - ALPAKA_FN_ACC ALPAKA_FN_INLINE void setFishbone(Acc2D const& acc, hindex_type id, float z, const HitsConstView& hh) { - // make it deterministic: use the farther apart (in z) - auto old = theFishboneId_; - while ( - old != - alpaka::atomicCas( - acc, - &theFishboneId_, - old, - (invalidHitId == old || std::abs(z - theInnerZ_) > std::abs(hh[old].zGlobal() - theInnerZ_)) ? id : old, - alpaka::hierarchy::Blocks{})) - old = theFishboneId_; +#endif + } else + return -1; + } + alpaka::mem_fence(acc, alpaka::memory_scope::Grid{}); + return tracks().push_back(acc, t); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE static bool areAlignedRZ( - float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) { - float radius_diff = std::abs(r1 - ro); - float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo); - - float pMin = ptmin * std::sqrt(distance_13_squared); // this needs to be divided by - // radius_diff later - - float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri)); - return tan_12_13_half_mul_distance_13_squared * pMin <= thetaCut * distance_13_squared * radius_diff; + ALPAKA_FN_ACC ALPAKA_FN_INLINE CellTracks& tracks() { return *theTracks; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE CellTracks const& tracks() const { return *theTracks; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE CellNeighbors& outerNeighbors() { return *theOuterNeighbors; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE CellNeighbors const& outerNeighbors() const { return *theOuterNeighbors; } + + ALPAKA_FN_ACC bool check_alignment(const HitsConstView& hh, + CACellT const& otherCell, + const float ptmin, + const float hardCurvCut, + const float caThetaCutBarrel, + const float caThetaCutForward, + const float dcaCutInnerTriplet, + const float dcaCutOuterTriplet) const { + // detIndex of the layerStart for the Phase1 Pixel Detector: + // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID] + // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856] + auto ri = this->inner_r(hh); + auto zi = this->inner_z(hh); + + auto ro = this->outer_r(hh); + auto zo = this->outer_z(hh); + + auto r1 = otherCell.inner_r(hh); + auto z1 = otherCell.inner_z(hh); + auto isBarrel = otherCell.outer_detIndex(hh) < TrackerTraits::last_barrel_detIndex; + // TODO tune CA cuts below (theta and dca) + bool aligned = areAlignedRZ(r1, z1, ri, zi, ro, zo, ptmin, isBarrel ? caThetaCutBarrel : caThetaCutForward); + return (aligned && dcaCut(hh, + otherCell, + otherCell.inner_detIndex(hh) < TrackerTraits::last_bpix1_detIndex ? dcaCutInnerTriplet + : dcaCutOuterTriplet, + hardCurvCut)); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool dcaCut(const HitsConstView& hh, - CACell const& otherCell, - const float region_origin_radius_plus_tolerance, - const float maxCurv) const { - auto x1 = otherCell.inner_x(hh); - auto y1 = otherCell.inner_y(hh); - - auto x2 = inner_x(hh); - auto y2 = inner_y(hh); - - auto x3 = outer_x(hh); - auto y3 = outer_y(hh); - + ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) static bool dcaCutH( + float x1, + float y1, + float x2, + float y2, + float x3, + float y3, + const float region_origin_radius_plus_tolerance, + const float maxCurv) { CircleEq eq(x1, y1, x2, y2, x3, y3); if (std::abs(eq.curvature()) > maxCurv) @@ -158,116 +176,121 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature()); } +#ifdef ONLY_TRIPLETS_IN_HOLE + + // These functions have never been used in production + // They need an AverageGeometry to be filled + // Commenting for the moment since they are the only reason we + // fill the AverageGeometry and attach to the hit SoA + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hole0(const HitsConstView& hh, + AverageGeometryConstView& ag, + CACellT const& innerCell) const { + using namespace phase1PixelTopology; + + int p = innerCell.inner_iphi(hh); + if (p < 0) + p += std::numeric_limits::max(); + p = (max_ladder_bpx0 * p) / std::numeric_limits::max(); + p %= max_ladder_bpx0; + auto il = first_ladder_bpx0 + p; + auto r0 = ag[il].ladderR(); + auto ri = innerCell.inner_r(hh); + auto zi = innerCell.inner_z(hh); + auto ro = this->outer_r(hh); + auto zo = this->outer_z(hh); + auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri); + auto z_in_ladder = std::abs(z0 - ag[il].ladderZ()); + auto z_in_module = z_in_ladder - module_length_bpx0 * int(z_in_ladder / module_length_bpx0); + auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0); + return gap; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hole4(const HitsConstView& hh, CACellT const& innerCell) const { + using namespace phase1PixelTopology; + + int p = this->outer_iphi(hh); + if (p < 0) + p += std::numeric_limits::max(); + p = (max_ladder_bpx4 * p) / std::numeric_limits::max(); + p %= max_ladder_bpx4; + auto il = first_ladder_bpx4 + p; + auto r4 = ag[il].ladderR(); + auto ri = innerCell.inner_r(hh); + auto zi = innerCell.inner_z(hh); + auto ro = this->outer_r(hh); + auto zo = this->outer_z(hh); + auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri); + auto z_in_ladder = std::abs(z4 - ag[il].ladderZ()); + auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4); + auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4); + auto holeP = z4 > ag[il].ladderMaxZ() && z4 < ag[0].endCapZ(); + auto holeN = z4 < ag[il].ladderMinZ() && z4 > ag[1].endCapZ(); + return gap || holeP || holeN; + } +#endif + // trying to free the track building process from hardcoded layers, leaving // the visit of the graph based on the neighborhood connections between cells. - - template - ALPAKA_FN_ACC ALPAKA_FN_INLINE void find_ntuplets(Acc1D const& acc, - const ::reco::CAGraphSoAConstView& cc, - CACell* __restrict__ cells, + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void find_ntuplets(TAcc const& acc, + const HitsConstView& hh, + CACellT* __restrict__ cells, + CellTracksVector& cellTracks, HitContainer& foundNtuplets, - CellToCell const* __restrict__ cellNeighborsHisto, - CellToTracks* cellTracksHisto, - uint32_t* nCellTracks, - CAPairSoAView ct, cms::alpakatools::AtomicPairCounter& apc, Quality* __restrict__ quality, TmpTuple& tmpNtuplet, - const unsigned int minHitsPerNtuplet) const { + const unsigned int minHitsPerNtuplet, + bool startAt0) const { // the building process for a track ends if: // it has no right neighbor // it has no compatible neighbor // the ntuplets is then saved if the number of hits it contains is greater // than a threshold + if constexpr (DEPTH <= 0) { - printf("ERROR: CACell::find_ntuplets reached full depth!\n"); + printf("ERROR: CACellT::find_ntuplets reached full depth!\n"); ALPAKA_ASSERT_ACC(false); } else { auto doubletId = this - cells; - tmpNtuplet.push_back_unsafe(doubletId); // if we move this to be safe we could parallelize further below? + tmpNtuplet.push_back_unsafe(doubletId); ALPAKA_ASSERT_ACC(tmpNtuplet.size() <= int(TrackerTraits::maxHitsOnTrack - 3)); bool last = true; - auto const* __restrict__ bin = cellNeighborsHisto->begin(doubletId); - auto nInBin = cellNeighborsHisto->size(doubletId); - - for (auto idx = 0u; idx < nInBin; idx++) { - // FIXME implement alpaka::ldg and use it here? or is it const* __restrict__ enough? - unsigned int otherCell = bin[idx]; + for (unsigned int otherCell : outerNeighbors()) { if (cells[otherCell].isKilled()) - continue; -#ifdef CA_DEBUG - printf("Doublet no. %d %d doubletId: %ld -> %d (isKilled %d) (%d,%d) -> (%d,%d) %d %d\n", - tmpNtuplet.size(), - idx, - doubletId, - otherCell, - cells[otherCell].isKilled(), - this->inner_hit_id(), - this->outer_hit_id(), - cells[otherCell].inner_hit_id(), - cells[otherCell].outer_hit_id(), - idx, - nInBin); -#endif - + continue; // killed by earlyFishbone last = false; - cells[otherCell].template find_ntuplets(acc, - cc, - cells, - foundNtuplets, - cellNeighborsHisto, - cellTracksHisto, - nCellTracks, - ct, - apc, - quality, - tmpNtuplet, - minHitsPerNtuplet); + cells[otherCell].template find_ntuplets( + acc, hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0); } if (last) { // if long enough save... if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) { +#ifdef ONLY_TRIPLETS_IN_HOLE + // triplets accepted only pointing to the hole + if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) || + ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]]))) +#endif { hindex_type hits[TrackerTraits::maxDepth + 2]; auto nh = 0U; constexpr int maxFB = 2; // for the time being let's limit this int nfb = 0; for (auto c : tmpNtuplet) { - hits[nh++] = cells[c].theInnerHitId_; + hits[nh++] = cells[c].theInnerHitId; if (nfb < maxFB && cells[c].hasFishbone()) { ++nfb; - hits[nh++] = cells[c].theFishboneId_; // Fishbone hit is always outer than inner hit + hits[nh++] = cells[c].theFishboneId; // Fishbone hit is always outer than inner hit } } ALPAKA_ASSERT_ACC(nh < TrackerTraits::maxHitsOnTrack); - hits[nh] = theOuterHitId_; + hits[nh] = this->theOuterHitId; auto it = foundNtuplets.bulkFill(acc, apc, hits, nh + 1); -#ifdef CA_DEBUG - printf("track n. %d nhits %d with cells: ", it, nh + 1); -#endif if (it >= 0) { // if negative is overflow.... - for (auto c : tmpNtuplet) { -#ifdef CA_DEBUG - printf("%d - ", c); -#endif - auto t_ind = alpaka::atomicAdd(acc, nCellTracks, 1u, alpaka::hierarchy::Blocks{}); - - if (t_ind >= uint32_t(ct.metadata().size())) { -#ifdef CA_WARNINGS - printf("Warning!!!! Too many cell->tracks associations (limit = %d)!\n", ct.metadata().size()); -#endif - alpaka::atomicSub(acc, nCellTracks, 1u, alpaka::hierarchy::Blocks{}); - break; - } - cellTracksHisto->count(acc, c); - - ct[t_ind].inner() = c; //cell - ct[t_ind].outer() = it; //track - } -#ifdef CA_DEBUG - printf("\n"); -#endif - quality[it] = bad; // initialize to bad + for (auto c : tmpNtuplet) + cells[c].addTrack(acc, it, cellTracks); + quality[it] = this->bad; // initialize to bad } } } @@ -278,16 +301,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } private: - int16_t theLayerPairId_; - uint8_t theInnerLayer_; - uint8_t theOuterLayer_; - uint16_t theStatus_; // tbd - - float theInnerZ_; - float theInnerR_; - hindex_type theInnerHitId_; - hindex_type theOuterHitId_; - hindex_type theFishboneId_; + CellNeighbors* theOuterNeighbors; + CellTracks* theTracks; }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h index 96924f76932ec..9f590b1c81cc5 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h @@ -27,9 +27,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { template class CAFishbone { public: - ALPAKA_FN_ACC void operator()(Acc2D const& acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, HitsConstView hh, - CACell* cells, + CASimpleCell* cells, uint32_t const* __restrict__ nCells, HitToCell const* __restrict__ outerHitHisto, CellToTracks const* __restrict__ cellTracksHisto, @@ -38,6 +39,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { // outermost parallel loop, using all grid elements along the slower dimension (Y or 0 in a 2D grid) for (uint32_t idy : cms::alpakatools::uniform_elements_y(acc, outerHits)) { uint32_t size = outerHitHisto->size(idy); + // printf("fishbone ---> outersize %d - ",idy,size); if (size < 2) continue; @@ -50,11 +52,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { auto xo = c0.outer_x(hh); auto yo = c0.outer_y(hh); auto zo = c0.outer_z(hh); - + //printf("first cell %d xo %.2f yo %.2f zo %.2f - ",bin[0],c0.outer_x(hh),c0.outer_y(hh),c0.outer_z(hh));ve + + // #ifdef GPU_DEBUG + // for (auto idx = 0u; idx < size; idx++) { + // unsigned int otherCell = bin[idx]; + // printf("vc[0] %d idx %d vc[idx] %d otherCell %d \n",vc[0],idx,vc[idx],otherCell); + // } + // #endif for (uint32_t ic : cms::alpakatools::independent_group_elements_x(acc, size)) { + //printf("cell0 = %d ci = %d\n",bin[0],bin[ic]); unsigned int otherCell = bin[ic]; auto& ci = cells[otherCell]; - + // printf("xo = %.2f yo = %.2f zo = %.2f xi = %.2f yi = %.2f zi = %.2f \n",xo,yo,zo,ci.inner_x(hh),ci.inner_y(hh),ci.inner_z(hh)); if (ci.unused()) continue; // for triplets equivalent to next if (checkTrack && cellTracksHisto->size(otherCell) == 0) diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc index 59a8c19a0390f..a9763033dfa2f 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc @@ -35,6 +35,7 @@ #include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" #include "DataFormats/TrackerCommon/interface/TrackerTopology.h" #include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" +#include "DataFormats/SiStripDetId/interface/StripSubdetector.h" // #define GPU_DEBUG @@ -44,8 +45,8 @@ namespace reco { CAGeometryParams(edm::ParameterSet const& iConfig) : caThetaCuts_(iConfig.getParameter>("caThetaCuts")), caDCACuts_(iConfig.getParameter>("caDCACuts")), - pairGraph_(iConfig.getParameter>("pairGraph")), - startingPairs_(iConfig.getParameter>("startingPairs")), + pairGraph_(iConfig.getParameter>("pairGraph")), + startingPairs_(iConfig.getParameter>("startingPairs")), phiCuts_(iConfig.getParameter>("phiCuts")), minZ_(iConfig.getParameter>("minZ")), maxZ_(iConfig.getParameter>("maxZ")), @@ -56,8 +57,8 @@ namespace reco { const std::vector caDCACuts_; // Cells params - const std::vector pairGraph_; - const std::vector startingPairs_; + const std::vector pairGraph_; + const std::vector startingPairs_; const std::vector phiCuts_; const std::vector minZ_; const std::vector maxZ_; @@ -73,7 +74,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template class CAHitNtupletAlpaka - : public stream::EDProducer, + : public stream::EDProducer, edm::RunCache>> { using HitsConstView = ::reco::TrackingRecHitConstView; using HitsOnDevice = reco::TrackingRecHitsSoACollection; @@ -81,9 +82,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { using TkSoAHost = ::reco::TracksHost; using TkSoADevice = reco::TracksSoACollection; - + using Algo = CAHitNtupletGenerator; - + using CAGeometryCache = cms::alpakatools::MoveToDeviceCache; using Rotation = SOARotation; using Frame = SOAFrame; @@ -118,7 +119,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { std::cout << "No. Layers to be used = " << n_layers << std::endl; std::cout << "No. Pairs to be used = " << n_pairs << std::endl; #endif - + assert(int(n_pairs) == int(iCache->minZ_.size())); assert(int(*std::max_element(iCache->startingPairs_.begin(), iCache->startingPairs_.end())) <= n_pairs); assert(int(*std::max_element(iCache->pairGraph_.begin(), iCache->pairGraph_.end())) < n_layers); @@ -147,31 +148,94 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // of modules each layer has. And we need the extra spot // at the end to hold the total number of modules. - for (auto& det : dets) { - DetId detid = det->geographicalId(); -#ifdef GPU_DEBUG - if (n_modules >= int(subSystemOffset)) { - subSystemName = GeomDetEnumerators::tkDetEnum[++subSystem]; - subSystemOffset = trackerGeometry.offsetDU(subSystemName); - std::cout << " ===================== Subsystem: " << subSystemName << std::endl; + std::vector moduleToindexInDets; + + auto isPinPSinOTBarrel = [&](DetId detId) { + // std::cout << (int)trackerGeometry->getDetectorType(detId) << " " << (trackerGeometry->getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PSP) << "\n"; + // std::cout << (int)detId.subdetId() << " " << (detId.subdetId() == StripSubdetector::TOB) << std::endl; + // Select only P-hits from the OT barrel + return (trackerGeometry.getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PSP && + detId.subdetId() == StripSubdetector::TOB); + }; + auto isPh2Pixel = [&](DetId detId) { + return (trackerGeometry.getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PXB || + trackerGeometry.getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PXB3D || + trackerGeometry.getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PXF || + trackerGeometry.getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PXF3D); + }; + if constexpr (std::is_same_v) { + int counter = 0; + for (auto& det : dets) { + DetId detid = det->geographicalId(); + auto layer = trackerTopology.layer(detid); +// std::cout << "Looping on " << detid.rawId() << " on layer " << layer << std::endl; + // Logic: + // - if we are not inside pixels, we need to ignore anything **but** the OT. + // - for the time being, this is assuming that the CA extension will + // only cover the OT barrel part, and will ignore the OT forward. + if (isPh2Pixel(detid)) { +// std::cout << "Good Pixel" << std::endl; + if (layer != oldLayer) { + //std::cout << "Pixel LayerStart: " << layerCount << " at layer " << layer << " has " << n_modules << " modules." << std::endl; + layerStarts[layerCount++] = n_modules; + if (layerCount > n_layers + 1) + break; + oldLayer = layer; + } + moduleToindexInDets.push_back(counter); + n_modules++; + } else { + auto const& detUnits = det->components(); + for (auto& detUnit : detUnits) { + DetId unitDetId(detUnit->geographicalId()); + if (isPinPSinOTBarrel(unitDetId)) { +// std::cout << "Good OT Barrel" << std::endl; + if (layer != oldLayer) { + //std::cout << "OT LayerStart: " << layerCount << " at layer " << layer << " has " << n_modules << " modules." << std::endl; + layerStarts[layerCount++] = n_modules; + if (layerCount > n_layers + 1) + break; + oldLayer = layer; + } + moduleToindexInDets.push_back(counter); + n_modules++; + } else { +// std::cout << "BAD OT" << std::endl; + } + } +// std::cout << "Done OT" << std::endl; + } + counter++; } + layerStarts[n_layers] = n_modules; + //std::cout << "OT LayerStart: " << n_layers << " has " << n_modules << " modules." << std::endl; + } else { + for (auto& det : dets) { + DetId detid = det->geographicalId(); +#ifdef GPU_DEBUG + if (n_modules >= int(subSystemOffset)) { + subSystemName = GeomDetEnumerators::tkDetEnum[++subSystem]; + subSystemOffset = trackerGeometry.offsetDU(subSystemName); + std::cout << " ===================== Subsystem: " << subSystemName << std::endl; + } #endif - auto layer = trackerTopology.layer(detid); + auto layer = trackerTopology.layer(detid); - if (layer != oldLayer) { - layerStarts[layerCount++] = n_modules; + if (layer != oldLayer) { + layerStarts[layerCount++] = n_modules; - if (layerCount > n_layers + 1) - break; + if (layerCount > n_layers + 1) + break; - oldLayer = layer; + oldLayer = layer; #ifdef GPU_DEBUG - std::cout << " > New layer at module : " << n_modules << " (detId: " << detid << ")" << std::endl; + std::cout << " > New layer at module : " << n_modules << " (detId: " << detid << ")" << std::endl; #endif - } + } - n_modules++; + n_modules++; + } } reco::CAGeometryHost product{{{n_layers + 1, n_pairs, n_modules}}, cms::alpakatools::host()}; @@ -180,17 +244,47 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto cellSoA = product.view<::reco::CAGraphSoA>(); auto modulesSoA = product.view<::reco::CAModulesSoA>(); - for (int i = 0; i < n_modules; ++i) { - auto det = dets[i]; - auto vv = det->surface().position(); - auto rr = Rotation(det->surface().rotation()); - modulesSoA[i].detFrame() = Frame(vv.x(), vv.y(), vv.z(), rr); - } + if constexpr (std::is_same_v) { + for (int i = 0; i < n_modules; ++i) { + auto idx = moduleToindexInDets[i]; + auto det = dets[idx]; +#ifdef GPU_DEBUG + auto const& detUnits = det->components(); + for (auto& detUnit : detUnits) { + DetId unitDetId(detUnit->geographicalId()); + if (isPinPSinOTBarrel(unitDetId)) { + std::cout << "Filling frame at index " << idx << " in SoA position " << i << " for det " << det->geographicalId() << " and detUnit->index: " << detUnit->index() << std::endl; + } + } + std::cout << "Filling frame at index " << idx << " in SoA position " << i << " for det " << det->geographicalId() << std::endl; +#endif + auto vv = det->surface().position(); + auto rr = Rotation(det->surface().rotation()); + modulesSoA[i].detFrame() = Frame(vv.x(), vv.y(), vv.z(), rr); +#ifdef GPU_DEBUG + std::cout << "Position: " << vv << " with Rotation: " << det->surface().rotation() << std::endl; + std::cout << "Rotation in Z-r plane: " << atan2(det->surface().normalVector().perp(),det->surface().normalVector().z())*180./M_PI << std::endl; +#endif + } - for (int i = 0; i < n_layers; ++i) { - layerSoA.layerStarts()[i] = layerStarts[i]; - layerSoA.caThetaCut()[i] = iCache->caThetaCuts_[i]; - layerSoA.caDCACut()[i] = iCache->caDCACuts_[i]; + for (int i = 0; i < n_layers; ++i) { + layerSoA.layerStarts()[i] = layerStarts[i]; + layerSoA.caThetaCut()[i] = iCache->caThetaCuts_[i]; + layerSoA.caDCACut()[i] = iCache->caDCACuts_[i]; + } + } else { + for (int i = 0; i < n_modules; ++i) { + auto det = dets[i]; + auto vv = det->surface().position(); + auto rr = Rotation(det->surface().rotation()); + modulesSoA[i].detFrame() = Frame(vv.x(), vv.y(), vv.z(), rr); + } + + for (int i = 0; i < n_layers; ++i) { + layerSoA.layerStarts()[i] = layerStarts[i]; + layerSoA.caThetaCut()[i] = iCache->caThetaCuts_[i]; + layerSoA.caDCACut()[i] = iCache->caDCACuts_[i]; + } } layerSoA.layerStarts()[n_layers] = layerStarts[n_layers]; @@ -204,7 +298,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { cellSoA.startingPair()[i] = false; } - for (const unsigned int& i : iCache->startingPairs_) + for (const int& i : iCache->startingPairs_) cellSoA.startingPair()[i] = true; return std::make_shared(std::move(product)); @@ -219,9 +313,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const device::EDGetToken tokenHit_; const device::EDPutToken tokenTrack_; - const ::reco::FormulaEvaluator maxNumberOfDoublets_; - const ::reco::FormulaEvaluator maxNumberOfTuples_; - + const TFormula maxNumberOfDoublets_; + const TFormula maxNumberOfTuples_; Algo deviceAlgo_; }; @@ -232,8 +325,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { tokenField_(esConsumes()), tokenHit_(consumes(iConfig.getParameter("pixelRecHitSrc"))), tokenTrack_(produces()), - maxNumberOfDoublets_(iConfig.getParameter("maxNumberOfDoublets")), - maxNumberOfTuples_(iConfig.getParameter("maxNumberOfTuples")), + maxNumberOfDoublets_( + TFormula("doubletsHitsDependecy", iConfig.getParameter("maxNumberOfDoublets").data())), + maxNumberOfTuples_( + TFormula("tracksHitsDependency", iConfig.getParameter("maxNumberOfTuples").data())), deviceAlgo_(iConfig) { iCache->tokenGeometry_ = esConsumes(); iCache->tokenTopology_ = esConsumes(); @@ -256,11 +351,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto const& geometry = runCache()->get(iEvent.queue()); auto const& hits = iEvent.get(tokenHit_); - std::array nHitsV = {{double(hits.nHits())}}; - std::array emptyV; - - uint32_t const maxTuples = maxNumberOfTuples_.evaluate(nHitsV, emptyV); - uint32_t const maxDoublets = maxNumberOfDoublets_.evaluate(nHitsV, emptyV); + uint32_t const maxTuples = maxNumberOfTuples_.Eval(hits.nHits()); + uint32_t const maxDoublets = maxNumberOfDoublets_.Eval(hits.nHits()); iEvent.emplace(tokenTrack_, deviceAlgo_.makeTuplesAsync(hits, geometry, bf, maxDoublets, maxTuples, iEvent.queue())); @@ -269,6 +361,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { using CAHitNtupletAlpakaPhase1 = CAHitNtupletAlpaka; using CAHitNtupletAlpakaHIonPhase1 = CAHitNtupletAlpaka; using CAHitNtupletAlpakaPhase2 = CAHitNtupletAlpaka; + using CAHitNtupletAlpakaPhase2OT = CAHitNtupletAlpaka; } // namespace ALPAKA_ACCELERATOR_NAMESPACE #include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" @@ -276,3 +369,4 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { DEFINE_FWK_ALPAKA_MODULE(CAHitNtupletAlpakaPhase1); DEFINE_FWK_ALPAKA_MODULE(CAHitNtupletAlpakaHIonPhase1); DEFINE_FWK_ALPAKA_MODULE(CAHitNtupletAlpakaPhase2); +DEFINE_FWK_ALPAKA_MODULE(CAHitNtupletAlpakaPhase2OT); diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc index 4e205ba95f6b3..9d8017c248ba7 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc @@ -251,16 +251,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { .add>("caThetaCuts", std::vector(std::begin(thetaCuts), std::begin(thetaCuts) + numberOfLayers)) ->setComment("Cut on origin radius. One per layer, the layer being the innermost one for a triplet."); - geometryParams.add>("startingPairs", {0u, 1u, 2u}) + geometryParams.add>("startingPairs", {0u, 1u, 2u}) ->setComment( "The list of the ids of pairs from which the CA ntuplets building may start."); //TODO could be parsed via an expression // cells params geometryParams - .add>( + .add>( "pairGraph", - std::vector(std::begin(layerPairs), + std::vector(std::begin(layerPairs), std::begin(layerPairs) + (pixelTopology::Phase1::nPairsForQuadruplets * 2))) - ->setComment("CA graph"); + ->setComment("CA graph"); geometryParams .add>( "phiCuts", @@ -325,14 +325,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { std::vector(std::begin(phase1HIonPixelTopology::thetaCuts), std::begin(phase1HIonPixelTopology::thetaCuts) + numberOfLayers)) ->setComment("Cut on origin radius. One per layer, the layer being the innermost one for a triplet."); - geometryParams.add>("startingPairs", {0u, 1u, 2u}) + geometryParams.add>("startingPairs", {0u, 1u, 2u}) ->setComment( "The list of the ids of pairs from which the CA ntuplets building may start."); //TODO could be parsed via an expression // cells params geometryParams - .add>( + .add>( "pairGraph", - std::vector(std::begin(layerPairs), + std::vector(std::begin(layerPairs), std::begin(layerPairs) + (pixelTopology::Phase1::nPairsForQuadruplets * 2))) ->setComment("CA graph"); geometryParams @@ -389,15 +389,67 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { std::vector(std::begin(thetaCuts), std::begin(thetaCuts) + numberOfLayers)) ->setComment("Cut on origin radius. One per layer, the layer being the innermost one for a triplet."); geometryParams - .add>("startingPairs", + .add>("startingPairs", {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}) ->setComment( "The list of the ids of pairs from which the CA ntuplets building may start."); //TODO could be parsed via an expression // cells params geometryParams - .add>( - "pairGraph", std::vector(std::begin(layerPairs), std::begin(layerPairs) + (nPairs * 2))) + .add>( + "pairGraph", std::vector(std::begin(layerPairs), std::begin(layerPairs) + (nPairs * 2))) + ->setComment("CA graph"); + geometryParams + .add>("phiCuts", std::vector(std::begin(phicuts), std::begin(phicuts) + nPairs)) + ->setComment("Cuts in phi for cells"); + geometryParams.add>("minZ", std::vector(std::begin(minz), std::begin(minz) + nPairs)) + ->setComment("Cuts in min z (on inner hit) for cells"); + geometryParams.add>("maxZ", std::vector(std::begin(maxz), std::begin(maxz) + nPairs)) + ->setComment("Cuts in max z (on inner hit) for cells"); + geometryParams.add>("maxR", std::vector(std::begin(maxr), std::begin(maxr) + nPairs)) + ->setComment("Cuts in max r for cells"); + + desc.add("geometry", geometryParams) + ->setComment( + "Quality cuts based on the results of the track fit:\n - apply cuts based on the fit results (pT, Tip, " + "Zip)."); + } + + // TODO: fill me properly + template <> + void CAHitNtupletGenerator::fillPSetDescription(edm::ParameterSetDescription& desc) { + fillDescriptionsCommon(desc); + + edm::ParameterSetDescription trackQualityCuts; + trackQualityCuts.add("maxChi2", 5.)->setComment("Max normalized chi2"); + trackQualityCuts.add("minPt", 0.5)->setComment("Min pT in GeV"); + trackQualityCuts.add("maxTip", 0.3)->setComment("Max |Tip| in cm"); + trackQualityCuts.add("maxZip", 12.)->setComment("Max |Zip|, in cm"); + desc.add("trackQualityCuts", trackQualityCuts) + ->setComment( + "Quality cuts based on the results of the track fit:\n - apply cuts based on the fit results (pT, Tip, " + "Zip)."); + + edm::ParameterSetDescription geometryParams; + using namespace phase2PixelTopology; + // layers params + geometryParams + .add>("caDCACuts", + std::vector(std::begin(dcaCuts), std::begin(dcaCuts) + numberOfLayers)) + ->setComment("Cut on RZ alignement. One per layer, the layer being the middle one for a triplet."); + geometryParams + .add>("caThetaCuts", + std::vector(std::begin(thetaCuts), std::begin(thetaCuts) + numberOfLayers)) + ->setComment("Cut on origin radius. One per layer, the layer being the innermost one for a triplet."); + geometryParams + .add>("startingPairs", {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}) + ->setComment( + "The list of the ids of pairs from which the CA ntuplets building may start."); //TODO could be parsed via an expression + // cells params + geometryParams + .add>("pairGraph", + std::vector(std::begin(layerPairs), std::begin(layerPairs) + (nPairs * 2))) ->setComment("CA graph"); geometryParams .add>("phiCuts", std::vector(std::begin(phicuts), std::begin(phicuts) + nPairs)) @@ -482,5 +534,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template class CAHitNtupletGenerator; template class CAHitNtupletGenerator; + template class CAHitNtupletGenerator; template class CAHitNtupletGenerator; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc index c7558009fe5b2..12e6138486abd 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc @@ -17,9 +17,9 @@ #include "CAHitNtupletGeneratorKernels.h" #include "CAHitNtupletGeneratorKernelsImpl.h" -// #define GPU_DEBUG -// #define NTUPLE_DEBUG -// #define CA_STATS +//#define GPU_DEBUG +//#define NTUPLE_DEBUG +//#define CA_STATS namespace ALPAKA_ACCELERATOR_NAMESPACE { @@ -31,152 +31,135 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { uint32_t maxTuples, uint16_t nLayers, Queue &queue) - : m_params(params) { - ////////////////////////////////////////////////////////// - // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER) - ////////////////////////////////////////////////////////// - - counters_ = cms::alpakatools::make_device_buffer(queue); - // Here we define the OneToMany maps and the histograms - // allocating the buffers and defining the views. - // For each map/histo, we need: - // - a buffer for the offsets sized as the number of ones + 1 - // (with the last bin holding the total number of ones) - // - a buffer fot the content/storage itself sized as the number of many - - auto const &algoParams = m_params.algoParams_; - int outerHits = - nHits - offsetBPIX2; // the number of hits that may be used as outer hits for a cell (so not on bpix1) - - // These hold the max number of associations needed - int nHitsToTracks = std::max(int(maxTuples * algoParams.avgHitsPerTrack_), 1); - int nHitsToCells = std::max(int(outerHits * algoParams.avgCellsPerHit_), 1); - int nCellsToCells = std::max(int(maxDoublets * algoParams.avgCellsPerCell_), 1); - int nCellsToTracks = std::max(int(maxDoublets * algoParams.avgTracksPerCell_), 1); - + : m_params(params), + ////////////////////////////////////////////////////////// + // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER) + ////////////////////////////////////////////////////////// + counters_{cms::alpakatools::make_device_buffer(queue)}, + + // One to Many Maps + // Hits -> Track + device_hitToTuple_{cms::alpakatools::make_device_buffer(queue)}, + device_hitToTupleStorage_{cms::alpakatools::make_device_buffer( + queue, int(maxTuples * m_params.algoParams_.avgHitsPerTrack_) + 1)}, + device_hitToTupleOffsets_{cms::alpakatools::make_device_buffer(queue, nHits + 1)}, + device_hitToTupleView_{device_hitToTuple_.data(), + device_hitToTupleOffsets_.data(), + device_hitToTupleStorage_.data(), + int(nHits + 1), + int(maxTuples * m_params.algoParams_.avgHitsPerTrack_) + 1}, + + // (Outer) Hits-> Cells + device_hitToCell_{cms::alpakatools::make_device_buffer(queue)}, + device_hitToCellStorage_{cms::alpakatools::make_device_buffer( + queue, int((nHits - offsetBPIX2) * m_params.algoParams_.avgCellsPerHit_) + 1)}, + device_hitToCellOffsets_{ + cms::alpakatools::make_device_buffer(queue, nHits - offsetBPIX2 + 1)}, + device_hitToCellView_{device_hitToCell_.data(), + device_hitToCellOffsets_.data(), + device_hitToCellStorage_.data(), + int(nHits - offsetBPIX2 + 1), + int((nHits - offsetBPIX2) * m_params.algoParams_.avgCellsPerHit_) + 1}, + + // Hits + device_hitPhiHist_{cms::alpakatools::make_device_buffer(queue)}, + device_phiBinnerStorage_{cms::alpakatools::make_device_buffer(queue, nHits)}, + device_hitPhiView_{device_hitPhiHist_.data(), nullptr, device_phiBinnerStorage_.data(), -1, int(nHits)}, + device_layerStarts_{cms::alpakatools::make_device_buffer(queue, nLayers + 1)}, + + // Cell -> Neighbor Cells + device_cellToNeighbors_{cms::alpakatools::make_device_buffer(queue)}, + device_cellToNeighborsStorage_{cms::alpakatools::make_device_buffer( + queue, int(maxDoublets * m_params.algoParams_.avgCellsPerCell_) + 1)}, + device_cellToNeighborsOffsets_{ + cms::alpakatools::make_device_buffer(queue, maxDoublets + 1)}, + device_cellToNeighborsView_{device_cellToNeighbors_.data(), + device_cellToNeighborsOffsets_.data(), + device_cellToNeighborsStorage_.data(), + int(maxDoublets + 1), + int(maxDoublets * m_params.algoParams_.avgCellsPerCell_)}, + + // Cell -> Tracks + device_cellToTracks_{cms::alpakatools::make_device_buffer(queue)}, + device_cellToTracksStorage_{cms::alpakatools::make_device_buffer( + queue, int(maxDoublets * m_params.algoParams_.avgTracksPerCell_) + 1)}, + device_cellToTracksOffsets_{ + cms::alpakatools::make_device_buffer(queue, maxDoublets + 1)}, + device_cellToTracksView_{device_cellToTracks_.data(), + device_cellToTracksOffsets_.data(), + device_cellToTracksStorage_.data(), + int(maxDoublets + 1), + int(maxDoublets * m_params.algoParams_.avgTracksPerCell_) + 1}, + + // Tracks -> Hits + device_hitContainer_{cms::alpakatools::make_device_buffer(queue)}, + device_hitContainerStorage_{cms::alpakatools::make_device_buffer( + queue, int(m_params.algoParams_.avgHitsPerTrack_ * maxTuples) + 1)}, + device_hitContainerOffsets_{ + cms::alpakatools::make_device_buffer(queue, maxTuples + 1)}, + device_hitContainerView_{device_hitContainer_.data(), + device_hitContainerOffsets_.data(), + device_hitContainerStorage_.data(), + int(maxTuples + 1), + int(m_params.algoParams_.avgHitsPerTrack_ * maxTuples) + 1}, + + // No.Hits -> Track (Multiplicity) + device_tupleMultiplicity_{cms::alpakatools::make_device_buffer(queue)}, + device_tupleMultiplicityStorage_{ + cms::alpakatools::make_device_buffer(queue, maxTuples)}, + device_tupleMultiplicityOffsets_{ + cms::alpakatools::make_device_buffer(queue, TrackerTraits::maxHitsOnTrack + 1)}, + device_tupleMultiplicityView_{device_tupleMultiplicity_.data(), + device_tupleMultiplicityOffsets_.data(), + device_tupleMultiplicityStorage_.data(), + int(TrackerTraits::maxHitsOnTrack + 1), + int(maxTuples)}, + + // Structures and Counters Storage + device_simpleCells_{cms::alpakatools::make_device_buffer(queue, maxDoublets)}, + device_extraStorage_{ + cms::alpakatools::make_device_buffer(queue, 5u)}, + device_hitTuple_apc_{reinterpret_cast(device_extraStorage_.data())}, + device_nCells_{ + cms::alpakatools::make_device_view(queue, *reinterpret_cast(device_extraStorage_.data() + 2))}, + device_nTriplets_{ + cms::alpakatools::make_device_view(queue, *reinterpret_cast(device_extraStorage_.data() + 3))}, + device_nCellTracks_{ + cms::alpakatools::make_device_view(queue, *reinterpret_cast(device_extraStorage_.data() + 4))}, + deviceTriplets_{CACoupleSoACollection(maxDoublets * m_params.algoParams_.avgCellsPerCell_, queue)}, + deviceTracksCells_{ + CACoupleSoACollection(int(maxDoublets * m_params.algoParams_.avgTracksPerCell_) + 1, queue)} { #ifdef GPU_DEBUG - std::cout << "Allocation for tuple building with: " << std::endl; - std::cout << "- nHits = " << nHits << std::endl; - std::cout << "- maxDoublets = " << maxTuples << std::endl; - std::cout << "- maxTracks = " << maxDoublets << std::endl; - - std::cout << "- nCellsToCells = " << nCellsToCells << std::endl; - std::cout << "- nHitsToCells = " << nHitsToCells << std::endl; - std::cout << "- nCellsToTracks = " << nCellsToTracks << std::endl; - std::cout << "- nHitsToTracks = " << nHitsToTracks << std::endl; + std::cout << "Allocation for tuple building. N hits " << nHits << std::endl; + std::cout << "maxTrips = " << int(maxDoublets * m_params.algoParams_.avgCellsPerCell_) + 1 << std::endl; + std::cout << "maxDoublets = " << maxDoublets << std::endl; + std::cout << "maxTrackCell = " << int(maxDoublets * m_params.algoParams_.avgTracksPerCell_) + 1 << std::endl; #endif - // Hits -> Track - device_hitToTuple_ = cms::alpakatools::make_device_buffer(queue); - device_hitToTupleStorage_ = cms::alpakatools::make_device_buffer(queue, nHitsToTracks); - device_hitToTupleOffsets_ = cms::alpakatools::make_device_buffer(queue, nHits + 1); - device_hitToTupleView_ = {device_hitToTuple_->data(), - device_hitToTupleOffsets_->data(), - device_hitToTupleStorage_->data(), - int(nHits + 1), - nHitsToTracks}; + //TODO: if doStats? + alpaka::memset(queue, counters_, 0); + alpaka::memset(queue, device_nCells_, 0); + alpaka::memset(queue, device_nTriplets_, 0); + alpaka::memset(queue, device_nCellTracks_, 0); + + // Hits -> Track HitToTuple::template launchZero(device_hitToTupleView_, queue); // (Outer) Hits-> Cells - device_hitToCell_ = cms::alpakatools::make_device_buffer(queue); - device_hitToCellStorage_ = cms::alpakatools::make_device_buffer(queue, nHitsToCells); - device_hitToCellOffsets_ = cms::alpakatools::make_device_buffer(queue, outerHits + 1); - device_hitToCellView_ = {device_hitToCell_->data(), - device_hitToCellOffsets_->data(), - device_hitToCellStorage_->data(), - outerHits + 1, - nHitsToCells}; - HitToCell::template launchZero(device_hitToCellView_, queue); - // Hits Phi Histograms: one histogram per layer - device_hitPhiHist_ = cms::alpakatools::make_device_buffer(queue); - device_phiBinnerStorage_ = cms::alpakatools::make_device_buffer(queue, nHits); - device_hitPhiView_ = {device_hitPhiHist_->data(), nullptr, device_phiBinnerStorage_->data(), -1, int(nHits)}; - // This will hold where each layer starts in the hit soa - device_layerStarts_ = cms::alpakatools::make_device_buffer(queue, nLayers + 1); - - // Cell -> Neighbor Cells - device_cellToNeighbors_ = cms::alpakatools::make_device_buffer(queue); - device_cellToNeighborsStorage_ = - cms::alpakatools::make_device_buffer(queue, nCellsToCells); - device_cellToNeighborsOffsets_ = - cms::alpakatools::make_device_buffer(queue, maxDoublets + 1); - device_cellToNeighborsView_ = {device_cellToNeighbors_->data(), - device_cellToNeighborsOffsets_->data(), - device_cellToNeighborsStorage_->data(), - int(maxDoublets + 1), - nCellsToCells}; - + // Cells-> Neighbor Cells CellToCell::template launchZero(device_cellToNeighborsView_, queue); - // Cell -> Tracks - device_cellToTracks_ = cms::alpakatools::make_device_buffer(queue); - device_cellToTracksStorage_ = - cms::alpakatools::make_device_buffer(queue, nCellsToTracks); - device_cellToTracksOffsets_ = - cms::alpakatools::make_device_buffer(queue, maxDoublets + 1); - device_cellToTracksView_ = {device_cellToTracks_->data(), - device_cellToTracksOffsets_->data(), - device_cellToTracksStorage_->data(), - int(maxDoublets + 1), - nCellsToTracks}; - + // Cells-> Neighbor Cells CellToTrack::template launchZero(device_cellToTracksView_, queue); - // Track -> Hits - // - This is a OneToManyAssocSequential since each bin is filled - // in one go: all the hits forming a track are pushed together. - device_hitContainer_ = cms::alpakatools::make_device_buffer(queue); - device_hitContainerStorage_ = - cms::alpakatools::make_device_buffer(queue, nHitsToTracks); - device_hitContainerOffsets_ = - cms::alpakatools::make_device_buffer(queue, maxTuples + 1); - device_hitContainerView_ = {device_hitContainer_->data(), - device_hitContainerOffsets_->data(), - device_hitContainerStorage_->data(), - int(maxTuples + 1), - nHitsToTracks}; - - HitContainer::template launchZero(device_hitContainerView_, queue); - - // No.Hits -> Track (track multiplicity) - device_tupleMultiplicity_ = cms::alpakatools::make_device_buffer(queue); - device_tupleMultiplicityStorage_ = - cms::alpakatools::make_device_buffer(queue, maxTuples); - device_tupleMultiplicityOffsets_ = - cms::alpakatools::make_device_buffer(queue, TrackerTraits::maxHitsOnTrack + 1); - device_tupleMultiplicityView_ = { - device_tupleMultiplicity_->data(), - device_tupleMultiplicityOffsets_->data(), - device_tupleMultiplicityStorage_->data(), - int(TrackerTraits::maxHitsOnTrack + 1), //TODO: this could become configurable with some work - int(maxTuples)}; + // Tracks -> Hits TupleMultiplicity::template launchZero(device_tupleMultiplicityView_, queue); - // Structures and Counters Storage - device_simpleCells_ = cms::alpakatools::make_device_buffer(queue, maxDoublets); - - device_extraStorage_ = - cms::alpakatools::make_device_buffer(queue, 5u); - device_hitTuple_apc_ = reinterpret_cast(device_extraStorage_->data()); - device_nCells_ = - cms::alpakatools::make_device_view(queue, *reinterpret_cast(device_extraStorage_->data() + 2)); - device_nTriplets_ = - cms::alpakatools::make_device_view(queue, *reinterpret_cast(device_extraStorage_->data() + 3)); - device_nCellTracks_ = - cms::alpakatools::make_device_view(queue, *reinterpret_cast(device_extraStorage_->data() + 4)); - - deviceTriplets_ = CAPairSoACollection(maxDoublets * algoParams.avgCellsPerCell_, queue); - deviceTracksCells_ = CAPairSoACollection(nCellsToTracks, queue); - - //TODO: if doStats? - alpaka::memset(queue, *counters_, 0); - - alpaka::memset(queue, *device_nCells_, 0); - alpaka::memset(queue, *device_nTriplets_, 0); - alpaka::memset(queue, *device_nCellTracks_, 0); + // No.Hits -> Track (Multiplicity) + HitContainer::template launchZero(device_hitContainerView_, queue); maxNumberOfDoublets_ = maxDoublets; @@ -194,13 +177,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { using namespace caHitNtupletGeneratorKernels; const auto workDiv1D = cms::alpakatools::make_workdiv(1, ll.metadata().size() - 1); - alpaka::exec(queue, workDiv1D, SetHitsLayerStart{}, mm, ll, this->device_layerStarts_->data()); + alpaka::exec(queue, workDiv1D, SetHitsLayerStart{}, mm, ll, this->device_layerStarts_.data()); - cms::alpakatools::fillManyFromVector(device_hitPhiHist_->data(), + cms::alpakatools::fillManyFromVector(device_hitPhiHist_.data(), device_hitPhiView_, TrackerTraits::numberOfLayers, // could be ll.metadata().size() - 1 hh.iphi(), - this->device_layerStarts_->data(), + this->device_layerStarts_.data(), hh.metadata().size(), (uint32_t)256, queue); @@ -255,12 +238,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { this->device_hitTuple_apc_, // needed only to be reset, ready for next kernel hh, ll, - this->deviceTriplets_->view(), - this->device_simpleCells_->data(), - this->device_nCells_->data(), - this->device_nTriplets_->data(), - this->device_hitToCell_->data(), - this->device_cellToNeighbors_->data(), + this->deviceTriplets_.view(), + this->device_simpleCells_.data(), + this->device_nCells_.data(), + this->device_nTriplets_.data(), + this->device_hitToCell_.data(), + this->device_cellToNeighbors_.data(), this->m_params.algoParams_); CellToCell::template launchFinalize(this->device_cellToNeighborsView_, queue); @@ -276,10 +259,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDiv1D, - Kernel_fillGenericPair{}, - this->deviceTriplets_->view(), - this->device_nTriplets_->data(), - this->device_cellToNeighbors_->data()); + Kernel_fillGenericCouple{}, + this->deviceTriplets_.view(), + this->device_nTriplets_.data(), + this->device_cellToNeighbors_.data()); #ifdef GPU_DEBUG alpaka::wait(queue); @@ -299,10 +282,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { fishboneWorkDiv, CAFishbone{}, hh, - this->device_simpleCells_->data(), - this->device_nCells_->data(), - this->device_hitToCell_->data(), - this->device_cellToTracks_->data(), + this->device_simpleCells_.data(), + this->device_nCells_.data(), + this->device_hitToCell_.data(), + this->device_cellToTracks_.data(), nhits - offsetBPIX2, false); #ifdef GPU_DEBUG @@ -318,14 +301,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Kernel_find_ntuplets{}, cc, tracks_view, - this->device_hitContainer_->data(), - this->device_cellToNeighbors_->data(), - this->device_cellToTracks_->data(), - this->deviceTracksCells_->view(), - this->device_simpleCells_->data(), - this->device_nCellTracks_->data(), - this->device_nTriplets_->data(), - this->device_nCells_->data(), + this->device_hitContainer_.data(), + this->device_cellToNeighbors_.data(), + this->device_cellToTracks_.data(), + this->deviceTracksCells_.view(), + this->device_simpleCells_.data(), + this->device_nCellTracks_.data(), + this->device_nTriplets_.data(), + this->device_nCells_.data(), this->device_hitTuple_apc_, this->m_params.algoParams_); @@ -341,18 +324,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDiv1D, - Kernel_fillGenericPair{}, - this->deviceTracksCells_->view(), - this->device_nCellTracks_->data(), - this->device_cellToTracks_->data()); + Kernel_fillGenericCouple{}, + this->deviceTracksCells_.view(), + this->device_nCellTracks_.data(), + this->device_cellToTracks_.data()); if (this->m_params.algoParams_.doStats_) alpaka::exec(queue, workDiv1D, Kernel_mark_used{}, - this->device_simpleCells_->data(), - this->device_cellToTracks_->data(), - this->device_nCells_->data()); + this->device_simpleCells_.data(), + this->device_cellToTracks_.data(), + this->device_nCells_.data()); #ifdef GPU_DEBUG alpaka::wait(queue); @@ -366,7 +349,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { workDiv1D, typename HitContainer::finalizeBulk{}, this->device_hitTuple_apc_, - this->device_hitContainer_->data()); + this->device_hitContainer_.data()); #ifdef GPU_DEBUG alpaka::wait(queue); @@ -377,7 +360,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Kernel_fillHitDetIndices{}, tracks_view, tracks_hits_view, - this->device_hitContainer_->data(), + this->device_hitContainer_.data(), hh); #ifdef GPU_DEBUG @@ -389,7 +372,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Kernel_fillNLayers{}, tracks_view, tracks_hits_view, - this->device_layerStarts_->data(), + this->device_layerStarts_.data(), nLayers, this->device_hitTuple_apc_); @@ -405,9 +388,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDiv1D, Kernel_earlyDuplicateRemover{}, - this->device_simpleCells_->data(), - this->device_nCells_->data(), - this->device_cellToTracks_->data(), + this->device_simpleCells_.data(), + this->device_nCells_.data(), + this->device_cellToTracks_.data(), tracks_view, this->m_params.algoParams_.dupPassThrough_); #ifdef GPU_DEBUG @@ -423,8 +406,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { workDiv1D, Kernel_countMultiplicity{}, tracks_view, - this->device_hitContainer_->data(), - this->device_tupleMultiplicity_->data()); + this->device_hitContainer_.data(), + this->device_tupleMultiplicity_.data()); GenericContainer::template launchFinalize(this->device_tupleMultiplicityView_, queue); #ifdef GPU_DEBUG @@ -437,8 +420,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { workDiv1D, Kernel_fillMultiplicity{}, tracks_view, - this->device_hitContainer_->data(), - this->device_tupleMultiplicity_->data()); + this->device_hitContainer_.data(), + this->device_tupleMultiplicity_.data()); #ifdef GPU_DEBUG alpaka::wait(queue); std::cout << "Kernel_fillMultiplicity -> done!" << std::endl; @@ -457,10 +440,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { workDiv2D, CAFishbone{}, hh, - this->device_simpleCells_->data(), - this->device_nCells_->data(), - this->device_hitToCell_->data(), - this->device_cellToTracks_->data(), + // this->device_theCells_.data(), + this->device_simpleCells_.data(), + this->device_nCells_.data(), + // this->isOuterHitOfCell_.data(), + this->device_hitToCell_.data(), + this->device_cellToTracks_.data(), nhits - offsetBPIX2, true); } @@ -478,7 +463,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { uint32_t offsetBPIX2, Queue &queue) { using namespace caPixelDoublets; - using namespace caHitNtupletGeneratorKernels; auto nhits = hh.metadata().size(); const auto maxDoublets = this->maxNumberOfDoublets_; @@ -508,14 +492,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { workDiv2D, GetDoubletsFromHisto{}, maxDoublets, - this->device_simpleCells_->data(), - this->device_nCells_->data(), + this->device_simpleCells_.data(), + this->device_nCells_.data(), hh, cc, ll, - this->device_layerStarts_->data(), - this->device_hitPhiHist_->data(), - this->device_hitToCell_->data(), + this->device_layerStarts_.data(), + this->device_hitPhiHist_.data(), + this->device_hitToCell_.data(), this->m_params.algoParams_); HitToCell::template launchFinalize(this->device_hitToCellView_, queue); @@ -532,10 +516,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDiv1D, FillDoubletsHisto{}, - this->device_simpleCells_->data(), - this->device_nCells_->data(), + this->device_simpleCells_.data(), + this->device_nCells_.data(), offsetBPIX2, - this->device_hitToCell_->data()); + this->device_hitToCell_.data()); #ifdef GPU_DEBUG alpaka::wait(queue); @@ -566,7 +550,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { workDiv1D, Kernel_classifyTracks{}, tracks_view, - this->device_hitContainer_->data(), + this->device_hitContainer_.data(), this->m_params.qualityCuts_); #ifdef GPU_DEBUG alpaka::wait(queue); @@ -580,9 +564,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDiv1D, Kernel_fishboneCleaner{}, - this->device_simpleCells_->data(), - this->device_nCells_->data(), - this->device_cellToTracks_->data(), + this->device_simpleCells_.data(), + this->device_nCells_.data(), + this->device_cellToTracks_.data(), tracks_view); } #ifdef GPU_DEBUG @@ -595,9 +579,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDiv1D, Kernel_fastDuplicateRemover{}, - this->device_simpleCells_->data(), - this->device_nCells_->data(), - this->device_cellToTracks_->data(), + this->device_simpleCells_.data(), + this->device_nCells_.data(), + this->device_cellToTracks_.data(), tracks_view, this->m_params.algoParams_.dupPassThrough_); #ifdef GPU_DEBUG @@ -613,16 +597,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { workDiv1D, Kernel_countHitInTracks{}, tracks_view, - this->device_hitContainer_->data(), - this->device_hitToTuple_->data()); + this->device_hitContainer_.data(), + this->device_hitToTuple_.data()); GenericContainer::template launchFinalize(this->device_hitToTupleView_, queue); alpaka::exec(queue, workDiv1D, Kernel_fillHitInTracks{}, tracks_view, - this->device_hitContainer_->data(), - this->device_hitToTuple_->data()); + this->device_hitContainer_.data(), + this->device_hitToTuple_.data()); #ifdef GPU_DEBUG alpaka::wait(queue); std::cout << "Kernel_countHitInTracks -> done!" << std::endl; @@ -638,7 +622,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Kernel_rejectDuplicate{}, tracks_view, this->m_params.algoParams_.dupPassThrough_, - this->device_hitToTuple_->data()); + this->device_hitToTuple_.data()); #ifdef GPU_DEBUG alpaka::wait(queue); std::cout << "Kernel_rejectDuplicate -> done!" << std::endl; @@ -648,11 +632,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { workDiv1D, Kernel_sharedHitCleaner{}, hh, - this->device_layerStarts_->data(), + this->device_layerStarts_.data(), tracks_view, this->m_params.algoParams_.minHitsForSharingCut_, this->m_params.algoParams_.dupPassThrough_, - this->device_hitToTuple_->data()); + this->device_hitToTuple_.data()); #ifdef GPU_DEBUG alpaka::wait(queue); std::cout << "Kernel_sharedHitCleaner -> done!" << std::endl; @@ -666,7 +650,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Kernel_simpleTripletCleaner{}, tracks_view, this->m_params.algoParams_.dupPassThrough_, - this->device_hitToTuple_->data()); + this->device_hitToTuple_.data()); #ifdef GPU_DEBUG alpaka::wait(queue); std::cout << "Kernel_simpleTripletCleaner -> done!" << std::endl; @@ -680,7 +664,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Kernel_tripletCleaner{}, tracks_view, this->m_params.algoParams_.dupPassThrough_, - this->device_hitToTuple_->data()); + this->device_hitToTuple_.data()); #ifdef GPU_DEBUG alpaka::wait(queue); std::cout << "Kernel_tripletCleaner -> done!" << std::endl; @@ -696,20 +680,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { workDiv1D, Kernel_checkOverflows{}, tracks_view, - this->device_hitContainer_->data(), - this->device_tupleMultiplicity_->data(), - this->device_hitToTuple_->data(), + this->device_hitContainer_.data(), + this->device_tupleMultiplicity_.data(), + this->device_hitToTuple_.data(), this->device_hitTuple_apc_, - this->device_simpleCells_->data(), - this->device_nCells_->data(), - this->device_nTriplets_->data(), - this->device_nCellTracks_->data(), - this->deviceTriplets_->view(), - this->deviceTracksCells_->view(), + this->device_simpleCells_.data(), + this->device_nCells_.data(), + this->device_nTriplets_.data(), + this->device_nCellTracks_.data(), + this->deviceTriplets_.view(), + this->deviceTracksCells_.view(), nhits, this->maxNumberOfDoublets_, this->m_params.algoParams_, - this->counters_->data()); + this->counters_.data()); } #ifdef CA_STATS @@ -720,9 +704,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Kernel_printSizes{}, hh, tracks_view, - this->device_nCells_->data(), - this->device_nTriplets_->data(), - this->device_nCellTracks_->data()); + this->device_nCells_.data(), + this->device_nTriplets_.data(), + this->device_nCellTracks_.data()); alpaka::wait(queue); #endif @@ -735,8 +719,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDiv1D, Kernel_doStatsForHitInTracks{}, - this->device_hitToTuple_->data(), - this->counters_->data()); + this->device_hitToTuple_.data(), + this->counters_.data()); numberOfBlocks = cms::alpakatools::divide_up_by(3 * maxTuples / 4, blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); @@ -744,12 +728,27 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { workDiv1D, Kernel_doStatsForTracks{}, tracks_view, - this->device_hitContainer_->data(), - this->counters_->data()); + this->device_hitContainer_.data(), + this->counters_.data()); + + workDiv1D = cms::alpakatools::make_workdiv(1, 1); + alpaka::exec(queue, workDiv1D, Kernel_printCounters{}, this->counters_.data()); + alpaka::wait(queue); - auto workDiv1D = cms::alpakatools::make_workdiv(1, 1); - alpaka::exec(queue, workDiv1D, Kernel_printCounters{}, this->counters_->data()); + workDiv1D = cms::alpakatools::make_workdiv(1, 1); + alpaka::exec(queue, + workDiv1D, + Kernel_print_found_ntuplets{}, + hh, + tracks_view, + this->device_hitContainer_.data(), + this->device_hitToTuple_.data(), + 0, + 100, + 0); + alpaka::wait(queue); } + #ifdef GPU_DEBUG alpaka::wait(queue); #endif @@ -767,8 +766,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Kernel_print_found_ntuplets{}, hh, tracks_view, - this->device_hitContainer_->data(), - this->device_hitToTuple_->data(), + this->device_hitContainer_.data(), + this->device_hitToTuple_.data(), k, k + 500, iev); @@ -779,7 +778,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Kernel_print_found_ntuplets{}, hh, tracks_view, - this->device_hitToTuple_->data(), + this->device_hitToTuple_.data(), 20000, 1000000, iev); @@ -794,12 +793,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template void CAHitNtupletGeneratorKernels::printCounters() { auto workDiv1D = cms::alpakatools::make_workdiv(1,1); - alpaka::exec(queue_, workDiv1D, Kernel_printCounters{}, this->counters_->data()); + alpaka::exec(queue_, workDiv1D, Kernel_printCounters{}, this->counters_.data()); } */ template class CAHitNtupletGeneratorKernels; template class CAHitNtupletGeneratorKernels; template class CAHitNtupletGeneratorKernels; + template class CAHitNtupletGeneratorKernels; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h index 2634c1479b3f1..0c852c5f75ddc 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h @@ -17,9 +17,9 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/memory.h" #include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" -#include "RecoTracker/PixelSeeding/interface/alpaka/CAPairSoACollection.h" +#include "RecoTracker/PixelSeeding/interface/alpaka/CACoupleSoACollection.h" -#include "CACell.h" +#include "CASimpleCell.h" #include "CAPixelDoublets.h" #include "CAStructures.h" @@ -101,7 +101,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { public: using TrackerTraits = TTTraits; - using SimpleCell = CACell; + using SimpleCell = CASimpleCell; using Params = caHitNtupletGenerator::ParamsT; using Counters = caHitNtupletGenerator::Counters; // Track qualities @@ -124,20 +124,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { using GenericContainer = caStructures::GenericContainer; using GenericContainerStorage = typename GenericContainer::index_type; using GenericContainerView = typename GenericContainer::View; - using DeviceGenericContainerBuffer = std::optional>; - using DeviceGenericStorageBuffer = - std::optional>; - using DeviceGenericOffsetsBuffer = - std::optional>; + using DeviceGenericContainerBuffer = cms::alpakatools::device_buffer; + using DeviceGenericStorageBuffer = cms::alpakatools::device_buffer; + using DeviceGenericOffsetsBuffer = cms::alpakatools::device_buffer; using SequentialContainer = caStructures::SequentialContainer; using SequentialContainerStorage = typename SequentialContainer::index_type; using SequentialContainerView = typename SequentialContainer::View; - using DeviceSequentialContainerBuffer = std::optional>; - using DeviceSequentialStorageBuffer = - std::optional>; - using DeviceSequentialOffsetsBuffer = - std::optional>; + using DeviceSequentialContainerBuffer = cms::alpakatools::device_buffer; + using DeviceSequentialStorageBuffer = cms::alpakatools::device_buffer; + using DeviceSequentialOffsetsBuffer = cms::alpakatools::device_buffer; CAHitNtupletGeneratorKernels(Params const& params, uint32_t nHits, @@ -148,12 +144,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Queue& queue); ~CAHitNtupletGeneratorKernels() = default; - TupleMultiplicity const* tupleMultiplicity() const { return device_tupleMultiplicity_->data(); } - HitContainer const* hitContainer() const { return device_hitContainer_->data(); } - HitToCell const* hitToCell() const { return device_hitToCell_->data(); } - HitToTuple const* hitToTuple() const { return device_hitToTuple_->data(); } - CellToCell const* cellToCell() const { return device_cellToNeighbors_->data(); } - CellToTrack const* cellToTrack() const { return device_cellToTracks_->data(); } + TupleMultiplicity const* tupleMultiplicity() const { return device_tupleMultiplicity_.data(); } + HitContainer const* hitContainer() const { return device_hitContainer_.data(); } + HitToCell const* hitToCell() const { return device_hitToCell_.data(); } + HitToTuple const* hitToTuple() const { return device_hitToTuple_.data(); } + CellToCell const* cellToCell() const { return device_cellToNeighbors_.data(); } + CellToTrack const* cellToTrack() const { return device_cellToTracks_.data(); } void prepareHits(const HitsConstView& hh, const HitModulesConstView& mm, @@ -182,7 +178,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: // params Params const& m_params; - std::optional> counters_; + cms::alpakatools::device_buffer counters_; // Hits->Track DeviceGenericContainerBuffer device_hitToTuple_; @@ -197,10 +193,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { GenericContainerView device_hitToCellView_; // Hits Phi Binner - std::optional> device_hitPhiHist_; - std::optional> device_phiBinnerStorage_; + cms::alpakatools::device_buffer device_hitPhiHist_; + cms::alpakatools::device_buffer device_phiBinnerStorage_; PhiBinnerView device_hitPhiView_; - std::optional> device_layerStarts_; + cms::alpakatools::device_buffer device_layerStarts_; // Cells-> Neighbor Cells DeviceGenericContainerBuffer device_cellToNeighbors_; @@ -226,17 +222,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { DeviceGenericOffsetsBuffer device_tupleMultiplicityOffsets_; GenericContainerView device_tupleMultiplicityView_; - std::optional> device_simpleCells_; - - std::optional> - device_extraStorage_; + // cms::alpakatools::device_buffer device_theCells_; + cms::alpakatools::device_buffer device_simpleCells_; + // cms::alpakatools::device_buffer device_isOuterHitOfCell_; + // cms::alpakatools::device_buffer isOuterHitOfCell_; + // cms::alpakatools::device_buffer device_theCellNeighbors_; + // cms::alpakatools::device_buffer device_theCellTracks_; + // cms::alpakatools::device_buffer cellStorage_; + // CellNeighbors* device_theCellNeighborsContainer_; + // CellTracks* device_theCellTracksContainer_; + cms::alpakatools::device_buffer device_extraStorage_; cms::alpakatools::AtomicPairCounter* device_hitTuple_apc_; - std::optional> device_nCells_; - std::optional> device_nTriplets_; - std::optional> device_nCellTracks_; + cms::alpakatools::device_view device_nCells_; + cms::alpakatools::device_view device_nTriplets_; + cms::alpakatools::device_view device_nCellTracks_; - std::optional deviceTriplets_; - std::optional deviceTracksCells_; + CACoupleSoACollection deviceTriplets_; + CACoupleSoACollection deviceTracksCells_; // this could be inferred from the above buffers // but seems cleaner to have a dedicate variable diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h index ca6e3eefc83d9..faf785aa680d8 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h @@ -1,11 +1,10 @@ #ifndef RecoTracker_PixelSeeding_plugins_alpaka_CAHitNtupletGeneratorKernelsImpl_h #define RecoTracker_PixelSeeding_plugins_alpaka_CAHitNtupletGeneratorKernelsImpl_h -// #define GPU_DEBUG -// #define NTUPLE_DEBUG -// #define CA_DEBUG -// #define CA_WARNINGS - +// MRMR #define GPU_DEBUG // MRMR +// MRMR #define NTUPLE_DEBUG // MRMR +// MRMR #define CA_DEBUG // MRMR +#define CA_WARNINGS // MRMR // C++ includes #include #include @@ -24,10 +23,10 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" #include "FWCore/Utilities/interface/isFinite.h" -#include "RecoTracker/PixelSeeding/interface/CAPairSoA.h" +#include "RecoTracker/PixelSeeding/interface/CACoupleSoA.h" // local includes -#include "CACell.h" +#include "CASimpleCell.h" #include "CAHitNtupletGeneratorKernels.h" #include "CAStructures.h" @@ -60,7 +59,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { class SetHitsLayerStart { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, const reco::HitModuleSoAConstView &mm, const reco::CALayersSoAConstView &ll, uint32_t *__restrict__ hitsLayerStart) const { @@ -83,7 +83,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { class Kernel_printSizes { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, HitsConstView hh, TkSoAView tt, uint32_t const *__restrict__ nCells, @@ -104,18 +105,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_checkOverflows { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, HitContainer const *__restrict__ foundNtuplets, TupleMultiplicity const *tupleMultiplicity, HitToTuple const *hitToTuple, cms::alpakatools::AtomicPairCounter *apc, - CACell const *__restrict__ cells, + CASimpleCell const *__restrict__ cells, uint32_t const *__restrict__ nCells, uint32_t const *__restrict__ nTrips, uint32_t const *__restrict__ nCellTracks, - caStructures::CAPairSoAConstView cellCell, - caStructures::CAPairSoAConstView cellTrack, + caStructures::CACoupleSoAConstView cellCell, + caStructures::CACoupleSoAConstView cellTrack, int32_t nHits, uint32_t maxNumberOfDoublets, AlgoParams const ¶ms, @@ -184,8 +186,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_fishboneCleaner { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, - CACell const *cells, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + CASimpleCell const *cells, uint32_t const *__restrict__ nCells, CellToTrack const *__restrict__ cellTracksHisto, TkSoAView tracks_view) const { @@ -208,8 +211,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_earlyDuplicateRemover { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, - CACell const *cells, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + CASimpleCell const *cells, uint32_t const *__restrict__ nCells, CellToTrack const *__restrict__ cellTracksHisto, TkSoAView tracks_view, @@ -255,8 +259,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_fastDuplicateRemover { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, - CACell const *__restrict__ cells, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + CASimpleCell const *__restrict__ cells, uint32_t const *__restrict__ nCells, CellToTrack const *__restrict__ cellTracksHisto, TkSoAView tracks_view, @@ -347,18 +352,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_connect { public: - ALPAKA_FN_ACC void operator()(Acc2D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, cms::alpakatools::AtomicPairCounter *apc, // just to zero them HitsConstView hh, reco::CALayersSoAConstView ll, - caStructures::CAPairSoAView cn, - CACell *cells, + caStructures::CACoupleSoAView cn, + CASimpleCell *cells, uint32_t const *nCells, uint32_t *nTrips, HitToCell const *__restrict__ outerHitHisto, CellToCell *cellNeighborsHisto, AlgoParams const ¶ms) const { - using Cell = CACell; + using Cell = CASimpleCell; uint32_t maxTriplets = cn.metadata().size(); if (cms::alpakatools::once_per_grid(acc)) { @@ -399,20 +405,22 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { auto dcaCut = ll[oc.innerLayer()].caDCACut(); bool aligned = Cell::areAlignedRZ(r1, z1, ri, zi, ro, zo, params.ptmin_, thetaCut); if (aligned && thisCell.dcaCut(hh, oc, dcaCut, params.hardCurvCut_)) { - auto t_ind = alpaka::atomicAdd(acc, nTrips, 1u, alpaka::hierarchy::Blocks{}); + auto t_ind = alpaka::atomicAdd(acc, nTrips, (uint32_t)1, alpaka::hierarchy::Blocks{}); #ifdef CA_DEBUG - printf("Triplet no. %d %.5f %.5f (%d %d) - %d %d -> (%d, %d, %d, %d) \n", - t_ind, - thetaCut, - dcaCut, - thisCell.layerPairId(), - oc.layerPairId(), - otherCell, - cellIndex, - thisCell.inner_hit_id(), - thisCell.outer_hit_id(), - oc.inner_hit_id(), - oc.outer_hit_id()); + if (cms::alpakatools::once_per_grid(acc)) + printf("%-10s %-7s %-7s %-3s %-3s %-3s %-3s %-4s %-4s %-4s %-4s\n", "Triplet#", "Theta", "DCA", "LI", "LO", "OC", "CI", "i1", "o1", "i2", "o2"); + printf("%-10d %-7.5f %-7.5f %-3d %-3d %-3d %-3d %-4d %-4d %-4d %-4d\n", + t_ind, + thetaCut, + dcaCut, + thisCell.layerPairId(), + oc.layerPairId(), + otherCell, + cellIndex, + thisCell.inner_hit_id(), + thisCell.outer_hit_id(), + oc.inner_hit_id(), + oc.outer_hit_id()); #endif #ifdef CA_DEBUG @@ -422,8 +430,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { if (t_ind >= maxTriplets) { #ifdef CA_WARNINGS printf("Warning!!!! Too many cell->cell (triplets) associations (limit = %d)!\n", cn.metadata().size()); + assert(0); #endif - alpaka::atomicSub(acc, nTrips, 1u, alpaka::hierarchy::Blocks{}); + alpaka::atomicSub(acc, nTrips, (uint32_t)1, alpaka::hierarchy::Blocks{}); break; } @@ -441,26 +450,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { }; template - class FillDoubletsHisto { + class Kernel_fillGenericCouple { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, - CACell const *__restrict__ cells, - uint32_t *nCells, - uint32_t offsetBPIX2, - HitToCell *outerHitHisto) const { - for (auto cellIndex : cms::alpakatools::uniform_elements(acc, *nCells)) { -#ifdef DOUBLETS_DEBUG - printf("outerHitHisto;%d;%d\n", cellIndex, cells[cellIndex].outer_hit_id()); -#endif - outerHitHisto->fill(acc, cells[cellIndex].outer_hit_id() - offsetBPIX2, cellIndex); - } - } - }; - - class Kernel_fillGenericPair { - public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, - caStructures::CAPairSoAConstView cn, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + caStructures::CACoupleSoAConstView cn, uint32_t const *nElements, GenericContainer *genericHisto) const { for (uint32_t index : cms::alpakatools::uniform_elements(acc, *nElements)) { @@ -472,20 +466,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_find_ntuplets { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, const ::reco::CAGraphSoAConstView &cc, TkSoAView tracks_view, HitContainer *foundNtuplets, CellToCell const *__restrict__ cellNeighborsHisto, CellToTrack *cellTracksHisto, - caStructures::CAPairSoAView ct, - CACell *__restrict__ cells, + caStructures::CACoupleSoAView ct, + CASimpleCell *__restrict__ cells, uint32_t *nCellTracks, uint32_t const *nTriplets, uint32_t const *nCells, cms::alpakatools::AtomicPairCounter *apc, AlgoParams const ¶ms) const { - using Cell = CACell; + using Cell = CASimpleCell; #ifdef GPU_DEBUG if (cms::alpakatools::once_per_grid(acc)) @@ -519,18 +514,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { typename Cell::TmpTuple stack; stack.reset(); - thisCell.template find_ntuplets(acc, - cc, - cells, - *foundNtuplets, - cellNeighborsHisto, - cellTracksHisto, - nCellTracks, - ct, - *apc, - tracks_view.quality(), - stack, - params.minHitsPerNtuplet_); + thisCell.template find_ntuplets(acc, + cc, + cells, + *foundNtuplets, + cellNeighborsHisto, + cellTracksHisto, + nCellTracks, + ct, + *apc, + tracks_view.quality(), + stack, + params.minHitsPerNtuplet_); ALPAKA_ASSERT_ACC(stack.empty()); } } @@ -540,11 +535,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_mark_used { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, - CACell *__restrict__ cells, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, + CASimpleCell *__restrict__ cells, CellToTrack const *__restrict__ cellTracksHisto, uint32_t const *nCells) const { - using Cell = CACell; + using Cell = CASimpleCell; for (auto idx : cms::alpakatools::uniform_elements(acc, (*nCells))) { auto &thisCell = cells[idx]; if (cellTracksHisto->size(idx) > 0) @@ -556,7 +552,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_countMultiplicity { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, HitContainer const *__restrict__ foundNtuplets, TupleMultiplicity *tupleMultiplicity) const { @@ -579,7 +576,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_fillMultiplicity { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, HitContainer const *__restrict__ foundNtuplets, TupleMultiplicity *tupleMultiplicity) const { @@ -602,7 +600,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_classifyTracks { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, HitContainer const *__restrict__ foundNtuplets, QualityCuts cuts) const { @@ -649,7 +648,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_doStatsForTracks { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, HitContainer const *__restrict__ foundNtuplets, Counters *counters) const { @@ -669,7 +669,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_countHitInTracks { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, HitContainer const *__restrict__ foundNtuplets, HitToTuple *hitToTuple) const { @@ -685,7 +686,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_fillHitInTracks { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, HitContainer const *__restrict__ foundNtuplets, HitToTuple *hitToTuple) const { @@ -701,7 +703,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_fillHitDetIndices { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, TkHitSoAView track_hits_view, HitContainer const *__restrict__ foundNtuplets, @@ -728,7 +731,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_fillNLayers { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, TkHitSoAView track_hits_view, uint32_t const *__restrict__ layerStarts, @@ -757,7 +761,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_doStatsForHitInTracks { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, HitToTuple const *__restrict__ hitToTuple, Counters *counters) const { auto &c = *counters; @@ -774,7 +779,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_countSharedHit { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, int *__restrict__ nshared, HitContainer const *__restrict__ ptuples, Quality const *__restrict__ quality, @@ -803,7 +809,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { if (foundNtuplets.size(*it) > 3) continue; - alpaka::atomicAdd(acc, &nshared[*it], 1, alpaka::hierarchy::Blocks{}); + alpaka::atomicAdd(acc, &nshared[*it], 1ull, alpaka::hierarchy::Blocks{}); } } // hit loop @@ -812,7 +818,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_markSharedHit { - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, int const *__restrict__ nshared, HitContainer const *__restrict__ tuples, Quality *__restrict__ quality, @@ -839,7 +846,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_rejectDuplicate { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, bool dupPassThrough, HitToTuple const *__restrict__ phitToTuple) const { @@ -894,7 +902,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_sharedHitCleaner { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, HitsConstView hh, uint32_t const *__restrict__ layerStarts, TkSoAView tracks_view, @@ -948,7 +957,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_tripletCleaner { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, bool dupPassThrough, HitToTuple const *__restrict__ phitToTuple) const { @@ -1006,7 +1016,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_simpleTripletCleaner { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, bool dupPassThrough, HitToTuple const *__restrict__ phitToTuple) const { @@ -1050,7 +1061,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_print_found_ntuplets { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, HitsConstView hh, TkSoAView tracks_view, HitContainer const *__restrict__ foundNtuplets, @@ -1060,43 +1072,69 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { int iev) const { constexpr auto loose = Quality::loose; + // Print header + printf("TK: %10s %3s %3s %3s %6s %9s %8s %8s %9s %9s %9s %9s %9s\n", + "ID", + "Q", + "nH", + "nL", + "Qchg", + "pT", + "Eta", + "Phi", + "Tip", + "Zip", + "Chi2", + "z1", + "z2"); for (auto i : cms::alpakatools::uniform_elements(acc, firstPrint, std::min(lastPrint, foundNtuplets->nOnes()))) { auto nh = foundNtuplets->size(i); if (nh < 3) continue; - if (tracks_view[i].quality() < loose) - continue; - printf("TK: %d %d %d %d %f %f %f %f %f %f %f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n", - 10000 * iev + i, - int(tracks_view[i].quality()), - nh, - tracks_view[i].nLayers(), - reco::charge(tracks_view, i), - tracks_view[i].pt(), - tracks_view[i].eta(), - reco::phi(tracks_view, i), - reco::tip(tracks_view, i), - reco::zip(tracks_view, i), - tracks_view[i].chi2(), - hh[*foundNtuplets->begin(i)].zGlobal(), - hh[*(foundNtuplets->begin(i) + 1)].zGlobal(), - hh[*(foundNtuplets->begin(i) + 2)].zGlobal(), - nh > 3 ? hh[int(*(foundNtuplets->begin(i) + 3))].zGlobal() : 0, - nh > 4 ? hh[int(*(foundNtuplets->begin(i) + 4))].zGlobal() : 0, - nh > 5 ? hh[int(*(foundNtuplets->begin(i) + 5))].zGlobal() : 0, - nh > 6 ? hh[int(*(foundNtuplets->begin(i) + nh - 1))].zGlobal() : 0); +// if (tracks_view[i].quality() < loose) +// continue; + + printf("TK: %10d %3d %3d %3d %6.1f %9.3f %8.3f %8.3f %9.3f %9.3f %9.3f %9.3f %9.3f\n", + 10000 * iev + i, // ID + int(tracks_view[i].quality()), // Quality + nh, // Number of hits + tracks_view[i].nLayers(), // Number of layers + reco::charge(tracks_view, i), // Charge + tracks_view[i].pt(), // Pt + tracks_view[i].eta(), // Eta + reco::phi(tracks_view, i), // Phi + reco::tip(tracks_view, i), // Tip + reco::zip(tracks_view, i), // Zip + tracks_view[i].chi2(), // Chi2 + hh[*foundNtuplets->begin(i)].zGlobal(), // z1 + hh[*(foundNtuplets->begin(i) + 1)].zGlobal() // z2 + ); } } }; class Kernel_printCounters { public: - ALPAKA_FN_ACC void operator()(Acc1D const &acc, Counters const *counters) const { + template >> + ALPAKA_FN_ACC void operator()(TAcc const &acc, Counters const *counters) const { auto const &c = *counters; - printf( - "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks | nLooseTracks | nGoodTracks | nUsedHits | " - "nDupHits | nFishCells | nKilledCells | nUsedCells | nZeroTrackCells ||\n"); - printf("Counters Raw %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n", + printf("||%-15s|%10s|%10s|%10s|%10s|%14s|%16s|%14s|%11s|%10s|%13s|%15s|%12s|%17s||\n", + "Counters", + "nEvents", + "nHits", + "nCells", + "nTuples", + "nFitTracks", + "nLooseTracks", + "nGoodTracks", + "nUsedHits", + "nDupHits", + "nFishCells", + "nKilledCells", + "nUsedCells", + "nZeroTrackCells"); + printf("||%-15s|%10lld|%10lld|%10lld|%10lld|%14lld|%16lld|%14lld|%11lld|%10lld|%13lld|%15lld|%12lld|%17lld||\n", + "Raw", c.nEvents, c.nHits, c.nCells, @@ -1110,22 +1148,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { c.nKilledCells, c.nEmptyCells, c.nZeroTrackCells); - printf( - "Counters Norm %lld || %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.1f| %.3f| %.3f| %.3f| " - "%.3f||\n", - c.nEvents, - c.nHits / double(c.nEvents), - c.nCells / double(c.nEvents), - c.nTuples / double(c.nEvents), - c.nFitTracks / double(c.nEvents), - c.nLooseTracks / double(c.nEvents), - c.nGoodTracks / double(c.nEvents), - c.nUsedHits / double(c.nEvents), - c.nDupHits / double(c.nEvents), - c.nFishCells / double(c.nCells), - c.nKilledCells / double(c.nCells), - c.nEmptyCells / double(c.nCells), - c.nZeroTrackCells / double(c.nCells)); + printf("||%-15s|%10lld|%10.1f|%10.1f|%10.1f|%14.1f|%16.1f|%14.1f|%11.1f|%10.3f|%13.3f|%15.3f|%12.3f|%17.3f||\n", + "Norm", + c.nEvents, + c.nHits / double(c.nEvents), + c.nCells / double(c.nEvents), + c.nTuples / double(c.nEvents), + c.nFitTracks / double(c.nEvents), + c.nLooseTracks / double(c.nEvents), + c.nGoodTracks / double(c.nEvents), + c.nUsedHits / double(c.nEvents), + c.nDupHits / double(c.nEvents), + c.nFishCells / double(c.nCells), + c.nKilledCells / double(c.nCells), + c.nEmptyCells / double(c.nCells), + c.nZeroTrackCells / double(c.nCells)); } }; diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h index 46997a61b064b..0234a8366bb0a 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h @@ -10,30 +10,55 @@ #include "CAPixelDoubletsAlgos.h" -namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { - - template - class GetDoubletsFromHisto { - public: - // #ifdef __CUDACC__ - // __launch_bounds__(getDoubletsFromHistoMaxBlockSize, getDoubletsFromHistoMinBlocksPerMP) // TODO: Alapakafy - // #endif - ALPAKA_FN_ACC void operator()(Acc2D const& acc, - uint32_t maxNumOfDoublets, - CACell* cells, - uint32_t* nCells, - HitsConstView hh, - ::reco::CAGraphSoAConstView cc, - ::reco::CALayersSoAConstView ll, - uint32_t const* __restrict__ offsets, - PhiBinner const* phiBinner, - HitToCell* outerHitHisto, - AlgoParams const& params) const { - doubletsFromHisto( - acc, maxNumOfDoublets, cells, nCells, hh, cc, ll, offsets, phiBinner, outerHitHisto, params); - } - }; - -} // namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using namespace alpaka; + using namespace cms::alpakatools; + + namespace caPixelDoublets { + + template + class GetDoubletsFromHisto { + public: + template >> + // #ifdef __CUDACC__ + // __launch_bounds__(getDoubletsFromHistoMaxBlockSize, getDoubletsFromHistoMinBlocksPerMP) // TODO: Alapakafy + // #endif + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint32_t maxNumOfDoublets, + CASimpleCell* cells, + uint32_t* nCells, + HitsConstView hh, + ::reco::CAGraphSoAConstView cc, + ::reco::CALayersSoAConstView ll, + uint32_t const* __restrict__ offsets, + PhiBinner const* phiBinner, + HitToCell* outerHitHisto, + AlgoParams const& params) const { + doubletsFromHisto( + acc, maxNumOfDoublets, cells, nCells, hh, cc, ll, offsets, phiBinner, outerHitHisto, params); + } + }; + + template + class FillDoubletsHisto { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + CASimpleCell const* __restrict__ cells, + uint32_t* nCells, + uint32_t offsetBPIX2, + HitToCell* outerHitHisto) const { + for (auto cellIndex : cms::alpakatools::uniform_elements(acc, *nCells)) { +#ifdef DOUBLETS_DEBUG + printf("outerHitHisto;%d;%d\n", cellIndex, cells[cellIndex].outer_hit_id()); +#endif + outerHitHisto->fill(acc, cells[cellIndex].outer_hit_id() - offsetBPIX2, cellIndex); + } + } + }; + + } // namespace caPixelDoublets + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE #endif // RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoublets_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h index 26b38ca1aa9fb..4313dc702d32d 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h @@ -18,13 +18,13 @@ #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" #include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" -#include "CACell.h" +#include "CASimpleCell.h" #include "CAStructures.h" #include "CAHitNtupletGeneratorKernels.h" -// #define GPU_DEBUG -// #define DOUBLETS_DEBUG -// #define CA_WARNINGS +// MRMR #define GPU_DEBUG // MRMR +// MRMR #define DOUBLETS_DEBUG // MRMR +#define CA_WARNINGS namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { using namespace cms::alpakatools; @@ -129,17 +129,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { } template - ALPAKA_FN_ACC ALPAKA_FN_INLINE void doubletsFromHisto(const TAcc& acc, - uint32_t maxNumOfDoublets, - CACell* cells, - uint32_t* nCells, - HitsConstView hh, - ::reco::CAGraphSoAConstView cc, - ::reco::CALayersSoAConstView ll, - uint32_t const* __restrict__ offsets, - PhiBinner const* phiBinner, - HitToCell* outerHitHisto, - AlgoParams const& params) { + ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) doubletsFromHisto( + const TAcc& acc, + uint32_t maxNumOfDoublets, + CASimpleCell* cells, + uint32_t* nCells, + HitsConstView hh, + ::reco::CAGraphSoAConstView cc, + ::reco::CALayersSoAConstView ll, + uint32_t const* __restrict__ offsets, + PhiBinner const* phiBinner, + HitToCell* outerHitHisto, + AlgoParams const& params) { + const bool doClusterCut = params.minYsizeB1_ > 0 or params.minYsizeB2_ > 0; const bool doZSizeCut = params.maxDYsize12_ > 0 or params.maxDYsize_ > 0 or params.maxDYPred_ > 0; @@ -218,8 +220,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { offsets[inner + 1]); #endif // found hit corresponding to our worker thread, now do the job - if (hh[i].detectorIndex() > ll.layerStarts()[ll.metadata().size() - 1]) //TODO use cc + if (hh[i].detectorIndex() > ll.layerStarts()[ll.metadata().size() - 1]) { //TODO use cc +#ifdef DOUBLETS_DEBUG + printf("Killed here 1\n"); +#endif continue; // invalid + } /* maybe clever, not effective when zoCut is on auto bpos = (mi%8)/4; // if barrel is 1 for z>0 @@ -229,15 +235,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { auto mez = hh[i].zGlobal(); - if (mez < cc.minz()[pairLayerId] || mez > cc.maxz()[pairLayerId]) + if (mez < cc.minz()[pairLayerId] || mez > cc.maxz()[pairLayerId]) { +#ifdef DOUBLETS_DEBUG + printf("Killed here 2 --> mez: %f [index: %d], miz: %f, maxz: %f\n", mez, hh[i].detectorIndex(), cc.minz()[pairLayerId], cc.maxz()[pairLayerId]); +#endif continue; + } #ifdef DOUBLETS_DEBUG if (doClusterCut && outer > pixelTopology::last_barrel_layer) printf("clustCut: %d %d \n", i, clusterCut(acc, hh, ll, params, i)); #endif - if (doClusterCut && outer > pixelTopology::last_barrel_layer && clusterCut(acc, hh, ll, params, i)) + if (doClusterCut && outer > pixelTopology::last_barrel_layer && clusterCut(acc, hh, ll, params, i)) { +#ifdef DOUBLETS_DEBUG + printf("Killed here 3\n"); +#endif continue; + } auto mep = hh[i].iphi(); auto mer = hh[i].rGlobal(); @@ -254,7 +268,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { auto zo = hh[j].zGlobal(); auto ro = hh[j].rGlobal(); auto dr = ro - mer; - return dr > cc.maxr()[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > params.cellZ0Cut_ * dr; +#ifdef DOUBLETS_DEBUG + printf("dr: %4.3f, %4.3f, %4.3f --> %d\n", mer, ro, dr, (dr > cc.maxr()[pairLayerId])); + printf("mez: %4.3f, zo: %4.3f, std::abs((mez * ro - mer * zo)): %4.3f --> %d\n", + mez, zo, std::abs((mez * ro - mer * zo)), (std::abs((mez * ro - mer * zo)) > params.cellZ0Cut_*dr)); +#endif + return dr > cc.maxr()[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > params.cellZ0Cut_ * dr; }; auto iphicut = cc.phiCuts()[pairLayerId]; @@ -263,7 +282,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { auto kh = PhiHisto::bin(int16_t(mep + iphicut)); auto incr = [](auto& k) { return k = (k + 1) % PhiHisto::nbins(); }; -#ifdef GPU_DEGBU +#ifdef GPU_DEBUG printf("pairLayerId %d %d %.2f %.2f %.2f \n", pairLayerId, cc.phiCuts()[pairLayerId], @@ -290,33 +309,57 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { auto oi = p[pIndex]; ALPAKA_ASSERT_ACC(oi >= offsets[outer]); ALPAKA_ASSERT_ACC(oi < offsets[outer + 1]); +#ifdef DOUBLETS_DEBUG + printf("Exploring couple i: %d o: %d\n", i, oi); +#endif auto mo = hh[oi].detectorIndex(); // invalid - if (mo > pixelClustering::maxNumModules) //FIXME use cc? + if (mo > pixelClustering::maxNumModules) { //FIXME use cc? +#ifdef DOUBLETS_DEBUG + printf("Killed here 4\n"); +#endif continue; + } - if (params.cellZ0Cut_ > 0. && z0cutoff(oi)) + if (params.cellZ0Cut_ > 0. && z0cutoff(oi)) { +#ifdef DOUBLETS_DEBUG + printf("Killed here 5\n"); +#endif continue; + } auto mop = hh[oi].iphi(); uint16_t idphi = std::min(std::abs(int16_t(mop - mep)), std::abs(int16_t(mep - mop))); - if (idphi > iphicut) + if (idphi > iphicut) { +#ifdef DOUBLETS_DEBUG + printf("Killed here 6\n"); +#endif continue; + } #ifdef DOUBLETS_DEBUG printf("zSizeCut: %d %d %d \n", i, oi, zSizeCut(acc, hh, ll, params, i, oi)); #endif - if (doZSizeCut && zSizeCut(acc, hh, ll, params, i, oi)) + if (doZSizeCut && zSizeCut(acc, hh, ll, params, i, oi)) { +#ifdef DOUBLETS_DEBUG + printf("Killed here 7\n"); +#endif continue; + } - if (params.cellPtCut_ > 0. && ptcut(oi, idphi)) + if (params.cellPtCut_ > 0. && ptcut(oi, idphi)) { +#ifdef DOUBLETS_DEBUG + printf("Killed here 8\n"); +#endif continue; + } auto ind = alpaka::atomicAdd(acc, nCells, 1u, alpaka::hierarchy::Blocks{}); if (ind >= maxNumOfDoublets) { #ifdef CA_WARNINGS printf("Warning!!!! Too many cells (limit = %d)!\n", maxNumOfDoublets); + assert(0); #endif alpaka::atomicSub(acc, nCells, 1u, alpaka::hierarchy::Blocks{}); break; diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CASimpleCell.h b/RecoTracker/PixelSeeding/plugins/alpaka/CASimpleCell.h new file mode 100644 index 0000000000000..fd76ba1358a40 --- /dev/null +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CASimpleCell.h @@ -0,0 +1,302 @@ +#ifndef RecoTracker_PixelSeeding_plugins_alpaka_CASimpleCell_h +#define RecoTracker_PixelSeeding_plugins_alpaka_CASimpleCell_h + +// #define GPU_DEBUG +// #define CA_DEBUG +// #define CA_WARNINGS +#include +#include + +#include + +#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" +#include "DataFormats/TrackSoA/interface/TracksSoA.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "RecoTracker/PixelSeeding/interface/CircleEq.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" +#include "RecoTracker/PixelSeeding/interface/CACoupleSoA.h" + +#include "CAStructures.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace ::caStructures; + + template + class CACellT; + + template + class CASimpleCell { + friend class CACellT; + + public: + ALPAKA_FN_ACC ALPAKA_FN_INLINE void init(const HitsConstView& hh, + int layerPairId, + uint8_t theInnerLayer, + uint8_t theOuterLayer, + hindex_type innerHitId, + hindex_type outerHitId) { + theInnerHitId = innerHitId; + theOuterHitId = outerHitId; + theLayerPairId_ = layerPairId; + theInnerLayer_ = theInnerLayer; + theOuterLayer_ = theOuterLayer; + theStatus_ = 0; + theFishboneId = invalidHitId; + + // optimization that depends on access pattern + theInnerZ = hh[innerHitId].zGlobal(); + theInnerR = hh[innerHitId].rGlobal(); + } + + using hindex_type = ::caStructures::hindex_type; + using tindex_type = ::caStructures::tindex_type; + + static constexpr auto invalidHitId = std::numeric_limits::max(); + + using TmpTuple = cms::alpakatools::VecArray; + using HitContainer = caStructures::SequentialContainer; + using CellToCell = caStructures::GenericContainer; + using CellToTracks = caStructures::GenericContainer; + using CACoupleSoAView = caStructures::CACoupleSoAView; + + using Quality = ::pixelTrack::Quality; + static constexpr auto bad = ::pixelTrack::Quality::bad; + + enum class StatusBit : uint16_t { kUsed = 1, kInTrack = 2, kKilled = 1 << 15 }; + + CASimpleCell() = default; + + constexpr unsigned int inner_hit_id() const { return theInnerHitId; } + constexpr unsigned int outer_hit_id() const { return theOuterHitId; } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE int16_t layerPairId() const { return theLayerPairId_; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE int16_t innerLayer() const { return theInnerLayer_; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE int16_t outerLayer() const { return theOuterLayer_; } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_x(const HitsConstView& hh) const { return hh[theInnerHitId].xGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_x(const HitsConstView& hh) const { return hh[theOuterHitId].xGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_y(const HitsConstView& hh) const { return hh[theInnerHitId].yGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_y(const HitsConstView& hh) const { return hh[theOuterHitId].yGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_z(const HitsConstView& hh) const { return theInnerZ; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_z(const HitsConstView& hh) const { return hh[theOuterHitId].zGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_r(const HitsConstView& hh) const { return theInnerR; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_r(const HitsConstView& hh) const { return hh[theOuterHitId].rGlobal(); } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE auto inner_iphi(const HitsConstView& hh) const { return hh[theInnerHitId].iphi(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE auto outer_iphi(const HitsConstView& hh) const { return hh[theOuterHitId].iphi(); } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_detIndex(const HitsConstView& hh) const { + return hh[theInnerHitId].detectorIndex(); + } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_detIndex(const HitsConstView& hh) const { + return hh[theOuterHitId].detectorIndex(); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE auto fishboneId() const { return theFishboneId; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hasFishbone() const { return theFishboneId != invalidHitId; } + + ALPAKA_FN_ACC void print_cell() const { + printf("printing cell: on layerPair: %d, innerLayer: %d, outerLayer: %d, innerHitId: %d, outerHitId: %d \n", + theLayerPairId_, + theInnerLayer_, + theOuterLayer_, + theInnerHitId, + theOuterHitId); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void setFishbone(TAcc const& acc, hindex_type id, float z, const HitsConstView& hh) { + // make it deterministic: use the farther apart (in z) + auto old = theFishboneId; + while (old != + alpaka::atomicCas( + acc, + &theFishboneId, + old, + (invalidHitId == old || std::abs(z - theInnerZ) > std::abs(hh[old].zGlobal() - theInnerZ)) ? id : old, + alpaka::hierarchy::Blocks{})) + old = theFishboneId; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) static bool areAlignedRZ( + float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) { + float radius_diff = std::abs(r1 - ro); + float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo); + + float pMin = ptmin * std::sqrt(distance_13_squared); // this needs to be divided by + // radius_diff later + + float tan_12_13_half_mul_distance_13_squared = fabs(z1 * (ri - ro) + zi * (ro - r1) + zo * (r1 - ri)); + return tan_12_13_half_mul_distance_13_squared * pMin <= thetaCut * distance_13_squared * radius_diff; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool dcaCut(const HitsConstView& hh, + CASimpleCell const& otherCell, + const float region_origin_radius_plus_tolerance, + const float maxCurv) const { + auto x1 = otherCell.inner_x(hh); + auto y1 = otherCell.inner_y(hh); + + auto x2 = inner_x(hh); + auto y2 = inner_y(hh); + + auto x3 = outer_x(hh); + auto y3 = outer_y(hh); + + CircleEq eq(x1, y1, x2, y2, x3, y3); + +// MRMR printf("Computed curvature: %f, vs parameter: %f\n", eq.curvature(), maxCurv); + if (std::abs(eq.curvature()) > maxCurv) + return false; + + return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature()); + } + + // trying to free the track building process from hardcoded layers, leaving + // the visit of the graph based on the neighborhood connections between cells. + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void find_ntuplets(TAcc const& acc, + const ::reco::CAGraphSoAConstView& cc, + CASimpleCell* __restrict__ cells, + HitContainer& foundNtuplets, + CellToCell const* __restrict__ cellNeighborsHisto, + CellToTracks* cellTracksHisto, + uint32_t* nCellTracks, + CACoupleSoAView ct, + cms::alpakatools::AtomicPairCounter& apc, + Quality* __restrict__ quality, + TmpTuple& tmpNtuplet, + const unsigned int minHitsPerNtuplet) const { + // the building process for a track ends if: + // it has no right neighbor + // it has no compatible neighbor + // the ntuplets is then saved if the number of hits it contains is greater + // than a threshold + if constexpr (DEPTH <= 0) { + printf("ERROR: CASimpleCell::find_ntuplets reached full depth!\n"); + ALPAKA_ASSERT_ACC(false); + } else { + auto doubletId = this - cells; + tmpNtuplet.push_back_unsafe(doubletId); // if we move this to be safe we could parallelize further below? + ALPAKA_ASSERT_ACC(tmpNtuplet.size() <= int(TrackerTraits::maxHitsOnTrack - 3)); + + bool last = true; + auto const* __restrict__ bin = cellNeighborsHisto->begin(doubletId); + auto nInBin = cellNeighborsHisto->size(doubletId); + + for (auto idx = 0u; idx < nInBin; idx++) { + // FIXME implement alpaka::ldg and use it here? or is it const* __restrict__ enough? + unsigned int otherCell = bin[idx]; + if (cells[otherCell].isKilled()) + continue; +#ifdef CA_DEBUG + printf("Doublet no. %d | Idx: %d | DoubletID: %ld | OtherCell: %d | isKilled: %d | " + "This(i,o): (%d,%d) -> Other(i,o): (%d,%d) | Idx: %d | BinN: %d\n", + tmpNtuplet.size(), + idx, + doubletId, + otherCell, + cells[otherCell].isKilled(), + this->inner_hit_id(), + this->outer_hit_id(), + cells[otherCell].inner_hit_id(), + cells[otherCell].outer_hit_id(), + idx, + nInBin); +#endif + + last = false; + cells[otherCell].template find_ntuplets(acc, + cc, + cells, + foundNtuplets, + cellNeighborsHisto, + cellTracksHisto, + nCellTracks, + ct, + apc, + quality, + tmpNtuplet, + minHitsPerNtuplet); + } + if (last) { // if long enough save... + if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) { + { + hindex_type hits[TrackerTraits::maxDepth + 2]; + auto nh = 0U; + constexpr int maxFB = 2; // for the time being let's limit this + int nfb = 0; + for (auto c : tmpNtuplet) { + hits[nh++] = cells[c].theInnerHitId; + if (nfb < maxFB && cells[c].hasFishbone()) { + ++nfb; + hits[nh++] = cells[c].theFishboneId; // Fishbone hit is always outer than inner hit + } + } + ALPAKA_ASSERT_ACC(nh < TrackerTraits::maxHitsOnTrack); + hits[nh] = theOuterHitId; + auto it = foundNtuplets.bulkFill(acc, apc, hits, nh + 1); +#ifdef CA_DEBUG + printf("track n. %d nhits %d with cells: \n", it, nh + 1); +#endif + if (it >= 0) { // if negative is overflow.... + for (auto c : tmpNtuplet) { +#ifdef CA_DEBUG + printf("%d - ", c); +#endif + auto t_ind = alpaka::atomicAdd(acc, nCellTracks, (uint32_t)1, alpaka::hierarchy::Blocks{}); + + if (t_ind >= uint32_t(ct.metadata().size())) { +#ifdef CA_WARNINGS + printf("Warning!!!! Too many cell->tracks associations (limit = %d)!\n", ct.metadata().size()); + assert(0); +#endif + alpaka::atomicSub(acc, nCellTracks, (uint32_t)1, alpaka::hierarchy::Blocks{}); + break; + } + cellTracksHisto->count(acc, c); + + ct[t_ind].inner() = c; //cell + ct[t_ind].outer() = it; //track + } +#ifdef CA_DEBUG + printf("\n"); +#endif + quality[it] = bad; // initialize to bad + } + } + } + } + tmpNtuplet.pop_back(); + ALPAKA_ASSERT_ACC(tmpNtuplet.size() < int(TrackerTraits::maxHitsOnTrack - 1)); + } + } + + protected: + int16_t theLayerPairId_; + uint8_t theInnerLayer_; + uint8_t theOuterLayer_; + uint16_t theStatus_; // tbd + + float theInnerZ; + float theInnerR; + hindex_type theInnerHitId; + hindex_type theOuterHitId; + hindex_type theFishboneId; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // RecoTracker_PixelSeeding_plugins_alpaka_CASimpleCell_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc b/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc index 9185bd8b2fb94..20a032d113c35 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.cc @@ -20,5 +20,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template class HelixFit; template class HelixFit; + template class HelixFit; template class HelixFit; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc index e1ecf5825b106..d255840a613c4 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc @@ -389,6 +389,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template class HelixFit; template class HelixFit; + template class HelixFit; template class HelixFit; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/test/alpaka/CAsizes_t.cpp b/RecoTracker/PixelSeeding/test/alpaka/CAsizes_t.cpp index 7b570f3e29ca0..9e164d1eb09e1 100644 --- a/RecoTracker/PixelSeeding/test/alpaka/CAsizes_t.cpp +++ b/RecoTracker/PixelSeeding/test/alpaka/CAsizes_t.cpp @@ -17,7 +17,7 @@ int main() { using namespace caStructures; //for Phase-I - print>(); + print>(); print>(); print>(); print>(); @@ -27,7 +27,7 @@ int main() { print>(); //for Phase-II - print>(); + print>(); print>(); print>(); print>(); diff --git a/RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h b/RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h index 21b1ac1564ff9..045578a27e1cb 100644 --- a/RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h +++ b/RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h @@ -8,6 +8,8 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "RecoTracker/PixelTrackFitting/interface/alpaka/FitUtils.h" +//#define BL_DEEPDEBUG + namespace ALPAKA_ACCELERATOR_NAMESPACE::brokenline { using namespace cms::alpakatools; @@ -60,7 +62,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::brokenline { ALPAKA_FN_ACC ALPAKA_FN_INLINE double multScatt( const TAcc& acc, const double& length, const double bField, const double radius, int layer, double slope) { // limit R to 20GeV... - auto pt2 = alpaka::math::min(acc, 20., bField * radius); + auto pt2 = alpaka::math::min(acc, 50., bField * radius); pt2 *= pt2; constexpr double inv_X0 = 0.06 / 16.; //!< inverse of radiation length of the material in cm //if(Layer==1) XXI_0=0.06/16.; @@ -186,6 +188,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::brokenline { alpaka::math::atan2(acc, riemannFit::cross2D(acc, dVec, eVec), dVec.dot(eVec)); // calculates the arc length } riemannFit::VectorNd zVec = hits.block(2, 0, 1, n).transpose(); + #ifdef BL_DEEPDEBUG + for (u_int i = 0; i < n; i++) { + printf("Point %d, x, %f, y: %f, z: %f, s: %f\n", i, hits.block(0, 0, 2, n)(0, i), hits.block(0, 0, 2, n)(1, i), zVec(i), results.sTransverse(i)); + } + #endif //calculate sTotal and zVec riemannFit::Matrix2xNd pointsSZ = riemannFit::Matrix2xNd::Zero(); @@ -196,7 +203,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::brokenline { } results.sTotal = pointsSZ.block(0, 0, 1, n).transpose(); results.zInSZplane = pointsSZ.block(1, 0, 1, n).transpose(); - + #ifdef BL_DEEPDEBUG + for (u_int i = 0; i < n; i++) { + printf("Point %d, rot_s: %f, rot_z: %f\n", i, results.sTotal(i), results.zInSZplane(i)); + } + #endif //calculate varBeta results.varBeta(0) = results.varBeta(n - 1) = 0; for (u_int i = 1; i < n - 1; i++) { @@ -293,6 +304,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::brokenline { result(3) = result(2) * atan2(riemannFit::cross2D(acc, d, e), d.dot(e)) / (hits(2, n - 1) - hits(2, 0)); // ds/dz slope between last and first point + #ifdef BL_DEEPDEBUG + printf("FastFit results(x,y,R,tan(theta): %e, %e, %e, %e\n", result(0), result(1), result(2), result(3)); + #endif } /*! @@ -481,6 +495,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::brokenline { const auto& varBeta = data.varBeta; const double slope = -data.qCharge / fast_fit(3); + #ifdef BL_DEEPDEBUG + printf("Slope: %e, charge: %d, curvature: %e\n", slope, data.qCharge, fast_fit(3)); + #endif riemannFit::Matrix2d rotMat = rotationMatrix(acc, slope); riemannFit::Matrix3d vMat = riemannFit::Matrix3d::Zero(); // covariance matrix XYZ @@ -495,8 +512,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::brokenline { vMat(2, 1) = vMat(1, 2) = hits_ge.col(i)[4]; // cov_yz vMat(2, 2) = hits_ge.col(i)[5]; // z errors auto tmp = 1. / radii.block(0, i, 2, 1).norm(); - jacobXYZtosZ(0, 0) = radii(1, i) * tmp; - jacobXYZtosZ(0, 1) = -radii(0, i) * tmp; + jacobXYZtosZ(0, 0) = data.qCharge*radii(1, i) * tmp; + jacobXYZtosZ(0, 1) = -data.qCharge*radii(0, i) * tmp; jacobXYZtosZ(1, 2) = 1.; weights(i) = 1. / ((rotMat * jacobXYZtosZ * vMat * jacobXYZtosZ.transpose() * rotMat.transpose())( 1, 1)); // compute the orthogonal weight point by point diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc index acb780850b638..9df4bb5cdf9e6 100644 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc +++ b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -17,11 +18,13 @@ #include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" #include "DataFormats/TrackerCommon/interface/TrackerTopology.h" #include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "DataFormats/TrackerRecHit2D/interface/Phase2TrackerRecHit1D.h" #include "DataFormats/TrajectoryState/interface/LocalTrajectoryParameters.h" #include "FWCore/Framework/interface/ConsumesCollector.h" #include "FWCore/Framework/interface/Event.h" #include "FWCore/Framework/interface/EventSetup.h" #include "FWCore/Framework/interface/global/EDProducer.h" +// #include "FWCore/Framework/interface/RunCache.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" @@ -29,8 +32,10 @@ #include "FWCore/Utilities/interface/InputTag.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" #include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" #include "RecoTracker/PixelTrackFitting/interface/alpaka/FitUtils.h" +#include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h" #include "TrackingTools/AnalyticalJacobians/interface/JacobianLocalToCurvilinear.h" #include "TrackingTools/TrajectoryParametrization/interface/CurvilinearTrajectoryError.h" #include "TrackingTools/TrajectoryParametrization/interface/GlobalTrajectoryParameters.h" @@ -43,8 +48,18 @@ */ // #define GPU_DEBUG +// struct that holds two maps for detIds of the OT modules +struct DetIdMaps { + DetIdMaps() : detIdToOTModuleId_(), detIdIsUsedOTModule_() {} + + // map from the detId of OT modules to the moduleId among the used OT modules + // (starting from 0 for first module of first OT layer) + std::map detIdToOTModuleId_; + // map from detId to bool if used as OT extension + std::map detIdIsUsedOTModule_; +}; -class PixelTrackProducerFromSoAAlpaka : public edm::global::EDProducer<> { +class PixelTrackProducerFromSoAAlpaka : public edm::global::EDProducer> { using TrackSoAHost = reco::TracksHost; using HMSstorage = std::vector; using IndToEdm = std::vector; @@ -55,32 +70,43 @@ class PixelTrackProducerFromSoAAlpaka : public edm::global::EDProducer<> { ~PixelTrackProducerFromSoAAlpaka() override = default; static void fillDescriptions(edm::ConfigurationDescriptions &descriptions); + std::shared_ptr globalBeginRun(edm::Run const &, edm::EventSetup const &) const override; + void globalEndRun(edm::Run const &, edm::EventSetup const &) const override {}; private: void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override; // Event Data tokens - const edm::EDGetTokenT tBeamSpot_; - const edm::EDGetTokenT tokenTrack_; - const edm::EDGetTokenT cpuHits_; - const edm::EDGetTokenT hmsToken_; + const edm::EDGetTokenT beamSpotToken_; + const edm::EDGetTokenT trackSoAToken_; + const edm::EDGetTokenT pixelRecHitsToken_; + edm::EDGetTokenT otRecHitsToken_; + const edm::EDGetTokenT pixelHMSToken_; + edm::EDGetTokenT otHMSToken_; // Event Setup tokens const edm::ESGetToken idealMagneticFieldToken_; - const edm::ESGetToken ttTopoToken_; + const edm::ESGetToken trackerTopologyToken_; + // const edm::ESGetToken trackerGeometryToken_; + const edm::ESGetToken trackerGeometryTokenRun_; int32_t const minNumberOfHits_; pixelTrack::Quality const minQuality_; + const bool useOTExtension_; }; PixelTrackProducerFromSoAAlpaka::PixelTrackProducerFromSoAAlpaka(const edm::ParameterSet &iConfig) - : tBeamSpot_(consumes(iConfig.getParameter("beamSpot"))), - tokenTrack_(consumes(iConfig.getParameter("trackSrc"))), - cpuHits_(consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), - hmsToken_(consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), + : beamSpotToken_(consumes(iConfig.getParameter("beamSpot"))), + trackSoAToken_(consumes(iConfig.getParameter("trackSrc"))), + pixelRecHitsToken_( + consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), + pixelHMSToken_(consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), idealMagneticFieldToken_(esConsumes()), - ttTopoToken_(esConsumes()), + trackerTopologyToken_(esConsumes()), + // trackerGeometryToken_(esConsumes()), + trackerGeometryTokenRun_(esConsumes()), minNumberOfHits_(iConfig.getParameter("minNumberOfHits")), - minQuality_(pixelTrack::qualityByName(iConfig.getParameter("minQuality"))) { + minQuality_(pixelTrack::qualityByName(iConfig.getParameter("minQuality"))), + useOTExtension_(iConfig.getParameter("useOTExtension")) { if (minQuality_ == pixelTrack::Quality::notQuality) { throw cms::Exception("PixelTrackConfiguration") << iConfig.getParameter("minQuality") + " is not a pixelTrack::Quality"; @@ -96,6 +122,48 @@ PixelTrackProducerFromSoAAlpaka::PixelTrackProducerFromSoAAlpaka(const edm::Para // around a rare race condition in framework scheduling produces(); produces(); + + // if useOTExtension consume the OT RecHits + if (useOTExtension_) { + otRecHitsToken_ = + consumes(iConfig.getParameter("outerTrackerRecHitSrc")); + otHMSToken_ = consumes(iConfig.getParameter("outerTrackerRecHitSoAConverterSrc")); + } +} + +std::shared_ptr PixelTrackProducerFromSoAAlpaka::globalBeginRun(const edm::Run &iRun, + const edm::EventSetup &iSetup) const { + // make the maps object + auto detIdMaps = std::make_shared(); + + // if OT RecHits are used in PixelTracks, fill the detIdToOTModuleId_ map + if (useOTExtension_) { + // get track geometry + const auto &trackerGeometry = &iSetup.getData(trackerGeometryTokenRun_); + + // function to check if given module is used as OT for CA + auto isPinPSinOTBarrel = [&](DetId detId) { + // Select only P-hits from the OT barrel + return (trackerGeometry->getDetectorType(detId) == TrackerGeometry::ModuleType::Ph2PSP && + detId.subdetId() == StripSubdetector::TOB); + }; + + // loop over all modules and fill the map detIdToOTModuleId_ + auto const &detUnits = trackerGeometry->detUnits(); + for (uint32_t otModuleId{0}; auto &detUnit : detUnits) { + DetId detId(detUnit->geographicalId()); + // check if the module is used for OT extension + bool isUsedOTModule = isPinPSinOTBarrel(detId); + detIdMaps->detIdIsUsedOTModule_[detUnit->geographicalId()] = isUsedOTModule; + if (isUsedOTModule) { + // save the module index among the extension modules + detIdMaps->detIdToOTModuleId_[detUnit->geographicalId()] = otModuleId; + otModuleId++; + } + } + } + + return detIdMaps; } void PixelTrackProducerFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions &descriptions) { @@ -103,8 +171,11 @@ void PixelTrackProducerFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescrip desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); desc.add("trackSrc", edm::InputTag("pixelTracksAlpaka")); desc.add("pixelRecHitLegacySrc", edm::InputTag("siPixelRecHitsPreSplittingLegacy")); + desc.add("outerTrackerRecHitSrc", edm::InputTag("hltSiPhase2RecHits")); + desc.add("outerTrackerRecHitSoAConverterSrc", edm::InputTag("phase2OTRecHitsSoAConverter")); desc.add("minNumberOfHits", 0); desc.add("minQuality", "loose"); + desc.add("useOTExtension", false); descriptions.addWithDefaultLabel(desc); } @@ -125,44 +196,99 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID streamID, std::cout << "Converting soa helix in reco tracks" << std::endl; #endif + // index map: trackId(in SoA) -> trackId(in legacy edm) auto indToEdmP = std::make_unique(); auto &indToEdm = *indToEdmP; auto const &idealField = iSetup.getData(idealMagneticFieldToken_); + // prepare container for legacy tracks pixeltrackfitting::TracksWithRecHits tracks; - auto const &httopo = iSetup.getData(ttTopoToken_); + // get trackerTopology + auto const &trackerTopology = iSetup.getData(trackerTopologyToken_); + + // get the maps for the detId of the OT modules + auto const &detIdIsUsedOTModule = runCache(iEvent.getRun().index())->detIdIsUsedOTModule_; + auto const &detIdToOTModuleId = runCache(iEvent.getRun().index())->detIdToOTModuleId_; - const auto &bsh = iEvent.get(tBeamSpot_); + // get beamspot + const auto &bsh = iEvent.get(beamSpotToken_); GlobalPoint bs(bsh.x0(), bsh.y0(), bsh.z0()); - auto const &pixelRecHitsDSV = iEvent.get(cpuHits_); - std::vector hitmap; + // get the module's starting indices in the hit collection + auto const &pixelHitsModuleStart = iEvent.get(pixelHMSToken_); + + // get Pixel RecHits + auto const &pixelRecHitsDSV = iEvent.get(pixelRecHitsToken_); auto const &pixelRecHits = pixelRecHitsDSV.data(); auto const nPixelHits = pixelRecHits.size(); - auto const &hitsModuleStart = iEvent.get(hmsToken_); + // get OT RecHits if needed + size_t nOTHits = 0; + const Phase2TrackerRecHit1DCollectionNew *otRecHitsDSV = nullptr; + if (useOTExtension_) { + otRecHitsDSV = &iEvent.get(otRecHitsToken_); + nOTHits = otRecHitsDSV->dataSize(); + } + + size_t nHits = nPixelHits + nOTHits; - hitmap.resize(nPixelHits, nullptr); + // hitmap to go from a unique RecHit identifier to the RecHit in the legacy collection + // (unique hit identifier is equivalent to the position of the hit in the RecHit SoA) + std::vector hitmap; + hitmap.resize(nHits, nullptr); + // loop over pixel RecHits to fill the hitmap for (auto const &pixelHit : pixelRecHits) { auto const &thit = static_cast(pixelHit); auto const detI = thit.det()->index(); auto const &clus = thit.firstClusterRef(); assert(clus.isPixel()); - auto const idx = hitsModuleStart[detI] + clus.pixelCluster().originalId(); - if (idx >= hitmap.size()) - hitmap.resize(idx + 256, nullptr); // only in case of hit overflow in one module + + // get hit identifier as (hit offset of the module) + (hit index in this module) + auto const idx = pixelHitsModuleStart[detI] + clus.pixelCluster().originalId(); assert(nullptr == hitmap[idx]); + // std::cout << "Filling hitmap at position " << idx << " with a pixel hit." << std::endl; hitmap[idx] = &pixelHit; } + // if OT RecHits are used in PixelTracks, fill the hitmap also with those + if (useOTExtension_) { + // The RecHits in the SoA are ordered according to the detUnit->index() + // of the respective OT module. For this reason, we need the map from the + // detId to the moduleId among all used OT modules. This otModuleId corresponds + // to the module's position in the otHitsModuleStart that we get from the event. + + // get the module's starting indices in the hit collection + auto const &otHitsModuleStart = iEvent.get(otHMSToken_); + + // perform the exact same loop of how the SoA is initially filled with OT hits + // and get the index by counting the hits (starting from the correpondign HitStartModule) + for (auto const &detSet : *otRecHitsDSV) { + auto detId = detSet.detId(); + + // check if module is used in extension + if (detIdIsUsedOTModule.find(detId)->second) { + // get the corresponding otModuleId + auto otModuleId = detIdToOTModuleId.find(detId)->second; + + // loop over the RecHits of the module and fill the hitmap + for (int idx = otHitsModuleStart[otModuleId]; auto const &recHit : detSet) { + assert(nullptr == hitmap[idx]); + // std::cout << "Filling hitmap at position " << idx << " with an OT hit." << std::endl; + hitmap[idx] = &recHit; + idx++; + } + } + } + } + std::vector hits; hits.reserve(5); //TODO move to a configurable parameter? - auto const &tsoa = iEvent.get(tokenTrack_); + auto const &tsoa = iEvent.get(trackSoAToken_); auto const *quality = tsoa.view().quality(); auto const *hitOffs = tsoa.view().hitOffsets(); auto const *hitIdxs = tsoa.template view().id(); @@ -184,17 +310,24 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID streamID, return quality[i1] > quality[i2]; }); - //store the index of the SoA: indToEdm[index_SoAtrack] -> index_edmTrack (if it exists) - indToEdm.resize(sortIdxs.size(), -1); + indToEdm.resize(nTracks, -1); + + // loop over (sorted) tracks for (const auto &it : sortIdxs) { auto nHits = reco::nHits(tsoa.view(), it); assert(nHits >= 3); auto q = quality[it]; + // apply cuts on quality and number of hits if (q < minQuality_) - continue; + // since the tracks are sorted according to quality, + // we can break after the first track with low quality + break; if (nHits < minNumberOfHits_) //move to nLayers? continue; + + //store the index of the SoA: + // indToEdm[index_SoAtrack] -> index_edmTrack (if it exists) indToEdm[it] = nt; ++nt; @@ -205,7 +338,8 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID streamID, for (auto iHit = start; iHit < end; ++iHit) hits[iHit - start] = hitmap[hitIdxs[iHit]]; -#ifdef CA_DEBUG +//#ifdef CA_DEBUG +#if 0 std::cout << "track soa : " << it << " with hits: "; for (auto iHit = start; iHit < end; ++iHit) std::cout << hitIdxs[iHit] << " - "; @@ -267,7 +401,7 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID streamID, #endif // store tracks - storeTracks(iEvent, tracks, httopo); + storeTracks(iEvent, tracks, trackerTopology); iEvent.put(std::move(indToEdmP)); }