diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHits.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHits.cc index b84de5250321e..758957feffd02 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHits.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHits.cc @@ -20,10 +20,9 @@ #include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" // TODO: change class name to SiPixelCompareRecHitsSoA when CUDA code is removed -template class SiPixelCompareRecHits : public DQMEDAnalyzer { public: - using HitsSoA = TrackingRecHitHost; + using HitsSoA = reco::TrackingRecHitHost; explicit SiPixelCompareRecHits(const edm::ParameterSet&); ~SiPixelCompareRecHits() override = default; @@ -75,8 +74,8 @@ class SiPixelCompareRecHits : public DQMEDAnalyzer { // // constructors // -template -SiPixelCompareRecHits::SiPixelCompareRecHits(const edm::ParameterSet& iConfig) + +SiPixelCompareRecHits::SiPixelCompareRecHits(const edm::ParameterSet& iConfig) : geomToken_(esConsumes()), topoToken_(esConsumes()), tokenSoAHitsReference_(consumes(iConfig.getParameter("pixelHitsReferenceSoA"))), @@ -87,15 +86,14 @@ SiPixelCompareRecHits::SiPixelCompareRecHits(const edm::ParameterSet& iConfig // // Begin Run // -template -void SiPixelCompareRecHits::dqmBeginRun(const edm::Run& iRun, const edm::EventSetup& iSetup) { + +void SiPixelCompareRecHits::dqmBeginRun(const edm::Run& iRun, const edm::EventSetup& iSetup) { tkGeom_ = &iSetup.getData(geomToken_); tTopo_ = &iSetup.getData(topoToken_); } -template template -void SiPixelCompareRecHits::analyzeSeparate(U tokenRef, V tokenTar, const edm::Event& iEvent) { +void SiPixelCompareRecHits::analyzeSeparate(U tokenRef, V tokenTar, const edm::Event& iEvent) { const auto& rhsoaHandleRef = iEvent.getHandle(tokenRef); const auto& rhsoaHandleTar = iEvent.getHandle(tokenTar); @@ -206,8 +204,8 @@ void SiPixelCompareRecHits::analyzeSeparate(U tokenRef, V tokenTar, const edm // // -- Analyze // -template -void 
SiPixelCompareRecHits::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { + +void SiPixelCompareRecHits::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { // The default use case is to use vertices from Alpaka reconstructed on CPU and GPU; // The function is left templated if any other cases need to be added analyzeSeparate(tokenSoAHitsReference_, tokenSoAHitsTarget_, iEvent); @@ -216,10 +214,10 @@ void SiPixelCompareRecHits::analyze(const edm::Event& iEvent, const edm::Even // // -- Book Histograms // -template -void SiPixelCompareRecHits::bookHistograms(DQMStore::IBooker& iBook, - edm::Run const& iRun, - edm::EventSetup const& iSetup) { + +void SiPixelCompareRecHits::bookHistograms(DQMStore::IBooker& iBook, + edm::Run const& iRun, + edm::EventSetup const& iSetup) { iBook.cd(); iBook.setCurrentFolder(topFolderName_); @@ -259,8 +257,8 @@ void SiPixelCompareRecHits::bookHistograms(DQMStore::IBooker& iBook, hFposYDiff_ = iBook.book1D("rechitsposYDiffFpix","y-position difference of rechits in FPix; rechit y-pos difference (Reference - Target)", 1000, -10, 10); } -template -void SiPixelCompareRecHits::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + +void SiPixelCompareRecHits::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { // monitorpixelRecHitsSoAAlpaka edm::ParameterSetDescription desc; desc.add("pixelHitsReferenceSoA", edm::InputTag("siPixelRecHitsPreSplittingAlpakaSerial")); @@ -270,12 +268,15 @@ void SiPixelCompareRecHits::fillDescriptions(edm::ConfigurationDescriptions& descriptions.addWithDefaultLabel(desc); } -using SiPixelPhase1CompareRecHits = SiPixelCompareRecHits; -using SiPixelPhase2CompareRecHits = SiPixelCompareRecHits; -using SiPixelHIonPhase1CompareRecHits = SiPixelCompareRecHits; +using SiPixelCompareRecHits = SiPixelCompareRecHits; +// keeping the old names to allow a smooth HLT migration +using SiPixelPhase1CompareRecHits = SiPixelCompareRecHits; +using SiPixelPhase2CompareRecHits 
= SiPixelCompareRecHits; +using SiPixelHIonPhase1CompareRecHits = SiPixelCompareRecHits; #include "FWCore/Framework/interface/MakerMacros.h" // TODO: change module names to SiPixel*CompareRecHitsSoA when CUDA code is removed +DEFINE_FWK_MODULE(SiPixelCompareRecHits); DEFINE_FWK_MODULE(SiPixelPhase1CompareRecHits); DEFINE_FWK_MODULE(SiPixelPhase2CompareRecHits); DEFINE_FWK_MODULE(SiPixelHIonPhase1CompareRecHits); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTracks.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTracks.cc index 4394b5d59f34b..9e279f249d810 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTracks.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTracks.cc @@ -69,10 +69,10 @@ namespace { } // namespace // TODO: change class name to SiPixelCompareTracksSoA when CUDA code is removed -template + class SiPixelCompareTracks : public DQMEDAnalyzer { public: - using PixelTrackSoA = TracksHost; + using PixelTrackSoA = reco::TracksHost; explicit SiPixelCompareTracks(const edm::ParameterSet&); ~SiPixelCompareTracks() override = default; @@ -135,8 +135,7 @@ class SiPixelCompareTracks : public DQMEDAnalyzer { // constructors // -template -SiPixelCompareTracks::SiPixelCompareTracks(const edm::ParameterSet& iConfig) +SiPixelCompareTracks::SiPixelCompareTracks(const edm::ParameterSet& iConfig) : tokenSoATrackReference_(consumes(iConfig.getParameter("pixelTrackReferenceSoA"))), tokenSoATrackTarget_(consumes(iConfig.getParameter("pixelTrackTargetSoA"))), topFolderName_(iConfig.getParameter("topFolderName")), @@ -144,11 +143,8 @@ SiPixelCompareTracks::SiPixelCompareTracks(const edm::ParameterSet& iConfig) minQuality_(pixelTrack::qualityByName(iConfig.getParameter("minQuality"))), dr2cut_(iConfig.getParameter("deltaR2cut")) {} -template template -void SiPixelCompareTracks::analyzeSeparate(U tokenRef, V tokenTar, const edm::Event& iEvent) { - using helper = TracksUtilities; - +void SiPixelCompareTracks::analyzeSeparate(U tokenRef, V 
tokenTar, const edm::Event& iEvent) { const auto& tsoaHandleRef = iEvent.getHandle(tokenRef); const auto& tsoaHandleTar = iEvent.getHandle(tokenTar); @@ -182,7 +178,7 @@ void SiPixelCompareTracks::analyzeSeparate(U tokenRef, V tokenTar, const edm: //Loop over Tar tracks and store the indices of the loose tracks. Whats happens if useQualityCut_ is false? std::vector looseTrkidxTar; for (int32_t jt = 0; jt < maxTracksTar; ++jt) { - if (helper::nHits(tsoaTar.view(), jt) == 0) + if (reco::nHits(tsoaTar.view(), jt) == 0) break; // this is a guard if (!(tsoaTar.view()[jt].pt() > 0.)) continue; @@ -195,7 +191,7 @@ void SiPixelCompareTracks::analyzeSeparate(U tokenRef, V tokenTar, const edm: //Now loop over Ref tracks//nested loop for loose gPU tracks for (int32_t it = 0; it < maxTracksRef; ++it) { - int nHitsRef = helper::nHits(tsoaRef.view(), it); + int nHitsRef = reco::nHits(tsoaRef.view(), it); if (nHitsRef == 0) break; // this is a guard @@ -238,7 +234,7 @@ void SiPixelCompareTracks::analyzeSeparate(U tokenRef, V tokenTar, const edm: hchi2_->Fill(tsoaRef.view()[it].chi2(), tsoaTar.view()[closestTkidx].chi2()); hCharge_->Fill(qRef, reco::charge(tsoaTar.view(), closestTkidx)); - hnHits_->Fill(helper::nHits(tsoaRef.view(), it), helper::nHits(tsoaTar.view(), closestTkidx)); + hnHits_->Fill(reco::nHits(tsoaRef.view(), it), reco::nHits(tsoaTar.view(), closestTkidx)); hnLayers_->Fill(tsoaRef.view()[it].nLayers(), tsoaTar.view()[closestTkidx].nLayers()); hpt_->Fill(ptRef, tsoaTar.view()[closestTkidx].pt()); hCurvature_->Fill(qRef / ptRef, reco::charge(tsoaTar.view(), closestTkidx) / tsoaTar.view()[closestTkidx].pt()); @@ -279,8 +275,8 @@ void SiPixelCompareTracks::analyzeSeparate(U tokenRef, V tokenTar, const edm: // // -- Analyze // -template -void SiPixelCompareTracks::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { + +void SiPixelCompareTracks::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { // The default use case is to use vertices 
from Alpaka reconstructed on CPU and GPU; // The function is left templated if any other cases need to be added analyzeSeparate(tokenSoATrackReference_, tokenSoATrackTarget_, iEvent); @@ -289,10 +285,10 @@ void SiPixelCompareTracks::analyze(const edm::Event& iEvent, const edm::Event // // -- Book Histograms // -template -void SiPixelCompareTracks::bookHistograms(DQMStore::IBooker& iBook, - edm::Run const& iRun, - edm::EventSetup const& iSetup) { + +void SiPixelCompareTracks::bookHistograms(DQMStore::IBooker& iBook, + edm::Run const& iRun, + edm::EventSetup const& iSetup) { iBook.cd(); iBook.setCurrentFolder(topFolderName_); @@ -371,8 +367,7 @@ void SiPixelCompareTracks::bookHistograms(DQMStore::IBooker& iBook, } -template -void SiPixelCompareTracks::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { +void SiPixelCompareTracks::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { // monitorpixelTrackSoA edm::ParameterSetDescription desc; desc.add("pixelTrackReferenceSoA", edm::InputTag("pixelTracksAlpakaSerial")); @@ -386,10 +381,12 @@ void SiPixelCompareTracks::fillDescriptions(edm::ConfigurationDescriptions& d // TODO: change module names to SiPixel*CompareTracksSoA when CUDA code is removed -using SiPixelPhase1CompareTracks = SiPixelCompareTracks; -using SiPixelPhase2CompareTracks = SiPixelCompareTracks; -using SiPixelHIonPhase1CompareTracks = SiPixelCompareTracks; +using SiPixelPhase1CompareTracks = SiPixelCompareTracks; +using SiPixelPhase2CompareTracks = SiPixelCompareTracks; +using SiPixelHIonPhase1CompareTracks = SiPixelCompareTracks; +// Duplicates to keep them alive for the HLT menu to migrate to the new modules +DEFINE_FWK_MODULE(SiPixelCompareTracks); DEFINE_FWK_MODULE(SiPixelPhase1CompareTracks); DEFINE_FWK_MODULE(SiPixelPhase2CompareTracks); DEFINE_FWK_MODULE(SiPixelHIonPhase1CompareTracks); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoAAlpaka.cc 
b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoAAlpaka.cc index f4c8968fafb16..df84da66fbd1b 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoAAlpaka.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoAAlpaka.cc @@ -16,10 +16,9 @@ #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" #include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -template class SiPixelMonitorRecHitsSoAAlpaka : public DQMEDAnalyzer { public: - using HitsOnHost = TrackingRecHitHost; + using HitsOnHost = reco::TrackingRecHitHost; explicit SiPixelMonitorRecHitsSoAAlpaka(const edm::ParameterSet&); ~SiPixelMonitorRecHitsSoAAlpaka() override = default; @@ -61,8 +60,8 @@ class SiPixelMonitorRecHitsSoAAlpaka : public DQMEDAnalyzer { // // constructors // -template -SiPixelMonitorRecHitsSoAAlpaka::SiPixelMonitorRecHitsSoAAlpaka(const edm::ParameterSet& iConfig) + +SiPixelMonitorRecHitsSoAAlpaka::SiPixelMonitorRecHitsSoAAlpaka(const edm::ParameterSet& iConfig) : geomToken_(esConsumes()), topoToken_(esConsumes()), tokenSoAHits_(consumes(iConfig.getParameter("pixelHitsSrc"))), @@ -71,8 +70,8 @@ SiPixelMonitorRecHitsSoAAlpaka::SiPixelMonitorRecHitsSoAAlpaka(const edm::Par // // Begin Run // -template -void SiPixelMonitorRecHitsSoAAlpaka::dqmBeginRun(const edm::Run& iRun, const edm::EventSetup& iSetup) { + +void SiPixelMonitorRecHitsSoAAlpaka::dqmBeginRun(const edm::Run& iRun, const edm::EventSetup& iSetup) { tkGeom_ = &iSetup.getData(geomToken_); tTopo_ = &iSetup.getData(topoToken_); } @@ -80,8 +79,8 @@ void SiPixelMonitorRecHitsSoAAlpaka::dqmBeginRun(const edm::Run& iRun, const // // -- Analyze // -template -void SiPixelMonitorRecHitsSoAAlpaka::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { + +void SiPixelMonitorRecHitsSoAAlpaka::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { const auto& rhsoaHandle = iEvent.getHandle(tokenSoAHits_); if (!rhsoaHandle.isValid()) { 
edm::LogWarning("SiPixelMonitorRecHitsSoAAlpaka") << "No RecHits SoA found \n returning!"; @@ -136,10 +135,10 @@ void SiPixelMonitorRecHitsSoAAlpaka::analyze(const edm::Event& iEvent, const // // -- Book Histograms // -template -void SiPixelMonitorRecHitsSoAAlpaka::bookHistograms(DQMStore::IBooker& iBook, - edm::Run const& iRun, - edm::EventSetup const& iSetup) { + +void SiPixelMonitorRecHitsSoAAlpaka::bookHistograms(DQMStore::IBooker& iBook, + edm::Run const& iRun, + edm::EventSetup const& iSetup) { iBook.cd(); iBook.setCurrentFolder(topFolderName_); @@ -179,8 +178,7 @@ void SiPixelMonitorRecHitsSoAAlpaka::bookHistograms(DQMStore::IBooker& iBook, } } -template -void SiPixelMonitorRecHitsSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { +void SiPixelMonitorRecHitsSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { // monitorpixelRecHitsSoA edm::ParameterSetDescription desc; desc.add("pixelHitsSrc", edm::InputTag("siPixelRecHitsPreSplittingAlpaka")); @@ -188,11 +186,13 @@ void SiPixelMonitorRecHitsSoAAlpaka::fillDescriptions(edm::ConfigurationDescr descriptions.addWithDefaultLabel(desc); } -using SiPixelPhase1MonitorRecHitsSoAAlpaka = SiPixelMonitorRecHitsSoAAlpaka; -using SiPixelPhase2MonitorRecHitsSoAAlpaka = SiPixelMonitorRecHitsSoAAlpaka; -using SiPixelHIonPhase1MonitorRecHitsSoAAlpaka = SiPixelMonitorRecHitsSoAAlpaka; +using SiPixelPhase1MonitorRecHitsSoAAlpaka = SiPixelMonitorRecHitsSoAAlpaka; +using SiPixelPhase2MonitorRecHitsSoAAlpaka = SiPixelMonitorRecHitsSoAAlpaka; +using SiPixelHIonPhase1MonitorRecHitsSoAAlpaka = SiPixelMonitorRecHitsSoAAlpaka; #include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(SiPixelMonitorRecHitsSoAAlpaka); DEFINE_FWK_MODULE(SiPixelPhase1MonitorRecHitsSoAAlpaka); DEFINE_FWK_MODULE(SiPixelPhase2MonitorRecHitsSoAAlpaka); DEFINE_FWK_MODULE(SiPixelHIonPhase1MonitorRecHitsSoAAlpaka); + diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoAAlpaka.cc 
b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoAAlpaka.cc index 8bd1cdfa2e429..9b519116e149d 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoAAlpaka.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoAAlpaka.cc @@ -24,11 +24,11 @@ #include "DQMServices/Core/interface/DQMEDAnalyzer.h" #include "DQMServices/Core/interface/DQMStore.h" #include "DataFormats/TrackSoA/interface/TracksHost.h" +#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" -template class SiPixelMonitorTrackSoAAlpaka : public DQMEDAnalyzer { public: - using PixelTrackHeterogeneous = TracksHost; + using PixelTrackHeterogeneous = reco::TracksHost; explicit SiPixelMonitorTrackSoAAlpaka(const edm::ParameterSet&); ~SiPixelMonitorTrackSoAAlpaka() override = default; void bookHistograms(DQMStore::IBooker& ibooker, edm::Run const& iRun, edm::EventSetup const& iSetup) override; @@ -64,8 +64,7 @@ class SiPixelMonitorTrackSoAAlpaka : public DQMEDAnalyzer { // constructors // -template -SiPixelMonitorTrackSoAAlpaka::SiPixelMonitorTrackSoAAlpaka(const edm::ParameterSet& iConfig) { +SiPixelMonitorTrackSoAAlpaka::SiPixelMonitorTrackSoAAlpaka(const edm::ParameterSet& iConfig) { tokenSoATrack_ = consumes(iConfig.getParameter("pixelTrackSrc")); topFolderName_ = iConfig.getParameter("topFolderName"); //"SiPixelHeterogeneous/PixelTrackSoA"; useQualityCut_ = iConfig.getParameter("useQualityCut"); @@ -75,8 +74,8 @@ SiPixelMonitorTrackSoAAlpaka::SiPixelMonitorTrackSoAAlpaka(const edm::Paramet // // -- Analyze // -template -void SiPixelMonitorTrackSoAAlpaka::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { + +void SiPixelMonitorTrackSoAAlpaka::analyze(const edm::Event& iEvent, const edm::EventSetup& iSetup) { const auto& tsoaHandle = iEvent.getHandle(tokenSoATrack_); if (!tsoaHandle.isValid()) { edm::LogWarning("SiPixelMonitorTrackSoAAlpaka") << "No Track SoA found \n returning!" 
<< std::endl; @@ -90,7 +89,7 @@ void SiPixelMonitorTrackSoAAlpaka::analyze(const edm::Event& iEvent, const ed int32_t nLooseAndAboveTracks = 0; for (int32_t it = 0; it < maxTracks; ++it) { - auto nHits = tsoa.view().detIndices().size(it); + auto nHits = reco::nHits(tsoa.const_view(), it); auto nLayers = tsoa.view()[it].nLayers(); if (nHits == 0) break; // this is a guard @@ -139,10 +138,10 @@ void SiPixelMonitorTrackSoAAlpaka::analyze(const edm::Event& iEvent, const ed // // -- Book Histograms // -template -void SiPixelMonitorTrackSoAAlpaka::bookHistograms(DQMStore::IBooker& iBook, - edm::Run const& iRun, - edm::EventSetup const& iSetup) { + +void SiPixelMonitorTrackSoAAlpaka::bookHistograms(DQMStore::IBooker& iBook, + edm::Run const& iRun, + edm::EventSetup const& iSetup) { iBook.cd(); iBook.setCurrentFolder(topFolderName_); @@ -181,8 +180,7 @@ hChi2VsEta = iBook.bookProfile("nChi2ndofVsEta", fmt::format("{} vs track #eta;T } } -template -void SiPixelMonitorTrackSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { +void SiPixelMonitorTrackSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { // monitorpixelTrackSoA edm::ParameterSetDescription desc; desc.add("pixelTrackSrc", edm::InputTag("pixelTracksAlpaka")); @@ -192,10 +190,12 @@ void SiPixelMonitorTrackSoAAlpaka::fillDescriptions(edm::ConfigurationDescrip descriptions.addWithDefaultLabel(desc); } -using SiPixelPhase1MonitorTrackSoAAlpaka = SiPixelMonitorTrackSoAAlpaka; -using SiPixelPhase2MonitorTrackSoAAlpaka = SiPixelMonitorTrackSoAAlpaka; -using SiPixelHIonPhase1MonitorTrackSoAAlpaka = SiPixelMonitorTrackSoAAlpaka; +using SiPixelPhase1MonitorTrackSoAAlpaka = SiPixelMonitorTrackSoAAlpaka; +using SiPixelPhase2MonitorTrackSoAAlpaka = SiPixelMonitorTrackSoAAlpaka; +using SiPixelHIonPhase1MonitorTrackSoAAlpaka = SiPixelMonitorTrackSoAAlpaka; +// Duplicates to keep them alive for the HLT menu to migrate to the new modules 
+DEFINE_FWK_MODULE(SiPixelMonitorTrackSoAAlpaka); DEFINE_FWK_MODULE(SiPixelPhase1MonitorTrackSoAAlpaka); DEFINE_FWK_MODULE(SiPixelPhase2MonitorTrackSoAAlpaka); DEFINE_FWK_MODULE(SiPixelHIonPhase1MonitorTrackSoAAlpaka); diff --git a/DataFormats/TrackSoA/interface/TrackDefinitions.h b/DataFormats/TrackSoA/interface/TrackDefinitions.h index c1b84c47ec6e4..56a30bec35dcf 100644 --- a/DataFormats/TrackSoA/interface/TrackDefinitions.h +++ b/DataFormats/TrackSoA/interface/TrackDefinitions.h @@ -1,5 +1,5 @@ -#ifndef DataFormats_Track_interface_TrackDefinitions_h -#define DataFormats_Track_interface_TrackDefinitions_h +#ifndef DataFormats_TrackSoA_interface_TrackDefinitions_h +#define DataFormats_TrackSoA_interface_TrackDefinitions_h #include #include #include diff --git a/DataFormats/TrackSoA/interface/TracksDevice.h b/DataFormats/TrackSoA/interface/TracksDevice.h index f22dae48b5cf4..38c61aec7cfe0 100644 --- a/DataFormats/TrackSoA/interface/TracksDevice.h +++ b/DataFormats/TrackSoA/interface/TracksDevice.h @@ -1,5 +1,5 @@ -#ifndef DataFormats_Track_interface_TracksDevice_h -#define DataFormats_Track_interface_TracksDevice_h +#ifndef DataFormats_TrackSoA_interface_TracksDevice_h +#define DataFormats_TrackSoA_interface_TracksDevice_h #include @@ -10,36 +10,9 @@ #include "DataFormats/TrackSoA/interface/TrackDefinitions.h" #include "DataFormats/TrackSoA/interface/TracksSoA.h" -// TODO: The class is created via inheritance of the PortableCollection. -// This is generally discouraged, and should be done via composition. 
-// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -template -class TracksDevice : public PortableDeviceCollection, TDev> { -public: - static constexpr int32_t S = TrackerTraits::maxNumberOfTuples; //TODO: this could be made configurable at runtime - - TracksDevice(edm::Uninitialized) - : PortableDeviceCollection, TDev>{edm::kUninitialized} { - } // necessary for ROOT dictionaries - - using PortableDeviceCollection, TDev>::view; - using PortableDeviceCollection, TDev>::const_view; - using PortableDeviceCollection, TDev>::buffer; - - // Constructor which specifies the SoA size - template - explicit TracksDevice(TQueue& queue) : PortableDeviceCollection, TDev>(S, queue) {} -}; - -namespace pixelTrack { - - template - using TracksDevicePhase1 = TracksDevice; +namespace reco { template - using TracksDeviceHIonPhase1 = TracksDevice; - template - using TracksDevicePhase2 = TracksDevice; - -} // namespace pixelTrack + using TracksDevice = PortableDeviceMultiCollection; +} #endif // DataFormats_Track_TracksDevice_H diff --git a/DataFormats/TrackSoA/interface/TracksHost.h b/DataFormats/TrackSoA/interface/TracksHost.h index 0b56457f58bce..e462764bdc7bd 100644 --- a/DataFormats/TrackSoA/interface/TracksHost.h +++ b/DataFormats/TrackSoA/interface/TracksHost.h @@ -1,47 +1,11 @@ -#ifndef DataFormats_Track_TracksHost_H -#define DataFormats_Track_TracksHost_H +#ifndef DataFormats_TrackSoA_interface_TracksHost_H +#define DataFormats_TrackSoA_interface_TracksHost_H -#include - -#include - -#include "DataFormats/Common/interface/Uninitialized.h" #include "DataFormats/Portable/interface/PortableHostCollection.h" -#include "DataFormats/TrackSoA/interface/TrackDefinitions.h" #include "DataFormats/TrackSoA/interface/TracksSoA.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -// TODO: The class is created via inheritance of the PortableHostCollection. -// This is generally discouraged, and should be done via composition. 
-// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -template -class TracksHost : public PortableHostCollection> { -public: - static constexpr int32_t S = TrackerTraits::maxNumberOfTuples; //TODO: this could be made configurable at runtime - - TracksHost(edm::Uninitialized) - : PortableHostCollection>{edm::kUninitialized} { - } // necessary for ROOT dictionaries - - using PortableHostCollection>::view; - using PortableHostCollection>::const_view; - using PortableHostCollection>::buffer; - - // Constructor which specifies the SoA size - template - explicit TracksHost(TQueue& queue) : PortableHostCollection>(S, queue) {} - - // Constructor which specifies the DevHost - explicit TracksHost(alpaka_common::DevHost const& host) - : PortableHostCollection>(S, host) {} -}; - -namespace pixelTrack { - - using TracksHostPhase1 = TracksHost; - using TracksHostPhase2 = TracksHost; - using TracksHostHIonPhase1 = TracksHost; -} // namespace pixelTrack +namespace reco { + using TracksHost = PortableHostMultiCollection; +} -#endif // DataFormats_Track_TracksHost_H +#endif // DataFormats_TrackSoA_interface_TracksHost_H diff --git a/DataFormats/TrackSoA/interface/TracksSoA.h b/DataFormats/TrackSoA/interface/TracksSoA.h index ed4ef2e5a4c93..78f363778c36a 100644 --- a/DataFormats/TrackSoA/interface/TracksSoA.h +++ b/DataFormats/TrackSoA/interface/TracksSoA.h @@ -12,89 +12,124 @@ namespace reco { - template - struct TrackSoA { - static constexpr int32_t S = TrackerTraits::maxNumberOfTuples; - static constexpr int32_t H = TrackerTraits::avgHitsPerTrack; - // Aliases in order to not confuse the GENERATE_SOA_LAYOUT - // macro with weird colons and angled brackets. 
- using Vector5f = Eigen::Matrix; - using Vector15f = Eigen::Matrix; - using Quality = pixelTrack::Quality; - - using hindex_type = uint32_t; - - using HitContainer = cms::alpakatools::OneToManyAssocSequential; - - GENERATE_SOA_LAYOUT(Layout, - SOA_COLUMN(Quality, quality), - SOA_COLUMN(float, chi2), - SOA_COLUMN(int8_t, nLayers), - SOA_COLUMN(float, eta), - SOA_COLUMN(float, pt), - // state at the beam spot: {phi, tip, 1/pt, cotan(theta), zip} - SOA_EIGEN_COLUMN(Vector5f, state), - SOA_EIGEN_COLUMN(Vector15f, covariance), - SOA_SCALAR(int, nTracks), - SOA_SCALAR(HitContainer, hitIndices), - SOA_SCALAR(HitContainer, detIndices)) - }; - - template - using TrackLayout = typename reco::TrackSoA::template Layout<>; - template - using TrackSoAView = typename reco::TrackSoA::template Layout<>::View; - template - using TrackSoAConstView = typename reco::TrackSoA::template Layout<>::ConstView; - - /* Implement a type trait to identify the specialisations of TrackSoAConstView - * - * This is done explicitly for all possible pixel topologies, because we did not find a way - * to use template deduction with a partial specialisation. - */ - template - struct IsTrackSoAConstView : std::false_type {}; - template <> - struct IsTrackSoAConstView> : std::true_type {}; - template <> - struct IsTrackSoAConstView> : std::true_type {}; - template <> - struct IsTrackSoAConstView> : std::true_type {}; - template <> - struct IsTrackSoAConstView> : std::true_type {}; - template <> - struct IsTrackSoAConstView> : std::true_type {}; - template <> - struct IsTrackSoAConstView> : std::true_type {}; - - template - constexpr bool isTrackSoAConstView = IsTrackSoAConstView::value; - - template >> - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float charge(ConstView const& tracks, int32_t i) { + // Aliases in order to not confuse the GENERATE_SOA_LAYOUT + // macro with weird colons and angled brackets. 
+ using Vector5f = Eigen::Matrix; + using Vector15f = Eigen::Matrix; + using Quality = pixelTrack::Quality; + + GENERATE_SOA_LAYOUT(TrackLayout, + SOA_COLUMN(Quality, quality), + SOA_COLUMN(float, chi2), + SOA_COLUMN(int8_t, nLayers), + SOA_COLUMN(float, eta), + SOA_COLUMN(float, pt), + // state at the beam spot: {phi, tip, 1/pt, cotan(theta), zip} + SOA_EIGEN_COLUMN(Vector5f, state), + SOA_EIGEN_COLUMN(Vector15f, covariance), + SOA_SCALAR(int, nTracks), + SOA_COLUMN(uint32_t, hitOffsets)) + + GENERATE_SOA_LAYOUT(TrackHitsLayout, SOA_COLUMN(uint32_t, id), SOA_COLUMN(uint32_t, detId)) + + using TrackSoA = TrackLayout<>; + using TrackSoAView = TrackSoA::View; + using TrackSoAConstView = TrackSoA::ConstView; + + using TrackHitSoA = TrackHitsLayout<>; + using TrackHitSoAView = TrackHitSoA::View; + using TrackHitSoAConstView = TrackHitSoA::ConstView; + + // All these below were constexpr. Now I get this: + // note: non-literal type 'reco::TrackLayout<128, false>::ConstViewTemplateFreeParams<128, false, true, true>::const_element' + // cannot be used in a constant expression + + // TODO: move to use the layer gaps defined in CAGeometry + ALPAKA_FN_HOST_ACC inline int nLayers(const TrackSoAConstView &tracks, + const TrackHitSoAConstView &hits, + uint16_t maxLayers, + uint32_t const *__restrict__ layerStarts, + int32_t i) { + auto start = (i == 0) ? 0 : tracks[i - 1].hitOffsets(); + auto end = tracks[i].hitOffsets(); + auto hitId = hits[start].id(); + int nl = 1; + int ol = 0; + while (hitId >= layerStarts[ol + 1] and ol < maxLayers) + ++ol; + ++start; + for (; start < end; ++start) { + hitId = hits[start].id(); + int il = 0; + while (hitId >= layerStarts[il + 1] and il < maxLayers) + ++il; + if (il != ol) + ++nl; + ol = il; + } + return nl; + } + + ALPAKA_FN_HOST_ACC inline float charge(const TrackSoAConstView &tracks, int32_t i) { //was: std::copysign(1.f, tracks[i].state()(2)). 
Will be constexpr with C++23 float v = tracks[i].state()(2); return float((0.0f < v) - (v < 0.0f)); } - template >> - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float phi(ConstView const& tracks, int32_t i) { - return tracks[i].state()(0); + ALPAKA_FN_HOST_ACC inline float phi(const TrackSoAConstView &tracks, int32_t i) { return tracks[i].state()(0); } + + ALPAKA_FN_HOST_ACC inline float tip(const TrackSoAConstView &tracks, int32_t i) { return tracks[i].state()(1); } + + ALPAKA_FN_HOST_ACC inline float zip(const TrackSoAConstView &tracks, int32_t i) { return tracks[i].state()(4); } + + ALPAKA_FN_HOST_ACC inline bool isTriplet(const TrackSoAConstView &tracks, int32_t i) { + return tracks[i].nLayers() == 3; } - template >> - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float tip(ConstView const& tracks, int32_t i) { - return tracks[i].state()(1); + ALPAKA_FN_HOST_ACC inline int nHits(const TrackSoAConstView &tracks, int i) { + auto start = (i == 0) ? 0 : tracks[i - 1].hitOffsets(); + return tracks[i].hitOffsets() - start; } - template >> - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr float zip(ConstView const& tracks, int32_t i) { - return tracks[i].state()(4); + // state at the beam spot: { phi, tip, 1/pt, cotan(theta), zip } + + template + ALPAKA_FN_HOST_ACC inline void copyFromCircle( + TrackSoAView &tracks, V3 const &cp, M3 const &ccov, V2 const &lp, M2 const &lcov, float b, int32_t i) { + tracks[i].state() << cp.template cast(), lp.template cast(); + + tracks[i].state()(2) = tracks[i].state()(2) * b; + auto cov = tracks[i].covariance(); + cov(0) = ccov(0, 0); + cov(1) = ccov(0, 1); + cov(2) = b * float(ccov(0, 2)); + cov(4) = cov(3) = 0; + cov(5) = ccov(1, 1); + cov(6) = b * float(ccov(1, 2)); + cov(8) = cov(7) = 0; + cov(9) = b * b * float(ccov(2, 2)); + cov(11) = cov(10) = 0; + cov(12) = lcov(0, 0); + cov(13) = lcov(0, 1); + cov(14) = lcov(1, 1); } - template >> - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr bool isTriplet(ConstView const& tracks, 
int32_t i) { - return tracks[i].nLayers() == 3; + template + ALPAKA_FN_HOST_ACC inline void copyFromDense(TrackSoAView &tracks, V5 const &v, M5 const &cov, int32_t i) { + tracks[i].state() = v.template cast(); + for (int j = 0, ind = 0; j < 5; ++j) + for (auto k = j; k < 5; ++k) + tracks[i].covariance()(ind++) = cov(j, k); + } + + template + ALPAKA_FN_HOST_ACC inline void copyToDense(const TrackSoAConstView &tracks, V5 &v, M5 &cov, int32_t i) { + v = tracks[i].state().template cast(); + for (int j = 0, ind = 0; j < 5; ++j) { + cov(j, j) = tracks[i].covariance()(ind++); + for (auto k = j + 1; k < 5; ++k) + cov(k, j) = cov(j, k) = tracks[i].covariance()(ind++); + } } } // namespace reco diff --git a/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h b/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h index f50756f3ddbca..8d9a65f69d67c 100644 --- a/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h +++ b/DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h @@ -10,79 +10,6 @@ #include "DataFormats/TrackSoA/interface/TracksSoA.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" -// Methods that operate on View and ConstView of the TrackSoA, and cannot be class methods. 
-template -struct TracksUtilities { - using TrackSoAView = typename reco::TrackSoA::template Layout<>::View; - using TrackSoAConstView = typename reco::TrackSoA::template Layout<>::ConstView; - using hindex_type = typename reco::TrackSoA::hindex_type; - - // state at the beam spot: { phi, tip, 1/pt, cotan(theta), zip } - - template - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr void copyFromCircle( - TrackSoAView &tracks, V3 const &cp, M3 const &ccov, V2 const &lp, M2 const &lcov, float b, int32_t i) { - tracks[i].state() << cp.template cast(), lp.template cast(); - - tracks[i].state()(2) = tracks[i].state()(2) * b; - auto cov = tracks[i].covariance(); - cov(0) = ccov(0, 0); - cov(1) = ccov(0, 1); - cov(2) = b * float(ccov(0, 2)); - cov(4) = cov(3) = 0; - cov(5) = ccov(1, 1); - cov(6) = b * float(ccov(1, 2)); - cov(8) = cov(7) = 0; - cov(9) = b * b * float(ccov(2, 2)); - cov(11) = cov(10) = 0; - cov(12) = lcov(0, 0); - cov(13) = lcov(0, 1); - cov(14) = lcov(1, 1); - } - - template - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr void copyFromDense(TrackSoAView &tracks, - V5 const &v, - M5 const &cov, - int32_t i) { - tracks[i].state() = v.template cast(); - for (int j = 0, ind = 0; j < 5; ++j) - for (auto k = j; k < 5; ++k) - tracks[i].covariance()(ind++) = cov(j, k); - } - - template - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr void copyToDense(const TrackSoAConstView &tracks, - V5 &v, - M5 &cov, - int32_t i) { - v = tracks[i].state().template cast(); - for (int j = 0, ind = 0; j < 5; ++j) { - cov(j, j) = tracks[i].covariance()(ind++); - for (auto k = j + 1; k < 5; ++k) - cov(k, j) = cov(j, k) = tracks[i].covariance()(ind++); - } - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr int computeNumberOfLayers(const TrackSoAConstView &tracks, - int32_t i) { - auto pdet = tracks.detIndices().begin(i); - int nl = 1; - auto ol = pixelTopology::getLayer(*pdet); - for (; pdet < tracks.detIndices().end(i); ++pdet) { - auto il = 
pixelTopology::getLayer(*pdet); - if (il != ol) - ++nl; - ol = il; - } - return nl; - } - - ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE static constexpr int nHits(const TrackSoAConstView &tracks, int i) { - return tracks.detIndices().size(i); - } -}; - namespace pixelTrack { template @@ -90,8 +17,8 @@ namespace pixelTrack { template struct QualityCutsT> { - using TrackSoAView = typename reco::TrackSoA::template Layout<>::View; - using TrackSoAConstView = typename reco::TrackSoA::template Layout<>::ConstView; + using TrackSoAView = reco::TrackSoAView; + using TrackSoAConstView = reco::TrackSoAConstView; float chi2Coeff[4]; float chi2MaxPt; // GeV float chi2Scale; @@ -152,8 +79,8 @@ namespace pixelTrack { template struct QualityCutsT> { - using TrackSoAView = typename reco::TrackSoA::template Layout<>::View; - using TrackSoAConstView = typename reco::TrackSoA::template Layout<>::ConstView; + using TrackSoAView = reco::TrackSoAView; + using TrackSoAConstView = reco::TrackSoAConstView; float maxChi2; float minPt; @@ -171,8 +98,4 @@ namespace pixelTrack { } // namespace pixelTrack -// TODO: Should those be placed in the ALPAKA_ACCELERATOR_NAMESPACE -template struct TracksUtilities; -template struct TracksUtilities; - #endif // DataFormats_TrackSoA_interface_alpaka_TrackUtilities_h diff --git a/DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h b/DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h index c9294d693d4c4..0283ab4c645c0 100644 --- a/DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h +++ b/DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h @@ -13,42 +13,15 @@ #include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" -// TODO: The class is created via inheritance of the PortableCollection. -// This is generally discouraged, and should be done via composition. 
-// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 - -namespace ALPAKA_ACCELERATOR_NAMESPACE { - - template - using TracksSoACollection = std::conditional_t, - TracksHost, - TracksDevice>; - - //Classes definition for Phase1/Phase2/HIonPhase1, to make the classes_def lighter. Not actually used in the code. - namespace pixelTrack { - using TracksSoACollectionPhase1 = TracksSoACollection; - using TracksSoACollectionPhase2 = TracksSoACollection; - using TracksSoACollectionHIonPhase1 = TracksSoACollection; - } // namespace pixelTrack -} // namespace ALPAKA_ACCELERATOR_NAMESPACE - -namespace cms::alpakatools { - template - struct CopyToHost> { - template - static auto copyAsync(TQueue& queue, TracksDevice const& deviceData) { - ::TracksHost hostData(queue); - alpaka::memcpy(queue, hostData.buffer(), deviceData.buffer()); -#ifdef GPU_DEBUG - printf("TracksSoACollection: I'm copying to host.\n"); -#endif - return hostData; - } - }; -} // namespace cms::alpakatools - -ASSERT_DEVICE_MATCHES_HOST_COLLECTION(pixelTrack::TracksSoACollectionPhase1, pixelTrack::TracksHostPhase1); -ASSERT_DEVICE_MATCHES_HOST_COLLECTION(pixelTrack::TracksSoACollectionPhase2, pixelTrack::TracksHostPhase2); -ASSERT_DEVICE_MATCHES_HOST_COLLECTION(pixelTrack::TracksSoACollectionHIonPhase1, pixelTrack::TracksHostHIonPhase1); +namespace ALPAKA_ACCELERATOR_NAMESPACE::reco { + + using ::reco::TracksDevice; + using ::reco::TracksHost; + using TracksSoACollection = + std::conditional_t, TracksHost, TracksDevice>; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::reco + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(reco::TracksSoACollection, reco::TracksHost); #endif // DataFormats_TrackSoA_interface_alpaka_TracksSoACollection_h diff --git a/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml index 08d7eb9724b34..fec193f9d7b46 100644 --- a/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml +++ 
b/DataFormats/TrackSoA/src/alpaka/classes_cuda_def.xml @@ -1,16 +1,6 @@ - - - - - - - - - - - - - - + + + + diff --git a/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml index 0cd8cf9bb49d7..9410772a49518 100644 --- a/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml +++ b/DataFormats/TrackSoA/src/alpaka/classes_rocm_def.xml @@ -1,16 +1,6 @@ - - - - - - - - - - - - - - + + + + diff --git a/DataFormats/TrackSoA/src/classes.cc b/DataFormats/TrackSoA/src/classes.cc index 97e00cc5b5638..ec870b118ed7d 100644 --- a/DataFormats/TrackSoA/src/classes.cc +++ b/DataFormats/TrackSoA/src/classes.cc @@ -1,9 +1,5 @@ #include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h" -#include "DataFormats/TrackSoA/interface/TracksSoA.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "DataFormats/TrackSoA/interface/TracksHost.h" using namespace reco; - -SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); -SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); -// SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); //TODO: For the moment we live without HIons +SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(TracksHost); diff --git a/DataFormats/TrackSoA/src/classes.h b/DataFormats/TrackSoA/src/classes.h index c97bb234d7e18..a3e63f1e471f3 100644 --- a/DataFormats/TrackSoA/src/classes.h +++ b/DataFormats/TrackSoA/src/classes.h @@ -5,6 +5,4 @@ #include "DataFormats/TrackSoA/interface/TracksSoA.h" #include "DataFormats/TrackSoA/interface/TracksHost.h" -using namespace pixelTopology; - #endif // DataFormats_TrackSoA_src_classes_h diff --git a/DataFormats/TrackSoA/src/classes_def.xml b/DataFormats/TrackSoA/src/classes_def.xml index dcbe554334b93..87f2871042e93 100644 --- a/DataFormats/TrackSoA/src/classes_def.xml +++ b/DataFormats/TrackSoA/src/classes_def.xml @@ -1,28 +1,18 @@ - - - - - - - + + - - - - + + - - - + + + - - - - + + + - - - + diff --git 
a/DataFormats/TrackSoA/test/BuildFile.xml b/DataFormats/TrackSoA/test/BuildFile.xml index f5cf50149d57e..35e07274b935e 100644 --- a/DataFormats/TrackSoA/test/BuildFile.xml +++ b/DataFormats/TrackSoA/test/BuildFile.xml @@ -11,3 +11,14 @@ + + + + + + + + + + + diff --git a/DataFormats/TrackSoA/test/TestReadHostTrackSoA.cc b/DataFormats/TrackSoA/test/TestReadHostTrackSoA.cc new file mode 100644 index 0000000000000..88dce4cb951cb --- /dev/null +++ b/DataFormats/TrackSoA/test/TestReadHostTrackSoA.cc @@ -0,0 +1,50 @@ +#include "DataFormats/DetId/interface/DetId.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/global/EDAnalyzer.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/Exception.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/Utilities/interface/StreamID.h" + +#include "DataFormats/TrackSoA/interface/TracksHost.h" + +namespace edmtest { + + class TestReadHostTrackSoA : public edm::global::EDAnalyzer<> { + public: + TestReadHostTrackSoA(edm::ParameterSet const&); + void analyze(edm::StreamID, edm::Event const&, edm::EventSetup const&) const override; + static void fillDescriptions(edm::ConfigurationDescriptions&); + + private: + edm::EDGetTokenT<::reco::TracksHost> getToken_; + }; + + TestReadHostTrackSoA::TestReadHostTrackSoA(edm::ParameterSet const& iPSet) + : getToken_(consumes(iPSet.getParameter("input"))) {} + + void TestReadHostTrackSoA::analyze(edm::StreamID, edm::Event const& iEvent, edm::EventSetup const&) const { + auto const& tracks = iEvent.get(getToken_); + auto tracksView = tracks.view(); + + for (int i = 0; i < tracksView.metadata().size(); ++i) { + 
if (tracksView[i].eta() != float(i)) { + throw cms::Exception("TestReadHostTrackSoA Failure") << "TestReadHostTrackSoA::analyze, entry. i = " << i; + } + } + } + + void TestReadHostTrackSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("input"); + descriptions.addDefault(desc); + } +} // namespace edmtest + +using edmtest::TestReadHostTrackSoA; +DEFINE_FWK_MODULE(TestReadHostTrackSoA); diff --git a/DataFormats/TrackSoA/test/TestWriteHostTrackSoA.cc b/DataFormats/TrackSoA/test/TestWriteHostTrackSoA.cc new file mode 100644 index 0000000000000..d9c9087cced6c --- /dev/null +++ b/DataFormats/TrackSoA/test/TestWriteHostTrackSoA.cc @@ -0,0 +1,50 @@ +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/EDPutToken.h" +#include "FWCore/Utilities/interface/StreamID.h" + +#include "DataFormats/TrackSoA/interface/TracksHost.h" + +#include +#include +#include + +namespace edmtest { + + class TestWriteHostTrackSoA : public edm::global::EDProducer<> { + public: + TestWriteHostTrackSoA(edm::ParameterSet const&); + void produce(edm::StreamID, edm::Event&, edm::EventSetup const&) const override; + static void fillDescriptions(edm::ConfigurationDescriptions&); + + private: + unsigned int trackSize_; + edm::EDPutTokenT<::reco::TracksHost> putToken_; + }; + + TestWriteHostTrackSoA::TestWriteHostTrackSoA(edm::ParameterSet const& iPSet) + : trackSize_(iPSet.getParameter("trackSize")), putToken_(produces()) {} + + void TestWriteHostTrackSoA::produce(edm::StreamID, edm::Event& iEvent, edm::EventSetup const&) const { + ::reco::TracksHost 
tracks({{int(trackSize_), int(4 * trackSize_)}}, cms::alpakatools::host()); + auto tracksView = tracks.view(); + for (unsigned int i = 0; i < trackSize_; ++i) { + tracksView[i].eta() = float(i); + } + iEvent.emplace(putToken_, std::move(tracks)); + } + + void TestWriteHostTrackSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("trackSize", 1000); + descriptions.addDefault(desc); + } +} // namespace edmtest + +using edmtest::TestWriteHostTrackSoA; +DEFINE_FWK_MODULE(TestWriteHostTrackSoA); diff --git a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc index 93e6df967fc84..09b3bebeadcfd 100644 --- a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc +++ b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc @@ -4,7 +4,7 @@ Creates an instance of the class (automatically allocates memory on device), passes the view of the SoA data to - the CUDA kernels which: + the kernels which: - Fill the SoA with data. - Verify that the data written is correct. @@ -31,7 +31,7 @@ #include "TrackSoAHeterogeneous_test.h" using namespace ALPAKA_ACCELERATOR_NAMESPACE; -using namespace ALPAKA_ACCELERATOR_NAMESPACE::pixelTrack; +using namespace ALPAKA_ACCELERATOR_NAMESPACE::reco; int main() { // Get the list of devices on the current platform @@ -50,14 +50,17 @@ int main() { { // Instantiate tracks on device. PortableDeviceCollection allocates // SoA on device automatically. - TracksSoACollection tracks_d(queue); - testTrackSoA::runKernels(tracks_d.view(), queue); + constexpr auto nTracks = 1000; + constexpr auto nHits = nTracks * 5; + + TracksSoACollection tracks_d({{nTracks, nHits}}, queue); + testTrackSoA::runKernels(tracks_d.view(), queue); // Instantate tracks on host. This is where the data will be // copied to from device. 
- TracksHost tracks_h(queue); + ::reco::TracksHost tracks_h({{nTracks, nHits}}, queue); - std::cout << tracks_h.view().metadata().size() << std::endl; + std::cout << "no. of tracks = " << tracks_h.view().metadata().size() << std::endl; alpaka::memcpy(queue, tracks_h.buffer(), tracks_d.const_buffer()); alpaka::wait(queue); @@ -77,7 +80,7 @@ int main() { for (int i = 0; i < 10; ++i) { std::cout << tracks_h.view()[i].pt() << "\t" << tracks_h.view()[i].eta() << "\t" << tracks_h.view()[i].chi2() << "\t" << (int)tracks_h.view()[i].quality() << "\t" << (int)tracks_h.view()[i].nLayers() << "\t" - << tracks_h.view().hitIndices().off[i] << std::endl; + << tracks_h.view()[i].hitOffsets() << std::endl; } } } diff --git a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc index 744dd7c4d1ca3..ad6b92ead096f 100644 --- a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc +++ b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.dev.cc @@ -20,10 +20,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // Kernel which fills the TrackSoAView with data // to test writing to it - template class TestFillKernel { public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, TrackSoAView tracks_view, int32_t nTracks) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, TrackSoAView tracks_view, int32_t nTracks) const { if (cms::alpakatools::once_per_grid(acc)) { tracks_view.nTracks() = nTracks; } @@ -34,19 +33,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { tracks_view[j].chi2() = (float)j; tracks_view[j].quality() = (Quality)(j % 256); tracks_view[j].nLayers() = j % 128; - tracks_view.hitIndices().off[j] = j; + tracks_view[j].hitOffsets() = j; } } }; // Kernel which reads from the TrackSoAView to verify // that it was written correctly from the fill kernel - template class TestVerifyKernel { public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, - TrackSoAConstView tracks_view, - 
int32_t nTracks) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, TrackSoAConstView tracks_view, int32_t nTracks) const { if (cms::alpakatools::once_per_grid(acc)) { ALPAKA_ASSERT(tracks_view.nTracks() == nTracks); } @@ -56,24 +52,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { ALPAKA_ASSERT(abs(tracks_view[j].chi2() - (float)j) < .0001); ALPAKA_ASSERT(tracks_view[j].quality() == (Quality)(j % 256)); ALPAKA_ASSERT(tracks_view[j].nLayers() == j % 128); - ALPAKA_ASSERT(tracks_view.hitIndices().off[j] == uint32_t(j)); + ALPAKA_ASSERT(tracks_view[j].hitOffsets() == uint32_t(j)); } } }; // Host function which invokes the two kernels above - template - void runKernels(TrackSoAView tracks_view, Queue& queue) { + void runKernels(TrackSoAView tracks_view, Queue& queue) { int32_t tracks = 420; uint32_t items = 64; uint32_t groups = divide_up_by(tracks, items); auto workDiv = make_workdiv(groups, items); - alpaka::exec(queue, workDiv, TestFillKernel{}, tracks_view, tracks); - alpaka::exec(queue, workDiv, TestVerifyKernel{}, tracks_view, tracks); + alpaka::exec(queue, workDiv, TestFillKernel{}, tracks_view, tracks); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, tracks_view, tracks); } - template void runKernels(TrackSoAView tracks_view, Queue& queue); - template void runKernels(TrackSoAView tracks_view, Queue& queue); - } // namespace testTrackSoA + } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.h b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.h index 9443559566ce1..9fbc9b781af63 100644 --- a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.h +++ b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.h @@ -6,8 +6,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::testTrackSoA { - template - void runKernels(reco::TrackSoAView tracks_view, Queue& queue); + void runKernels(::reco::TrackSoAView tracks_view, Queue& queue); } // namespace 
ALPAKA_ACCELERATOR_NAMESPACE::testTrackSoA diff --git a/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.cc b/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.cc index 507d3dde5e120..7c3bc1ababfe4 100644 --- a/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.cc +++ b/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.cc @@ -23,7 +23,7 @@ // Each test binary is built for a single Alpaka backend. using namespace ALPAKA_ACCELERATOR_NAMESPACE; - +using namespace ALPAKA_ACCELERATOR_NAMESPACE::reco; int main() { // Get the list of devices on the current platform. auto const& devices = cms::alpakatools::devices(); @@ -39,7 +39,7 @@ int main() { // Inner scope to deallocate memory before destroying the stream. { - TracksSoACollection tracks_d(queue); + TracksSoACollection tracks_d({{1000, 5000}}, queue); test::testTrackSoA(queue, tracks_d.view()); diff --git a/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.dev.cc b/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.dev.cc index 8a69df26e9d35..5ceb5dfbbd514 100644 --- a/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.dev.cc +++ b/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.dev.cc @@ -33,9 +33,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::test { template struct TestTrackSoA { - using Utils = TracksUtilities; - - ALPAKA_FN_ACC void operator()(Acc1D const& acc, reco::TrackSoAView tracks) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, reco::TrackSoAView tracks) const { Vector5d par0; par0 << 0.2, 0.1, 3.5, 0.8, 0.1; Vector5d e0; @@ -43,10 +41,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::test { Matrix5d cov0 = buildCovariance(e0); for (auto i : uniform_elements(acc, tracks.metadata().size())) { - Utils::copyFromDense(tracks, par0, cov0, i); + reco::copyFromDense(tracks, par0, cov0, i); Vector5d par1; Matrix5d cov1; - Utils::copyToDense(tracks, par1, cov1, i); + reco::copyToDense(tracks, par1, cov1, i); Vector5d deltaV = par1 - par0; Matrix5d deltaM = cov1 - cov0; for (int j 
= 0; j < 5; ++j) { @@ -64,12 +62,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::test { } // namespace template - void testTrackSoA(Queue& queue, reco::TrackSoAView& tracks) { + void testTrackSoA(Queue& queue, ::reco::TrackSoAView& tracks) { auto grid = make_workdiv(1, 64); alpaka::exec(queue, grid, TestTrackSoA{}, tracks); } - template void testTrackSoA(Queue& queue, reco::TrackSoAView& tracks); - template void testTrackSoA(Queue& queue, reco::TrackSoAView& tracks); + template void testTrackSoA(Queue& queue, reco::TrackSoAView& tracks); + template void testTrackSoA(Queue& queue, reco::TrackSoAView& tracks); } // namespace ALPAKA_ACCELERATOR_NAMESPACE::test diff --git a/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.h b/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.h index a6d7688db3698..61b9cd150e615 100644 --- a/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.h +++ b/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.h @@ -8,7 +8,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::test { template - void testTrackSoA(Queue& queue, reco::TrackSoAView& tracks); + void testTrackSoA(Queue& queue, ::reco::TrackSoAView& tracks); } // namespace ALPAKA_ACCELERATOR_NAMESPACE::test diff --git a/DataFormats/TrackSoA/test/testReadHostTrackSoA.py b/DataFormats/TrackSoA/test/testReadHostTrackSoA.py new file mode 100644 index 0000000000000..965dec6b93c17 --- /dev/null +++ b/DataFormats/TrackSoA/test/testReadHostTrackSoA.py @@ -0,0 +1,20 @@ +import FWCore.ParameterSet.Config as cms +import sys + +process = cms.Process("READ") + +process.source = cms.Source("PoolSource", fileNames = cms.untracked.vstring("file:"+sys.argv[1])) + +process.testReadHostTrackSoA = cms.EDAnalyzer("TestReadHostTrackSoA", + input = cms.InputTag("trackSoA", "", "WRITE") +) + +process.out = cms.OutputModule("PoolOutputModule", + fileName = cms.untracked.string('testTrackSoAReader.root'), + fastCloning = cms.untracked.bool(False) +) + +process.path = cms.Path(process.testReadHostTrackSoA) + 
+process.endPath = cms.EndPath(process.out) + diff --git a/DataFormats/TrackSoA/test/testWriteAndReadTrackSoA.sh b/DataFormats/TrackSoA/test/testWriteAndReadTrackSoA.sh new file mode 100755 index 0000000000000..a11438ba64fbf --- /dev/null +++ b/DataFormats/TrackSoA/test/testWriteAndReadTrackSoA.sh @@ -0,0 +1,22 @@ +echo '#### Test Writing and Reading TrackSoA' + +scriptdir=$CMSSW_BASE/src/DataFormats/TrackSoA/test/ + +echo '> Writing' + +cmsRun ${scriptdir}/testWriteHostTrackSoA.py testTrackSoa.root + +if [ $? -ne 0 ]; then + exit 1; +fi + +echo '> Reading' + +cmsRun ${scriptdir}/testReadHostTrackSoA.py testTrackSoa.root + +if [ $? -ne 0 ]; then + exit 1; +fi + +echo '>>>> Done! <<<<' + diff --git a/DataFormats/TrackSoA/test/testWriteHostTrackSoA.py b/DataFormats/TrackSoA/test/testWriteHostTrackSoA.py new file mode 100644 index 0000000000000..8ad14e5960588 --- /dev/null +++ b/DataFormats/TrackSoA/test/testWriteHostTrackSoA.py @@ -0,0 +1,22 @@ +import FWCore.ParameterSet.Config as cms + +process = cms.Process("WRITE") + +process.load("FWCore.MessageService.MessageLogger_cfi") + +process.source = cms.Source("EmptySource") +process.maxEvents.input = 5 + +process.trackSoA = cms.EDProducer("TestWriteHostTrackSoA", + trackSize = cms.uint32(2708) +) + +process.out = cms.OutputModule("PoolOutputModule", + fileName = cms.untracked.string(sys.argv[1]) +) + +process.path = cms.Path(process.trackSoA) +process.endPath = cms.EndPath(process.out) + +'testTrackSoAWriter.root' + diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h index 885aba8f106a5..96455bf938a87 100644 --- a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h @@ -9,51 +9,73 @@ #include "DataFormats/Portable/interface/PortableDeviceCollection.h" #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" #include 
"DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" -template -class TrackingRecHitDevice : public PortableDeviceCollection, TDev> { -public: - using hitSoA = TrackingRecHitSoA; - - // Need to decorate the class with the inherited portable accessors being now a template - using PortableDeviceCollection, TDev>::view; - using PortableDeviceCollection, TDev>::const_view; - using PortableDeviceCollection, TDev>::buffer; - - TrackingRecHitDevice(edm::Uninitialized) - : PortableDeviceCollection, TDev>{edm::kUninitialized} {} - - // Constructor which specifies the SoA size, number of BPIX1 hits, and the modules entry points - template - explicit TrackingRecHitDevice(TQueue queue, uint32_t nHits, int32_t offsetBPIX2, uint32_t const* hitsModuleStart) - : PortableDeviceCollection, TDev>(nHits, queue), offsetBPIX2_{offsetBPIX2} { - auto start_h = cms::alpakatools::make_device_view(queue, hitsModuleStart, TrackerTraits::numberOfModules + 1); - auto start_d = - cms::alpakatools::make_device_view(queue, view().hitsModuleStart().data(), TrackerTraits::numberOfModules + 1); - alpaka::memcpy(queue, start_d, start_h); - - auto off_h = cms::alpakatools::make_host_view(offsetBPIX2_); - auto off_d = cms::alpakatools::make_device_view(queue, view().offsetBPIX2()); - alpaka::memcpy(queue, off_d, off_h); - } - - uint32_t nHits() const { return view().metadata().size(); } - - int32_t offsetBPIX2() const { return offsetBPIX2_; } - - uint32_t const* hitsModuleStart() const { return view().hitsModuleStart().data(); } - - // asynchronously update the information cached within the class itself from the information on the device - template - void updateFromDevice(TQueue queue) { - auto off_h = cms::alpakatools::make_host_view(offsetBPIX2_); - auto off_d = cms::alpakatools::make_device_view(queue, view().offsetBPIX2()); - alpaka::memcpy(queue, off_h, off_d); - } - -private: - // offsetBPIX2 is used on host 
functions so is useful to have it also stored in the class and not only in the layout - int32_t offsetBPIX2_ = 0; -}; +// TODO: The class is created via inheritance of the PortableCollection. +// This is generally discouraged, and should be done via composition. +// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 + +namespace reco { + + template + using HitPortableCollectionDevice = PortableDeviceMultiCollection; + + template + class TrackingRecHitDevice : public HitPortableCollectionDevice { + public: + TrackingRecHitDevice() = default; + + TrackingRecHitDevice(edm::Uninitialized) : HitPortableCollectionDevice{edm::kUninitialized} {} + + // Constructor which specifies only the SoA size, to be used when copying the results from host to device + template + explicit TrackingRecHitDevice(TQueue queue, uint32_t nHits, uint32_t nModules) + : HitPortableCollectionDevice({{int(nHits), int(nModules + 1)}}, queue) {} + + // N.B. why this + 1? Because the HitModulesLayout is holding the + // moduleStart vector that is a cumulative sum of all the hits + // in each module. The extra element of the array (the last one) + // is used to hold the total number of hits. We are "hiding" this + // in the constructor so that one can build the TrackingRecHit class + // in a more natural way, just using the number of needed modules. 
+ + // Constructor from clusters + template + explicit TrackingRecHitDevice(TQueue queue, SiPixelClustersDevice const &clusters) + : HitPortableCollectionDevice({{int(clusters.nClusters()), clusters.view().metadata().size()}}, queue), + offsetBPIX2_{clusters.offsetBPIX2()} { + auto hitsView = this->template view(); + auto modsView = this->template view(); + + auto nModules = clusters.view().metadata().size(); + + auto clusters_m = cms::alpakatools::make_device_view(queue, clusters.view().clusModuleStart(), nModules); + auto hits_m = cms::alpakatools::make_device_view(queue, modsView.moduleStart(), nModules); + + alpaka::memcpy(queue, hits_m, clusters_m); + + auto off_h = cms::alpakatools::make_host_view(offsetBPIX2_); + auto off_d = cms::alpakatools::make_device_view(queue, hitsView.offsetBPIX2()); + alpaka::memcpy(queue, off_d, off_h); + } + + uint32_t nHits() const { return this->template view().metadata().size(); } + uint32_t nModules() const { return this->template view().metadata().size() - 1; } + + int32_t offsetBPIX2() const { return offsetBPIX2_; } + + // asynchronously update the information cached within the class itself from the information on the device + template + void updateFromDevice(TQueue queue) { + auto off_h = cms::alpakatools::make_host_view(offsetBPIX2_); + auto off_d = cms::alpakatools::make_device_view(queue, this->template view().offsetBPIX2()); + alpaka::memcpy(queue, off_h, off_d); + } + + private: + // offsetBPIX2 is used on host functions so is useful to have it also stored in the class and not only in the layout + int32_t offsetBPIX2_ = 0; + }; +} // namespace reco #endif // DataFormats_RecHits_interface_TrackingRecHitSoADevice_h diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h index 1480236f9517b..528f1a2205689 100644 --- a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h +++ 
b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h @@ -8,47 +8,55 @@ #include "DataFormats/Common/interface/Uninitialized.h" #include "DataFormats/Portable/interface/PortableHostCollection.h" #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" -template -class TrackingRecHitHost : public PortableHostCollection> { -public: - using hitSoA = TrackingRecHitSoA; +// TODO: The class is created via inheritance of the PortableCollection. +// This is generally discouraged, and should be done via composition. +// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 - // Need to decorate the class with the inherited portable accessors being now a template - using PortableHostCollection>::view; - using PortableHostCollection>::const_view; - using PortableHostCollection>::buffer; +namespace reco { - TrackingRecHitHost(edm::Uninitialized) - : PortableHostCollection>{edm::kUninitialized} {} + using HitPortableCollectionHost = PortableHostMultiCollection; - // Constructor which specifies only the SoA size, to be used when copying the results from the device to the host - template - explicit TrackingRecHitHost(TQueue queue, uint32_t nHits) - : PortableHostCollection>(nHits, queue) {} + class TrackingRecHitHost : public HitPortableCollectionHost { + public: + TrackingRecHitHost(edm::Uninitialized) + : PortableHostMultiCollection{edm::kUninitialized} {} - // Constructor which specifies the SoA size, number of BPIX1 hits, and the modules entry points - template - explicit TrackingRecHitHost(TQueue queue, uint32_t nHits, int32_t offsetBPIX2, uint32_t const* hitsModuleStart) - : PortableHostCollection>(nHits, queue) { - std::copy(hitsModuleStart, hitsModuleStart + TrackerTraits::numberOfModules + 1, view().hitsModuleStart().data()); - view().offsetBPIX2() = offsetBPIX2; - } + // Constructor which 
specifies only the SoA size, to be used when copying the results from the device to the host + template + explicit TrackingRecHitHost(TQueue queue, uint32_t nHits, uint32_t nModules) + : HitPortableCollectionHost({{int(nHits), int(nModules + 1)}}, queue) {} + // Why this +1? See TrackingRecHitDevice.h constructor for an explanation - uint32_t nHits() const { return view().metadata().size(); } + // Constructor from clusters + template + explicit TrackingRecHitHost(TQueue queue, SiPixelClustersHost const &clusters) + : HitPortableCollectionHost({{int(clusters.nClusters()), clusters.view().metadata().size()}}, queue) { + auto hitsView = this->template view(); + auto modsView = this->template view(); - int32_t offsetBPIX2() const { return view().offsetBPIX2(); } + auto nModules = clusters.view().metadata().size(); - uint32_t const* hitsModuleStart() const { return view().hitsModuleStart().data(); } + auto clusters_m = cms::alpakatools::make_host_view(clusters.view().clusModuleStart(), nModules); + auto hits_m = cms::alpakatools::make_host_view(modsView.moduleStart(), nModules); - // do nothing for a host collection - template - void updateFromDevice(TQueue) {} -}; + alpaka::memcpy(queue, hits_m, clusters_m); -using TrackingRecHitHostPhase1 = TrackingRecHitHost; -using TrackingRecHitHostPhase2 = TrackingRecHitHost; -using TrackingRecHitHostHIonPhase1 = TrackingRecHitHost; + hitsView.offsetBPIX2() = clusters.offsetBPIX2(); + } + + uint32_t nHits() const { return this->template view().metadata().size(); } + uint32_t nModules() const { return this->template view().metadata().size() - 1; } + + int32_t offsetBPIX2() const { return this->template view().offsetBPIX2(); } + + // do nothing for a host collection + template + void updateFromDevice(TQueue) {} + }; + +} // namespace reco #endif // DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsHost_h diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h 
b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h index 679bdc48da262..cb1574f8a31b3 100644 --- a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h @@ -8,22 +8,9 @@ #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" -template -struct TrackingRecHitSoA { - using hindex_type = typename TrackerTraits::hindex_type; - using PhiBinner = cms::alpakatools::HistoContainer; //28 for phase2 geometry - using PhiBinnerView = typename PhiBinner::View; - using PhiBinnerStorageType = typename PhiBinner::index_type; - using AverageGeometry = pixelTopology::AverageGeometryT; - using HitLayerStartArray = std::array; - using HitModuleStartArray = std::array; - - GENERATE_SOA_LAYOUT(Layout, +namespace reco { + + GENERATE_SOA_LAYOUT(TrackingHitsLayout, SOA_COLUMN(float, xLocal), SOA_COLUMN(float, yLocal), SOA_COLUMN(float, xerrLocal), @@ -37,19 +24,38 @@ struct TrackingRecHitSoA { SOA_COLUMN(int16_t, clusterSizeX), SOA_COLUMN(int16_t, clusterSizeY), SOA_COLUMN(uint16_t, detectorIndex), - SOA_SCALAR(int32_t, offsetBPIX2), - SOA_COLUMN(PhiBinnerStorageType, phiBinnerStorage), - SOA_SCALAR(HitModuleStartArray, hitsModuleStart), - SOA_SCALAR(HitLayerStartArray, hitsLayerStart), - SOA_SCALAR(AverageGeometry, averageGeometry), - SOA_SCALAR(PhiBinner, phiBinner)); -}; - -template -using TrackingRecHitLayout = typename TrackingRecHitSoA::template Layout<>; -template -using TrackingRecHitSoAView = typename TrackingRecHitSoA::template Layout<>::View; -template -using TrackingRecHitSoAConstView = typename TrackingRecHitSoA::template Layout<>::ConstView; + SOA_SCALAR(int32_t, offsetBPIX2)); + + GENERATE_SOA_LAYOUT(HitModulesLayout, SOA_COLUMN(uint32_t, moduleStart)); + + // N.B. 
this layout is not really included by default in the hits SoA + // This holds the needed parameters to activate (via ONLY_TRIPLETS_IN_HOLE) the + // calculations to check if a triplet points to the disk hole + // and then retain only those that fulfil this requirement. + // At the moment this feature is not fully (re)implemented. + + GENERATE_SOA_LAYOUT(AverageGeometryLayout, + SOA_COLUMN(float, ladderZ), + SOA_COLUMN(float, ladderX), + SOA_COLUMN(float, ladderY), + SOA_COLUMN(float, ladderR), + SOA_COLUMN(float, ladderMinZ), + SOA_COLUMN(float, ladderMaxZ), + SOA_SCALAR(int32_t, endCapZPos), + SOA_SCALAR(int32_t, endCapZNeg)) + + using TrackingRecHitSoA = TrackingHitsLayout<>; + using TrackingRecHitView = TrackingRecHitSoA::View; + using TrackingRecHitConstView = TrackingRecHitSoA::ConstView; + + using HitModuleSoA = HitModulesLayout<>; + using HitModuleSoAView = HitModuleSoA::View; + using HitModuleSoAConstView = HitModuleSoA::ConstView; + + using AverageGeometrySoA = AverageGeometryLayout<>; + using AverageGeometryView = AverageGeometrySoA::View; + using AverageGeometryConstView = AverageGeometrySoA::ConstView; + +}; // namespace reco #endif diff --git a/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h b/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h index 7c3fef745c669..6750a42334bcb 100644 --- a/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h +++ b/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h @@ -1,6 +1,8 @@ #ifndef DataFormats_TrackingRecHitSoA_interface_alpaka_TrackingRecHitsSoACollection_h #define DataFormats_TrackingRecHitSoA_interface_alpaka_TrackingRecHitsSoACollection_h +// #define GPU_DEBUG + #include #include @@ -12,35 +14,31 @@ #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include 
"HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" -namespace ALPAKA_ACCELERATOR_NAMESPACE { +namespace ALPAKA_ACCELERATOR_NAMESPACE::reco { - template using TrackingRecHitsSoACollection = std::conditional_t, - TrackingRecHitHost, - TrackingRecHitDevice>; - - // Classes definition for Phase1/Phase2, to make the classes_def lighter. Not actually used in the code. - using TrackingRecHitSoAPhase1 = TrackingRecHitsSoACollection; - using TrackingRecHitSoAPhase2 = TrackingRecHitsSoACollection; - using TrackingRecHitSoAHIonPhase1 = TrackingRecHitsSoACollection; - -} // namespace ALPAKA_ACCELERATOR_NAMESPACE + ::reco::TrackingRecHitHost, + ::reco::TrackingRecHitDevice>; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::reco namespace cms::alpakatools { - template - struct CopyToHost> { + template + struct CopyToHost<::reco::TrackingRecHitDevice> { template - static auto copyAsync(TQueue& queue, TrackingRecHitDevice const& deviceData) { - TrackingRecHitHost hostData(queue, deviceData.view().metadata().size()); + static auto copyAsync(TQueue& queue, ::reco::TrackingRecHitDevice const& deviceData) { + auto nHits = deviceData.nHits(); + + reco::TrackingRecHitHost hostData(queue, nHits, deviceData.nModules()); // Don't bother if zero hits - if (deviceData.view().metadata().size() == 0) { - std::memset(hostData.buffer().data(), - 0, - alpaka::getExtentProduct(hostData.buffer()) * - sizeof(alpaka::Elem::Buffer>)); + if (nHits == 0) { + std::memset( + hostData.buffer().data(), + 0, + alpaka::getExtentProduct(hostData.buffer()) * sizeof(alpaka::Elem)); return hostData; } @@ -49,30 +47,47 @@ namespace cms::alpakatools { printf("TrackingRecHitsSoACollection: I'm copying to host.\n"); alpaka::wait(queue); assert(deviceData.nHits() == hostData.nHits()); + assert(deviceData.nModules() == hostData.nModules()); assert(deviceData.offsetBPIX2() == 
hostData.offsetBPIX2()); #endif + return hostData; } + }; - // Update the contents address of the phiBinner histo container after the copy from device happened - static void postCopy(TrackingRecHitHost& hostData) { - // Don't bother if zero hits - if (hostData.view().metadata().size() == 0) { - return; + template <> + struct CopyToDevice<::reco::TrackingRecHitHost> { + template + static auto copyAsync(TQueue& queue, reco::TrackingRecHitHost const& hostData) { + using TDevice = typename alpaka::trait::DevType::type; + + auto nHits = hostData.nHits(); + + reco::TrackingRecHitDevice deviceData(queue, nHits, hostData.nModules()); + + if (nHits == 0) { + std::memset( + deviceData.buffer().data(), + 0, + alpaka::getExtentProduct(deviceData.buffer()) * sizeof(alpaka::Elem)); + return deviceData; } - typename TrackingRecHitSoA::PhiBinnerView pbv; - pbv.assoc = &(hostData.view().phiBinner()); - pbv.offSize = -1; - pbv.offStorage = nullptr; - pbv.contentSize = hostData.nHits(); - pbv.contentStorage = hostData.view().phiBinnerStorage(); - hostData.view().phiBinner().initStorage(pbv); + + alpaka::memcpy(queue, deviceData.buffer(), hostData.buffer()); + +#ifdef GPU_DEBUG + printf("TrackingRecHitsSoACollection: I'm copying to device.\n"); + alpaka::wait(queue); + assert(deviceData.nHits() == hostData.nHits()); + assert(deviceData.nModules() == hostData.nModules()); + assert(deviceData.offsetBPIX2() == hostData.offsetBPIX2()); +#endif + return deviceData; } }; + } // namespace cms::alpakatools -ASSERT_DEVICE_MATCHES_HOST_COLLECTION(TrackingRecHitSoAPhase1, TrackingRecHitHostPhase1); -ASSERT_DEVICE_MATCHES_HOST_COLLECTION(TrackingRecHitSoAPhase2, TrackingRecHitHostPhase2); -ASSERT_DEVICE_MATCHES_HOST_COLLECTION(TrackingRecHitSoAHIonPhase1, TrackingRecHitHostHIonPhase1); +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(reco::TrackingRecHitsSoACollection, reco::TrackingRecHitHost); #endif // DataFormats_TrackingRecHitSoA_interface_alpaka_TrackingRecHitsSoACollection_h diff --git 
a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml index 80c267b57d585..95532fe05d10d 100644 --- a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml @@ -1,16 +1,6 @@ - - - - - - - - - - - - - - + + + + diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml index bc4c969137121..2aa4145b5dd88 100644 --- a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml @@ -1,17 +1,6 @@ - - - - - - - - - - - - - - + + + diff --git a/DataFormats/TrackingRecHitSoA/src/classes.cc b/DataFormats/TrackingRecHitSoA/src/classes.cc index bbcc923b04373..f6a086b6c8c42 100644 --- a/DataFormats/TrackingRecHitSoA/src/classes.cc +++ b/DataFormats/TrackingRecHitSoA/src/classes.cc @@ -1,7 +1,5 @@ #include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h" -#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" -SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); -SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); -SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); +using namespace reco; +SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(HitPortableCollectionHost); diff --git a/DataFormats/TrackingRecHitSoA/src/classes.h b/DataFormats/TrackingRecHitSoA/src/classes.h index d405a88ed6ace..c23caaa2d916b 100644 --- a/DataFormats/TrackingRecHitSoA/src/classes.h +++ b/DataFormats/TrackingRecHitSoA/src/classes.h @@ -4,8 +4,6 @@ #include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" #include 
"DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -using namespace pixelTopology; +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" #endif // DataFormats_TrackingRecHitSoA_src_classes_h diff --git a/DataFormats/TrackingRecHitSoA/src/classes_def.xml b/DataFormats/TrackingRecHitSoA/src/classes_def.xml index 54c0a3d30a365..2a46ab515133d 100644 --- a/DataFormats/TrackingRecHitSoA/src/classes_def.xml +++ b/DataFormats/TrackingRecHitSoA/src/classes_def.xml @@ -1,28 +1,22 @@ - - - - - - - + + - - - - + + + - - - + + - - - - + + + - - - + + + + + diff --git a/DataFormats/TrackingRecHitSoA/test/BuildFile.xml b/DataFormats/TrackingRecHitSoA/test/BuildFile.xml index 5b61a3460fb7d..deb0d370ede76 100644 --- a/DataFormats/TrackingRecHitSoA/test/BuildFile.xml +++ b/DataFormats/TrackingRecHitSoA/test/BuildFile.xml @@ -4,3 +4,14 @@ + + + + + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/test/TestReadHostHitSoA.cc b/DataFormats/TrackingRecHitSoA/test/TestReadHostHitSoA.cc new file mode 100644 index 0000000000000..f28166559febe --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/TestReadHostHitSoA.cc @@ -0,0 +1,54 @@ +#include "DataFormats/DetId/interface/DetId.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/global/EDAnalyzer.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/EDGetToken.h" +#include "FWCore/Utilities/interface/Exception.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "FWCore/Utilities/interface/StreamID.h" + +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" + +#include + +namespace 
edmtest { + + class TestReadHostHitSoA : public edm::global::EDAnalyzer<> { + public: + TestReadHostHitSoA(edm::ParameterSet const&); + void analyze(edm::StreamID, edm::Event const&, edm::EventSetup const&) const override; + static void fillDescriptions(edm::ConfigurationDescriptions&); + + using HitsOnHost = ::reco::TrackingRecHitHost; + + private: + edm::EDGetTokenT getToken_; + }; + + TestReadHostHitSoA::TestReadHostHitSoA(edm::ParameterSet const& iPSet) + : getToken_(consumes(iPSet.getParameter("input"))) {} + + void TestReadHostHitSoA::analyze(edm::StreamID, edm::Event const& iEvent, edm::EventSetup const&) const { + auto const& hits = iEvent.get(getToken_); + auto hitsView = hits.view(); + + for (int i = 0; i < hitsView.metadata().size(); ++i) { + if (hitsView[i].xGlobal() != float(i)) { + throw cms::Exception("TestReadHostHitSoA Failure") << "TestReadHostHitSoA::analyze, entry. i = " << i; + } + } + } + + void TestReadHostHitSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("input"); + descriptions.addDefault(desc); + } +} // namespace edmtest + +using edmtest::TestReadHostHitSoA; +DEFINE_FWK_MODULE(TestReadHostHitSoA); diff --git a/DataFormats/TrackingRecHitSoA/test/TestWriteHostHitSoA.cc b/DataFormats/TrackingRecHitSoA/test/TestWriteHostHitSoA.cc new file mode 100644 index 0000000000000..02488a3c133aa --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/TestWriteHostHitSoA.cc @@ -0,0 +1,52 @@ +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/EDPutToken.h" +#include
"FWCore/Utilities/interface/StreamID.h" + +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" + +#include +#include +#include + +namespace edmtest { + + class TestWriteHostHitSoA : public edm::global::EDProducer<> { + public: + TestWriteHostHitSoA(edm::ParameterSet const&); + void produce(edm::StreamID, edm::Event&, edm::EventSetup const&) const override; + static void fillDescriptions(edm::ConfigurationDescriptions&); + + using HitsOnHost = ::reco::TrackingRecHitHost; + + private: + unsigned int hitSize_; + edm::EDPutTokenT putToken_; + }; + + TestWriteHostHitSoA::TestWriteHostHitSoA(edm::ParameterSet const& iPSet) + : hitSize_(iPSet.getParameter("hitSize")), putToken_(produces()) {} + + void TestWriteHostHitSoA::produce(edm::StreamID, edm::Event& iEvent, edm::EventSetup const&) const { + HitsOnHost hits(cms::alpakatools::host(), hitSize_, 100); + auto hitsView = hits.view(); + for (unsigned int i = 0; i < hitSize_; ++i) { + hitsView[i].xGlobal() = float(i); + } + iEvent.emplace(putToken_, std::move(hits)); + } + + void TestWriteHostHitSoA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("hitSize", 1000); + descriptions.addDefault(desc); + } +} // namespace edmtest + +using edmtest::TestWriteHostHitSoA; +DEFINE_FWK_MODULE(TestWriteHostHitSoA); diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc index 8f317cc2725f0..eaf499345388d 100644 --- a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc +++ b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc @@ -3,6 +3,7 @@ #include +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" #include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" @@ 
-16,6 +17,7 @@ #include "Hits_test.h" using namespace ALPAKA_ACCELERATOR_NAMESPACE; +using namespace ALPAKA_ACCELERATOR_NAMESPACE::reco; int main() { // Get the list of devices on the current platform @@ -34,33 +36,69 @@ int main() { { uint32_t nHits = 2000; int32_t offset = 100; - auto moduleStartH = - cms::alpakatools::make_host_buffer(queue, pixelTopology::Phase1::numberOfModules + 1); - for (size_t i = 0; i < pixelTopology::Phase1::numberOfModules + 1; ++i) { + uint32_t nModules = 200; + + SiPixelClustersSoACollection clusters(nModules, queue); + clusters.setNClusters(nHits, offset); + + auto moduleStartH = cms::alpakatools::make_host_buffer(queue, nModules + 1); + + for (size_t i = 0; i < nModules + 1; ++i) { moduleStartH[i] = i * 2; } - auto moduleStartD = - cms::alpakatools::make_device_buffer(queue, pixelTopology::Phase1::numberOfModules + 1); + + auto hitsX = cms::alpakatools::make_host_buffer(queue, nHits); + for (size_t i = 0; i < nHits; ++i) { + hitsX[i] = float(i) * 2; + } + + auto moduleStartD = cms::alpakatools::make_device_view(queue, clusters.view().clusModuleStart(), nModules + 1); alpaka::memcpy(queue, moduleStartD, moduleStartH); - TrackingRecHitsSoACollection tkhit(queue, nHits, offset, moduleStartD.data()); - testTrackingRecHitSoA::runKernels(tkhit.view(), queue); + TrackingRecHitsSoACollection tkhit(queue, clusters); + + // exercise the copy of a full column (on device) + auto hitXD = cms::alpakatools::make_device_view(queue, tkhit.view().xLocal(), nHits); + alpaka::memcpy(queue, hitXD, hitsX); + + // exercise the memset of a column (on device) + auto hitYD = cms::alpakatools::make_device_view(queue, tkhit.view().yGlobal(), nHits); + constexpr float constYG = -14.0458; + std::vector constYV(nHits, constYG); + auto constYGV_v = cms::alpakatools::make_host_view(constYV.data(), nHits); + alpaka::memcpy(queue, hitYD, constYGV_v); + + testTrackingRecHitSoA::runKernels(tkhit.view(), tkhit.view<::reco::HitModuleSoA>(), queue); tkhit.updateFromDevice(queue);
#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED or defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED // requires c++23 to make cms::alpakatools::CopyToHost compile using if constexpr // see https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2593r0.html - TrackingRecHitHost const& host_collection = tkhit; - // wait for the kernel to complete - alpaka::wait(queue); + ::reco::TrackingRecHitHost const& host_collection = tkhit; #else - using CopyT = cms::alpakatools::CopyToHost >; - TrackingRecHitHost host_collection = CopyT::copyAsync(queue, tkhit); - // wait for the kernel and the copy to complete + ::reco::TrackingRecHitHost host_collection = + cms::alpakatools::CopyToHost<::reco::TrackingRecHitDevice>::copyAsync(queue, tkhit); alpaka::wait(queue); - CopyT::postCopy(host_collection); #endif + alpaka::QueueCpuBlocking queue_host{cms::alpakatools::host()}; + + ::reco::TrackingRecHitHost host_collection_2(cms::alpakatools::host(), nHits, nModules); + + // exercise the memset of a colum (on host) + auto hitLYH = cms::alpakatools::make_host_view(host_collection_2.view().yLocal(), nHits); + constexpr float constYL = -27.0855; + std::vector constYLV(nHits, constYL); + auto constYL_v = cms::alpakatools::make_host_view(constYLV.data(), nHits); + alpaka::memcpy(queue_host, hitLYH, constYL_v); + + // wait for the copy above to complete + alpaka::wait(queue_host); + + assert(host_collection.view().xLocal()[12] == 24.); + assert(host_collection.view().yGlobal()[int(nHits / 2)] == constYG); + assert(host_collection_2.view().yLocal()[nHits - 1] == constYL); + assert(tkhit.nHits() == nHits); assert(tkhit.offsetBPIX2() == 22); // set in the kernel assert(tkhit.nHits() == host_collection.nHits()); diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc index 50e7921ffa316..6a37fa05954c0 100644 --- a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc +++ 
b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc @@ -15,11 +15,13 @@ using namespace alpaka; namespace ALPAKA_ACCELERATOR_NAMESPACE { using namespace cms::alpakatools; + using namespace ALPAKA_ACCELERATOR_NAMESPACE::reco; namespace testTrackingRecHitSoA { - template struct TestFillKernel { - ALPAKA_FN_ACC void operator()(Acc1D const& acc, TrackingRecHitSoAView soa) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, + ::reco::TrackingRecHitView soa, + ::reco::HitModuleSoAView mods) const { const uint32_t i(alpaka::getIdx(acc)[0u]); const uint32_t j(alpaka::getIdx(acc)[0u]); @@ -29,38 +31,35 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } soa[i].iphi() = i % 10; - soa.hitsLayerStart()[j] = j; + mods[j].moduleStart() = j; } }; - template struct ShowKernel { - ALPAKA_FN_ACC void operator()(Acc1D const& acc, TrackingRecHitSoAConstView soa) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, + ::reco::TrackingRecHitConstView soa, + ::reco::HitModuleSoAView mods) const { if (cms::alpakatools::once_per_grid(acc)) { - printf("nbins = %d\n", soa.phiBinner().nbins()); printf("offsetBPIX = %d\n", soa.offsetBPIX2()); printf("nHits = %d\n", soa.metadata().size()); - //printf("hitsModuleStart[28] = %d\n", soa[28].hitsModuleStart()); + printf("hitsModuleStart[28] = %d\n", mods[28].moduleStart()); } // can be increased to soa.nHits() for debugging - for (uint32_t i : cms::alpakatools::uniform_elements(acc, 10)) { + for (uint32_t i : cms::alpakatools::uniform_elements(acc, soa.metadata().size())) { printf("iPhi %d -> %d\n", i, soa[i].iphi()); + printf("x %d -> %.2f \n", i, soa[i].xLocal()); } } }; - template - void runKernels(TrackingRecHitSoAView& view, Queue& queue) { + void runKernels(::reco::TrackingRecHitView& view, ::reco::HitModuleSoAView& mods, Queue& queue) { uint32_t items = 64; uint32_t groups = divide_up_by(view.metadata().size(), items); auto workDiv = make_workdiv(groups, items); - alpaka::exec(queue, workDiv, TestFillKernel{}, view); - 
alpaka::exec(queue, workDiv, ShowKernel{}, view); + alpaka::exec(queue, workDiv, TestFillKernel{}, view, mods); + alpaka::exec(queue, workDiv, ShowKernel{}, view, mods); } - template void runKernels(TrackingRecHitSoAView& view, Queue& queue); - template void runKernels(TrackingRecHitSoAView& view, Queue& queue); - } // namespace testTrackingRecHitSoA } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.h b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.h index fcc0d8e80d589..646c036043b34 100644 --- a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.h +++ b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.h @@ -6,8 +6,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::testTrackingRecHitSoA { - template - void runKernels(TrackingRecHitSoAView& hits, Queue& queue); + void runKernels(::reco::TrackingRecHitView& hits, ::reco::HitModuleSoAView& mods, Queue& queue); } // namespace ALPAKA_ACCELERATOR_NAMESPACE::testTrackingRecHitSoA diff --git a/DataFormats/TrackingRecHitSoA/test/testReadHostHitSoA.py b/DataFormats/TrackingRecHitSoA/test/testReadHostHitSoA.py new file mode 100644 index 0000000000000..da68a3667a443 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/testReadHostHitSoA.py @@ -0,0 +1,20 @@ +import FWCore.ParameterSet.Config as cms +import sys + +process = cms.Process("READ") + +process.source = cms.Source("PoolSource", fileNames = cms.untracked.vstring("file:"+sys.argv[1])) + +process.testReadHostHitSoA = cms.EDAnalyzer("TestReadHostHitSoA", + input = cms.InputTag("hitSoA", "", "WRITE") +) + +process.out = cms.OutputModule("PoolOutputModule", + fileName = cms.untracked.string('testTrackSoAReader.root'), + fastCloning = cms.untracked.bool(False) +) + +process.path = cms.Path(process.testReadHostHitSoA) + +process.endPath = cms.EndPath(process.out) + diff --git a/DataFormats/TrackingRecHitSoA/test/testWriteAndReadHitSoA.sh b/DataFormats/TrackingRecHitSoA/test/testWriteAndReadHitSoA.sh new file 
mode 100755 index 0000000000000..746623623a57f --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/testWriteAndReadHitSoA.sh @@ -0,0 +1,22 @@ +echo '#### Test Writing and Reading TrackingRecHitSoA' + +scriptdir=$CMSSW_BASE/src/DataFormats/TrackingRecHitSoA/test/ + +echo '> Writing' + +cmsRun ${scriptdir}/testWriteHostHitSoA.py testHitSoa.root + +if [ $? -ne 0 ]; then + exit 1; +fi + +echo '> Reading' + +cmsRun ${scriptdir}/testReadHostHitSoA.py testHitSoa.root + +if [ $? -ne 0 ]; then + exit 1; +fi + +echo '>>>> Done! <<<<' + diff --git a/DataFormats/TrackingRecHitSoA/test/testWriteHostHitSoA.py b/DataFormats/TrackingRecHitSoA/test/testWriteHostHitSoA.py new file mode 100644 index 0000000000000..344882440b01e --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/testWriteHostHitSoA.py @@ -0,0 +1,21 @@ +import FWCore.ParameterSet.Config as cms +import sys + +process = cms.Process("WRITE") + +process.load("FWCore.MessageService.MessageLogger_cfi") + +process.source = cms.Source("EmptySource") +process.maxEvents.input = 5 + +process.hitSoA = cms.EDProducer("TestWriteHostHitSoA", + hitSize = cms.uint32(2708) +) + +process.out = cms.OutputModule("PoolOutputModule", + fileName = cms.untracked.string(sys.argv[1]) +) + +process.path = cms.Path(process.hitSoA) +process.endPath = cms.EndPath(process.out) + diff --git a/Geometry/CommonTopologies/interface/SimplePixelTopology.h b/Geometry/CommonTopologies/interface/SimplePixelTopology.h index bf4117f4e0a9f..d775302a993f5 100644 --- a/Geometry/CommonTopologies/interface/SimplePixelTopology.h +++ b/Geometry/CommonTopologies/interface/SimplePixelTopology.h @@ -9,7 +9,12 @@ namespace pixelTopology { constexpr auto maxNumberOfLadders = 160; - constexpr uint32_t maxLayers = 28; + constexpr uint8_t maxLayers = 28; + constexpr uint8_t maxPairs = 64; + + // TODO + // Once CUDA is dropped this could be wrapped in #ifdef CA_TRIPLETS_HOLE + // see DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h template struct AverageGeometryT { @@
-131,7 +136,7 @@ namespace phase1PixelTopology { using pixelTopology::phi0p06; using pixelTopology::phi0p07; - constexpr uint32_t numberOfLayers = 28; + constexpr uint32_t numberOfLayers = 10; constexpr int nPairs = 13 + 2 + 4; constexpr uint16_t numberOfModules = 1856; @@ -186,6 +191,11 @@ namespace phase1PixelTopology { HOST_DEVICE_CONSTANT float maxr[nPairs] = { 20., 9., 9., 20., 7., 7., 5., 5., 20., 6., 6., 5., 5., 20., 20., 9., 9., 9., 9.}; + HOST_DEVICE_CONSTANT float dcaCuts[numberOfLayers] = {0.15, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25}; + + HOST_DEVICE_CONSTANT float thetaCuts[numberOfLayers] = { + 0.002, 0.002, 0.002, 0.002, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003}; + static constexpr uint32_t layerStart[numberOfLayers + 1] = {0, 96, 320, @@ -230,9 +240,36 @@ namespace phase2PixelTopology { 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12, // POS Jump (48) 16, 18, 17, 19, 18, 20, 19, 21, 20, 22, 21, 23, 22, 24, // NEG Jump (55) }; - HOST_DEVICE_CONSTANT uint32_t layerStart[numberOfLayers + 1] = { - 0, 216, 432, 612, 864, 972, 1080, 1188, 1296, 1404, 1512, 1620, 1728, 1904, 2080, - 2256, 2432, 2540, 2648, 2756, 2864, 2972, 3080, 3188, 3296, 3472, 3648, 3824, numberOfModules}; + + HOST_DEVICE_CONSTANT uint32_t layerStart[numberOfLayers + 1] = {0, + 216, + 432, + 612, // Barrel + 864, + 972, + 1080, + 1188, + 1296, + 1404, + 1512, + 1620, + 1728, + 1904, + 2080, + 2256, // Fp + 2432, + 2540, + 2648, + 2756, + 2864, + 2972, + 3080, + 3188, + 3296, + 3472, + 3648, + 3824, // Np + numberOfModules}; HOST_DEVICE_CONSTANT int16_t phicuts[nPairs]{ phi0p05, phi0p05, phi0p05, phi0p06, phi0p07, phi0p07, phi0p06, phi0p07, phi0p07, phi0p05, phi0p05, @@ -258,6 +295,17 @@ namespace phase2PixelTopology { 6.0, 5.0, 6.0, 6.0, 6.0, 6.0, 5.0, 6.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 8.0, 8.0, 8.0, 8.0, 6.0, 5.0, 5.0, 5.0, 6.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 8.0, 8.0, 8.0, 11.0, 9.0, 9.0, 9.0, 8.0, 8.0, 8.0, 11.0}; + + HOST_DEVICE_CONSTANT float 
dcaCuts[numberOfLayers] = {0.15, //BPix1 + 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, + 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, + 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25}; + + HOST_DEVICE_CONSTANT float thetaCuts[numberOfLayers] = {0.002, 0.002, 0.002, 0.002, // BPix + 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, + 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, + 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003}; + } // namespace phase2PixelTopology namespace phase1HIonPixelTopology { @@ -287,6 +335,12 @@ namespace phase1HIonPixelTopology { phi0p09, phi0p09}; + HOST_DEVICE_CONSTANT float dcaCuts[phase1PixelTopology::numberOfLayers] = { + 0.05, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1}; + + HOST_DEVICE_CONSTANT float thetaCuts[phase1PixelTopology::numberOfLayers] = { + 0.001, 0.001, 0.001, 0.001, 0.002, 0.002, 0.002, 0.002, 0.002, 0.002}; + } // namespace phase1HIonPixelTopology namespace pixelTopology { @@ -299,7 +353,7 @@ namespace pixelTopology { static constexpr uint32_t maxCellNeighbors = 64; static constexpr uint32_t maxCellTracks = 302; - static constexpr uint32_t maxHitsOnTrack = 15; + static constexpr uint32_t maxHitsOnTrack = 20; static constexpr uint32_t maxHitsOnTrackForFullFit = 6; static constexpr uint32_t avgHitsPerTrack = 7; static constexpr uint32_t maxCellsPerHit = 256; @@ -315,7 +369,7 @@ namespace pixelTopology { static constexpr uint32_t maxSizeCluster = 2047; - static constexpr uint32_t getDoubletsFromHistoMaxBlockSize = 64; // for both x and y + static constexpr uint32_t getDoubletsFromHistoMaxBlockSize = 128; // for both x and y static constexpr uint32_t getDoubletsFromHistoMinBlocksPerMP = 16; static constexpr uint16_t last_bpix1_detIndex = 216; diff --git a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracksSoA_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracksSoA_cfi.py index d52dbb5f12d09..6baca6635d7b8 100644 --- 
a/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracksSoA_cfi.py +++ b/HLTrigger/Configuration/python/HLT_75e33/modules/hltPhase2PixelTracksSoA_cfi.py @@ -2,42 +2,47 @@ hltPhase2PixelTracksSoA = cms.EDProducer('CAHitNtupletAlpakaPhase2@alpaka', pixelRecHitSrc = cms.InputTag('hltPhase2SiPixelRecHitsSoA'), - CPE = cms.string('PixelCPEFastParamsPhase2'), ptmin = cms.double(0.9), - CAThetaCutBarrel = cms.double(0.002), - CAThetaCutForward = cms.double(0.003), hardCurvCut = cms.double(0.0328407225), - dcaCutInnerTriplet = cms.double(0.15), - dcaCutOuterTriplet = cms.double(0.25), earlyFishbone = cms.bool(True), lateFishbone = cms.bool(False), fillStatistics = cms.bool(False), minHitsPerNtuplet = cms.uint32(4), - phiCuts = cms.vint32( - 522, 522, 522, 626, 730, 730, 626, 730, 730, 522, 522, - 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, - 522, 522, 522, 522, 522, 522, 522, 730, 730, 730, 730, - 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, - 730, 730, 730, 522, 522, 522, 522, 522, 522, 522, 522 - ), - maxNumberOfDoublets = cms.uint32(5*512*1024), + maxNumberOfDoublets = cms.string(str(5*512*1024)), + maxNumberOfTuples = cms.string(str(32*1024)), + cellPtCut = cms.double(0.85), + cellZ0Cut = cms.double(7.5), + minYsizeB1 = cms.int32(25), + minYsizeB2 = cms.int32(15), + maxDYsize12 = cms.int32(12), + maxDYsize = cms.int32(10), + maxDYPred = cms.int32(20), + avgHitsPerTrack = cms.double(7.0), + avgCellsPerHit = cms.double(6), + avgCellsPerCell = cms.double(0.151), + avgTracksPerCell = cms.double(0.040), minHitsForSharingCut = cms.uint32(10), fitNas4 = cms.bool(False), - doClusterCut = cms.bool(True), - doZ0Cut = cms.bool(True), - doPtCut = cms.bool(True), useRiemannFit = cms.bool(False), doSharedHitCut = cms.bool(True), dupPassThrough = cms.bool(False), useSimpleTripletCleaner = cms.bool(True), - idealConditions = cms.bool(False), - includeJumpingForwardDoublets = cms.bool(True), trackQualityCuts = cms.PSet( maxChi2 = cms.double(5.0), 
minPt = cms.double(0.9), maxTip = cms.double(0.3), maxZip = cms.double(12.), ), + geometry = cms.PSet( + caDCACuts = cms.vdouble(0.15, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25), + caThetaCuts = cms.vdouble(0.002, 0.002, 0.002, 0.002, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003, 0.003), + startingPairs = cms.vuint32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), + pairGraph = cms.vuint32(0, 1, 0, 4, 0, 16, 1, 2, 1, 4, 1, 16, 2, 3, 2, 4, 2, 16, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 0, 2, 0, 5, 0, 17, 0, 6, 0, 18, 1, 3, 1, 5, 1, 17, 1, 6, 1, 18, 11, 12, 12, 13, 13, 14, 14, 15, 23, 24, 24, 25, 25, 26, 26, 27, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12, 16, 18, 17, 19, 18, 20, 19, 21, 20, 22, 21, 23, 22, 24), + phiCuts = cms.vint32(522, 522, 522, 626, 730, 730, 626, 730, 730, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 522, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 730, 522, 522, 522, 522, 522, 522, 522, 522), + minZ = cms.vdouble(-16, 4, -22, -17, 6, -22, -18, 11, -22, 23, 30, 39, 50, 65, 82, 109, -28, -35, -44, -55, -70, -87, -113, -16, 7, -22, 11, -22, -17, 9, -22, 13, -22, 137, 173, 199, 229, -142, -177, -203, -233, 23, 30, 39, 50, 65, 82, 109, -28, -35, -44, -55, -70, -87, -113), + maxZ = cms.vdouble(17, 22, -4, 17, 22, -6, 18, 22, -11, 28, 35, 44, 55, 70, 87, 113, -23, -30, -39, -50, -65, -82, -109, 17, 22, -7, 22, -10, 17, 22, -9, 22, -13, 142, 177, 203, 233, -137, -173, -199, -229, 28, 35, 44, 55, 70, 87, 113, -23, -30, -39, -50, -65, -82, -109), + maxR = cms.vdouble(5, 5, 5, 7, 8, 8, 7, 7, 7, 6, 6, 
6, 6, 5, 6, 5, 6, 6, 6, 6, 5, 6, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 6, 5, 5, 5, 6, 5, 5, 5, 9, 9, 9, 8, 8, 8, 11, 9, 9, 9, 8, 8, 8, 11) + ), # autoselect the alpaka backend alpaka = cms.untracked.PSet(backend = cms.untracked.string('')) ) diff --git a/HLTrigger/Configuration/python/customizeHLTforCMSSW.py b/HLTrigger/Configuration/python/customizeHLTforCMSSW.py index 5e7ba5559bf6f..884840d6ef234 100644 --- a/HLTrigger/Configuration/python/customizeHLTforCMSSW.py +++ b/HLTrigger/Configuration/python/customizeHLTforCMSSW.py @@ -37,6 +37,151 @@ def customiseForOffline(process): return process +def customizeHLTfor47611(process): + """ This customizer + - cleans up the CANtupletAlpaka producers paramters; + - adds the geometry paramters used to fill the CAGeometry; + - adds the average sizes paramters to the CANtupletAlpaka producers; + - for pp and HIN hlt setups. + """ + + ca_producers_pp = ['CAHitNtupletAlpakaPhase1@alpaka','alpaka_serial_sync::CAHitNtupletAlpakaPhase1'] + ca_producers_hi = ['CAHitNtupletAlpakaHIonPhase1@alpaka','alpaka_serial_sync::CAHitNtupletAlpakaHIonPhase1'] + ca_producers = ca_producers_pp + ca_producers_hi + ca_parameters = [ 'CAThetaCutBarrel', 'CAThetaCutForward', + 'dcaCutInnerTriplet', 'dcaCutOuterTriplet', + 'doPtCut', 'doZ0Cut', 'idealConditions', + 'includeJumpingForwardDoublets', 'phiCuts','doClusterCut','CPE'] + + has_pp_producers = False + has_hi_producers = False + + for ca_producer in ca_producers: + for prod in producers_by_type(process, ca_producer): + + for par in ca_parameters: + if hasattr(prod, par): + delattr(prod,par) + + if not hasattr(prod, 'dzdrFact'): + setattr(prod, 'dzdrFact', cms.double(8.0 * 0.0285 / 0.015)) + if not hasattr(prod, 'maxDYsize12'): + setattr(prod, 'maxDYsize12', cms.int32(28)) + if not hasattr(prod, 'maxDYsize'): + setattr(prod, 'maxDYsize', cms.int32(20)) + if not hasattr(prod, 'maxDYPred'): + setattr(prod, 'maxDYPred', cms.int32(20)) + + if hasattr(prod, 'maxNumberOfDoublets'): + v = getattr(prod, 
'maxNumberOfDoublets') + delattr(prod, 'maxNumberOfDoublets') + setattr(prod, 'maxNumberOfDoublets', cms.string(str(v.value()))) + + for ca_producer in ca_producers_pp: + for prod in producers_by_type(process, ca_producer): + + has_pp_producers = True + + if not hasattr(prod, 'maxNumberOfTuples'): + setattr(prod,'maxNumberOfTuples',cms.string(str(32*1024))) + + if not hasattr(prod, 'avgCellsPerCell'): + setattr(prod, 'avgCellsPerCell', cms.double(0.071)) + + if not hasattr(prod, 'avgCellsPerHit'): + setattr(prod, 'avgCellsPerHit', cms.double(27)) + + if not hasattr(prod, 'avgHitsPerTrack'): + setattr(prod, 'avgHitsPerTrack', cms.double(4.5)) + + if not hasattr(prod, 'avgTracksPerCell'): + setattr(prod, 'avgTracksPerCell', cms.double(0.127)) + + if not hasattr(prod, 'geometry'): + + geometryPS = cms.PSet( + startingPairs = cms.vuint32( [i for i in range(8)] + [i for i in range(13,20)]), + caDCACuts = cms.vdouble( [0.0918113099491] + [0.420724617835] * 9), + caThetaCuts = cms.vdouble([0.00123302705499] * 4 + [0.00355691321774] * 6), + pairGraph = cms.vuint32( + 0, 1, 0, 4, 0, + 7, 1, 2, 1, 4, + 1, 7, 4, 5, 7, + 8, 2, 3, 2, 4, + 2, 7, 5, 6, 8, + 9, 0, 2, 1, 3, + 0, 5, 0, 8, + 4, 6, 7, 9 + ), + phiCuts = cms.vint32( + 965, 1241, 395, 698, 1058, + 1211, 348, 782, 1016, 810, + 463, 755, 694, 531, 770, + 471, 592, 750, 348 + ), + minZ = cms.vdouble( + -20., 0., -30., -22., 10., + -30., -70., -70., -22., 15., + -30, -70., -70., -20., -22., + 0, -30., -70., -70. + ), + maxZ = cms.vdouble( 20., 30., 0., 22., 30., + -10., 70., 70., 22., 30., + -15., 70., 70., 20., 22., + 30., 0., 70., 70.), + maxR = cms.vdouble(20., 9., 9., 20., 7., + 7., 5., 5., 20., 6., + 6., 5., 5., 20., 20., + 9., 9., 9., 9.) 
+ ) + + setattr(prod, 'geometry', geometryPS) + + for ca_producer in ca_producers_hi: + for prod in producers_by_type(process, ca_producer): + + has_hi_producers = True + + if not hasattr(prod, 'maxNumberOfTuples'): + setattr(prod,'maxNumberOfTuples',cms.string(str(256 * 1024))) # way too much, could be ~20k + + if not hasattr(prod, 'avgCellsPerCell'): + setattr(prod, 'avgCellsPerCell', cms.double(0.5)) + + if not hasattr(prod, 'avgCellsPerHit'): + setattr(prod, 'avgCellsPerHit', cms.double(40)) + + if not hasattr(prod, 'avgHitsPerTrack'): + setattr(prod, 'avgHitsPerTrack', cms.double(5.0)) + + if not hasattr(prod, 'avgTracksPerCell'): + setattr(prod, 'avgTracksPerCell', cms.double(0.5)) + + if not hasattr(prod, 'geometry'): + + geometryPS = cms.PSet( + caDCACuts = cms.vdouble( + 0.05, 0.1, 0.1, 0.1, 0.1, + 0.1, 0.1, 0.1, 0.1, 0.1 + ), + caThetaCuts = cms.vdouble( + 0.001, 0.001, 0.001, 0.001, 0.002, + 0.002, 0.002, 0.002, 0.002, 0.002 + ), + + ## These are the defaults actually + startingPairs = cms.vuint32(0,1,2), + pairGraph = cms.vuint32(0, 1, 0, 4, 0, 7, 1, 2, 1, 4, 1, 7, 4, 5, 7, 8, 2, 3, 2, 4, 2, 7, 5, 6, 8, 9, 0, 2, 1, 3, 0, 5, 0, 8, 4, 6, 7, 9), + phiCuts = cms.vint32(522, 730, 730, 522, 626, 626, 522, 522, 626, 626, 626, 522, 522, 522, 522, 522, 522, 522, 522), + minZ = cms.vdouble(-20, 0, -30, -22, 10, -30, -70, -70, -22, 15, -30, -70, -70, -20, -22, 0, -30, -70, -70), + maxZ = cms.vdouble(20, 30, 0, 22, 30, -10, 70, 70, 22, 30, -15, 70, 70, 20, 22, 30, 0, 70, 70), + maxR = cms.vdouble(20, 9, 9, 20, 7, 7, 5, 5, 20, 6, 6, 5, 5, 20, 20, 9, 9, 9, 9) + ) + + setattr(prod, 'geometry', geometryPS) + + return process + # CMSSW version specific customizations def customizeHLTforCMSSW(process, menuType="GRun"): @@ -45,4 +190,7 @@ def customizeHLTforCMSSW(process, menuType="GRun"): # add call to action function in proper order: newest last! 
# process = customiseFor12718(process) + process = customizeHLTfor47611(process) + return process + diff --git a/HeterogeneousCore/AlpakaInterface/interface/prefixScan.h b/HeterogeneousCore/AlpakaInterface/interface/prefixScan.h index afaddcc7f5473..94a1ce883817c 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/prefixScan.h +++ b/HeterogeneousCore/AlpakaInterface/interface/prefixScan.h @@ -188,8 +188,22 @@ namespace cms::alpakatools { psum[i] = (j < size) ? co[j] : T(0); } alpaka::syncBlockThreads(acc); - blockPrefixScan(acc, psum, psum, blocksPerGrid, ws); - + if constexpr (!requires_single_thread_per_block_v) { + if (blocksPerGrid <= warpSize * warpSize) + blockPrefixScan(acc, psum, blocksPerGrid, ws); + else { + auto off = 0u; + while (off + warpSize * warpSize < blocksPerGrid) { + blockPrefixScan(acc, psum + off, warpSize * warpSize, ws); + off = off + warpSize * warpSize - 1; + // ^ this -1 is to keep the previous round total sum around + alpaka::syncBlockThreads(acc); + } + blockPrefixScan(acc, psum + off, psum + off, blocksPerGrid - off, ws); + } + } else { + blockPrefixScan(acc, psum, blocksPerGrid, ws); + } // now it would have been handy to have the other blocks around... // Simplify the computation by having one version where threads per block = block size // and a second for the one thread per block accelerator. 
diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc index 7f28301ddb63c..34d3cc2c87db3 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc @@ -194,7 +194,10 @@ int main() { // PORTABLE MULTI-BLOCK PREFIXSCAN uint32_t num_items = 200; - for (int ksize = 1; ksize < 4; ++ksize) { + // with ksize=4 num_items = 2e6 so above warpSize² (elements per block) * warpSize² (blocks) + // for CUDA (32²*32²) allowing to fully test also the "unlimited" multiBlockPrefixScan + // with 256 threads and 7813 blocks + for (int ksize = 1; ksize < 5; ++ksize) { std::cout << "multiblock" << std::endl; num_items *= 10; diff --git a/RecoLocalTracker/ClusterParameterEstimator/BuildFile.xml b/RecoLocalTracker/ClusterParameterEstimator/BuildFile.xml index 5ed83e90681fe..39f252d0b838b 100644 --- a/RecoLocalTracker/ClusterParameterEstimator/BuildFile.xml +++ b/RecoLocalTracker/ClusterParameterEstimator/BuildFile.xml @@ -1,6 +1,10 @@ + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc index bb358df8a52f9..924b8dc9cbf0c 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc @@ -428,73 +428,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // end of Raw to Digi kernel operator() }; // end of Raw to Digi struct + // just for debugging template - struct FillHitsModuleStart { - ALPAKA_FN_ACC void operator()(Acc1D const &acc, SiPixelClustersSoAView clus_view) const { - // This kernel must run with a single block - [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx(acc)[0u]); - ALPAKA_ASSERT_ACC(0 == blockIdxLocal); - 
[[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]); - ALPAKA_ASSERT_ACC(1 == gridDimension); - - // For the prefix scan algorithm - constexpr int warpSize = cms::alpakatools::warpSize; - constexpr int blockSize = warpSize * warpSize; - - // For Phase1 there are 1856 pixel modules - // For Phase2 there are up to 4000 pixel modules - constexpr uint16_t numberOfModules = TrackerTraits::numberOfModules; - constexpr uint16_t prefixScanUpperLimit = ((numberOfModules / blockSize) + 1) * blockSize; - ALPAKA_ASSERT_ACC(numberOfModules < prefixScanUpperLimit); - - // Limit to maxHitsInModule; - constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule; - for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules)) { - clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule()); + struct ShowHitsModuleStart { + template + ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const { + if (cms::alpakatools::once_per_grid(acc)) { + for (int i = 0; i < TrackerTraits::numberOfModules; i++) + printf("%d \n", clus_view[i].clusModuleStart()); } - - // Use N single-block prefix scan, then update all blocks after the first one. - auto &ws = alpaka::declareSharedVar(acc); - uint32_t *clusModuleStart = clus_view.clusModuleStart() + 1; - uint16_t leftModules = numberOfModules; - while (leftModules > blockSize) { - cms::alpakatools::blockPrefixScan(acc, clusModuleStart, clusModuleStart, blockSize, ws); - clusModuleStart += blockSize; - leftModules -= blockSize; - } - cms::alpakatools::blockPrefixScan(acc, clusModuleStart, clusModuleStart, leftModules, ws); - - // The first blockSize modules are properly accounted by the blockPrefixScan. - // The additional modules need to be corrected adding the cuulative value from the last module of the previous block. 
- for (uint16_t doneModules = blockSize; doneModules < numberOfModules; doneModules += blockSize) { - uint16_t first = doneModules + 1; - uint16_t last = std::min(doneModules + blockSize, numberOfModules); - for (uint16_t i : cms::alpakatools::independent_group_elements(acc, first, last + 1)) { - clus_view[i].clusModuleStart() += clus_view[doneModules].clusModuleStart(); - } - alpaka::syncBlockThreads(acc); - } - -#ifdef GPU_DEBUG - ALPAKA_ASSERT_ACC(0 == clus_view[0].clusModuleStart()); - auto c0 = std::min(maxHitsInModule, clus_view[1].clusModuleStart()); - ALPAKA_ASSERT_ACC(c0 == clus_view[1].clusModuleStart()); - ALPAKA_ASSERT_ACC(clus_view[1024].clusModuleStart() >= clus_view[1023].clusModuleStart()); - ALPAKA_ASSERT_ACC(clus_view[1025].clusModuleStart() >= clus_view[1024].clusModuleStart()); - ALPAKA_ASSERT_ACC(clus_view[numberOfModules].clusModuleStart() >= clus_view[1025].clusModuleStart()); - - for (uint32_t i : cms::alpakatools::independent_group_elements(acc, numberOfModules)) { - ALPAKA_ASSERT_ACC(clus_view[i + 1].clusModuleStart() >= clus_view[i].clusModuleStart()); - // Check BPX2 (1), FP1 (4) - constexpr auto bpix2 = TrackerTraits::layerStart[1]; - constexpr auto fpix1 = TrackerTraits::layerStart[4]; - if (i == bpix2 || i == fpix1) - printf("moduleStart %d %d\n", i, clus_view[i].clusModuleStart()); - } -#endif - - } // end of FillHitsModuleStart kernel operator() - }; // end of FillHitsModuleStart struct + } + }; // Interface to outside template @@ -649,15 +593,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // available in the rechit producer without additional points of // synchronization/ExternalWork - // MUST be ONE block - const auto workDivOneBlock = cms::alpakatools::make_workdiv(1u, 1024u); - alpaka::exec(queue, workDivOneBlock, FillHitsModuleStart{}, clusters_d->view()); + constexpr auto threadsPrefixScan = 1024; + constexpr auto blocksPrefixScan = (TrackerTraits::numberOfModules + threadsPrefixScan - 1) / threadsPrefixScan; + auto 
workDivPrefixScan = cms::alpakatools::make_workdiv(blocksPrefixScan, threadsPrefixScan); + auto bCounter = cms::alpakatools::make_device_buffer(queue); + alpaka::memset(queue, bCounter, 0); + + alpaka::exec(queue, + workDivPrefixScan, + cms::alpakatools::multiBlockPrefixScan(), + clusters_d->view().clusInModule(), + clusters_d->view().clusModuleStart() + 1, + TrackerTraits::numberOfModules, + blocksPrefixScan, + bCounter.data(), + alpaka::getPreferredWarpSize(alpaka::getDev(queue))); // last element holds the number of all clusters const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + numberOfModules, 1u); constexpr int startBPIX2 = TrackerTraits::layerStart[1]; - // element startBPIX2 hold the number of clusters until BPIX2 const auto bpix2ClusterStart = cms::alpakatools::make_device_view(queue, clusters_d->const_view().clusModuleStart() + startBPIX2, 1u); @@ -737,9 +692,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // available in the rechit producer without additional points of // synchronization/ExternalWork - // MUST be ONE block - const auto workDivOneBlock = cms::alpakatools::make_workdiv(1u, 1024u); - alpaka::exec(queue, workDivOneBlock, FillHitsModuleStart{}, clusters_d->view()); + constexpr auto threadsPrefixScan = 1024; + constexpr auto blocksPrefixScan = (TrackerTraits::numberOfModules + threadsPrefixScan - 1) / threadsPrefixScan; + auto workDivPrefixScan = cms::alpakatools::make_workdiv(blocksPrefixScan, threadsPrefixScan); + auto bCounter = cms::alpakatools::make_device_buffer(queue); + alpaka::memset(queue, bCounter, 0); + + alpaka::exec(queue, + workDivPrefixScan, + cms::alpakatools::multiBlockPrefixScan(), + clusters_d->view().clusInModule(), + clusters_d->view().clusModuleStart() + 1, + TrackerTraits::numberOfModules, + blocksPrefixScan, + bCounter.data(), + alpaka::getPreferredWarpSize(alpaka::getDev(queue))); // last element holds the number of all clusters const auto 
clusModuleStartLastElement = diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h index a97add7edb7b3..6bfb93e9babba 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h @@ -14,6 +14,8 @@ #include "DataFormats/GeometrySurface/interface/SOARotation.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +// #define ONLY_TRIPLETS_IN_HOLE + namespace pixelCPEforDevice { // From https://cmssdt.cern.ch/dxr/CMSSW/source/CondFormats/SiPixelTransient/src/SiPixelGenError.cc#485-486 @@ -61,9 +63,6 @@ namespace pixelCPEforDevice { struct CommonParams { float theThicknessB; float theThicknessE; - - uint16_t maxModuleStride; - uint8_t numberOfLaddersInBarrel; }; struct DetParams { @@ -100,13 +99,6 @@ namespace pixelCPEforDevice { Frame frame; }; - template - struct LayerGeometryT { - uint32_t layerStart[TrackerTopology::numberOfLayers + 1]; - uint8_t layer[pixelTopology::layerIndexSize]; - uint16_t maxModuleStride; - }; - constexpr int32_t MaxHitsInIter = pixelClustering::maxHitsInIter(); using ClusParams = ClusParamsT; @@ -409,26 +401,23 @@ namespace pixelCPEforDevice { template struct ParamsOnDeviceT { - using LayerGeometry = LayerGeometryT; - using AverageGeometry = pixelTopology::AverageGeometryT; - CommonParams m_commonParams; // Will contain an array of DetParams instances DetParams m_detParams[TrackerTopology::numberOfModules]; - LayerGeometry m_layerGeometry; - AverageGeometry m_averageGeometry; constexpr CommonParams const& __restrict__ commonParams() const { return m_commonParams; } constexpr DetParams const& __restrict__ detParams(int i) const { return m_detParams[i]; } - constexpr LayerGeometry const& __restrict__ layerGeometry() const { return m_layerGeometry; } - constexpr AverageGeometry const& __restrict__ averageGeometry() const { return m_averageGeometry; } CommonParams& 
commonParams() { return m_commonParams; } DetParams& detParams(int i) { return m_detParams[i]; } - LayerGeometry& layerGeometry() { return m_layerGeometry; } - AverageGeometry& averageGeometry() { return m_averageGeometry; } - constexpr uint8_t layer(uint16_t id) const { return m_layerGeometry.layer[id / TrackerTopology::maxModuleStride]; }; +#ifdef ONLY_TRIPLETS_IN_HOLE + using AverageGeometry = pixelTopology::AverageGeometryT; + + AverageGeometry m_averageGeometry; + constexpr AverageGeometry const& __restrict__ averageGeometry() const { return m_averageGeometry; } + AverageGeometry& averageGeometry() { return m_averageGeometry; } +#endif // ONLY_TRIPLETS_IN_HOLE }; } // namespace pixelCPEforDevice diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc index 3b8c5371649f0..4b9c083c1252d 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc @@ -21,10 +21,8 @@ #include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" -template class SiPixelRecHitFromSoAAlpaka : public edm::global::EDProducer<> { - using HitModuleStartArray = typename TrackingRecHitSoA::HitModuleStartArray; - using hindex_type = typename TrackerTraits::hindex_type; + using hindex_type = uint32_t; //typename TrackerTraits::hindex_type; using HMSstorage = typename std::vector; public: @@ -34,11 +32,12 @@ class SiPixelRecHitFromSoAAlpaka : public edm::global::EDProducer<> { static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); // Data has been implicitly copied from Device to Host by the framework - using HitsOnHost = TrackingRecHitHost; + using HitsOnHost = ::reco::TrackingRecHitHost; private: void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const 
override; + const uint32_t maxHitsInModules_; const edm::ESGetToken geomToken_; const edm::EDGetTokenT hitsToken_; // Alpaka hits const edm::EDGetTokenT clusterToken_; // legacy clusters @@ -46,37 +45,38 @@ class SiPixelRecHitFromSoAAlpaka : public edm::global::EDProducer<> { const edm::EDPutTokenT hostPutToken_; }; -template -SiPixelRecHitFromSoAAlpaka::SiPixelRecHitFromSoAAlpaka(const edm::ParameterSet& iConfig) - : geomToken_(esConsumes()), +SiPixelRecHitFromSoAAlpaka::SiPixelRecHitFromSoAAlpaka(const edm::ParameterSet& iConfig) + : maxHitsInModules_(iConfig.getParameter("maxHitsInModules")), + geomToken_(esConsumes()), hitsToken_(consumes(iConfig.getParameter("pixelRecHitSrc"))), clusterToken_(consumes(iConfig.getParameter("src"))), rechitsPutToken_(produces()), hostPutToken_(produces()) {} -template -void SiPixelRecHitFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { +void SiPixelRecHitFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; + desc.add("maxHitsInModules", phase1PixelTopology::maxNumClustersPerModules) + ->setComment("Max number of hits in a single module"); desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingAlpaka")); desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); descriptions.addWithDefaultLabel(desc); } -template -void SiPixelRecHitFromSoAAlpaka::produce(edm::StreamID streamID, - edm::Event& iEvent, - const edm::EventSetup& iSetup) const { +void SiPixelRecHitFromSoAAlpaka::produce(edm::StreamID streamID, + edm::Event& iEvent, + const edm::EventSetup& iSetup) const { auto const& hits = iEvent.get(hitsToken_); - auto nHits = hits.view().metadata().size(); - LogDebug("SiPixelRecHitFromSoAAlpaka") << "converting " << nHits << " Hits"; + auto hitsView = hits.view(); + auto modulesView = hits.view<::reco::HitModuleSoA>(); + auto nHits = hitsView.metadata().size(); + auto nModules = modulesView.metadata().size(); + 
LogDebug("SiPixelRecHitFromSoAAlpaka") << "converting " << nHits << " hits in max " << nModules << " modules"; // allocate a buffer for the indices of the clusters - constexpr auto nMaxModules = TrackerTraits::numberOfModules; - SiPixelRecHitCollection output; - output.reserve(nMaxModules, nHits); + output.reserve(nModules, nHits); - HMSstorage hmsp(nMaxModules + 1); + HMSstorage hmsp(nModules + 1); if (0 == nHits) { hmsp.clear(); @@ -87,21 +87,19 @@ void SiPixelRecHitFromSoAAlpaka::produce(edm::StreamID streamID, // fill content of HMSstorage product, and put it into the Event for (unsigned int idx = 0; idx < hmsp.size(); ++idx) { - hmsp[idx] = hits.view().hitsModuleStart()[idx]; + hmsp[idx] = modulesView.moduleStart()[idx]; } iEvent.emplace(hostPutToken_, std::move(hmsp)); - auto xl = hits.view().xLocal(); - auto yl = hits.view().yLocal(); - auto xe = hits.view().xerrLocal(); - auto ye = hits.view().yerrLocal(); + auto xl = hitsView.xLocal(); + auto yl = hitsView.yLocal(); + auto xe = hitsView.xerrLocal(); + auto ye = hitsView.yerrLocal(); TrackerGeometry const& geom = iSetup.getData(geomToken_); auto const hclusters = iEvent.getHandle(clusterToken_); - constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule; - int numberOfDetUnits = 0; int numberOfClusters = 0; for (auto const& dsv : *hclusters) { @@ -113,21 +111,21 @@ void SiPixelRecHitFromSoAAlpaka::produce(edm::StreamID streamID, const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); assert(pixDet); SiPixelRecHitCollection::FastFiller recHitsOnDetUnit(output, detid); - auto fc = hits.view().hitsModuleStart()[gind]; - auto lc = hits.view().hitsModuleStart()[gind + 1]; + auto fc = modulesView.moduleStart()[gind]; + auto lc = modulesView.moduleStart()[gind + 1]; auto nhits = lc - fc; assert(lc > fc); LogDebug("SiPixelRecHitFromSoAAlpaka") << "in det " << gind << ": conv " << nhits << " hits from " << dsv.size() << " legacy clusters" << ' ' << fc << ',' << lc << "\n"; - if (nhits > 
maxHitsInModule) + if (nhits > maxHitsInModules_) edm::LogWarning("SiPixelRecHitFromSoAAlpaka") .format("Too many clusters {} in module {}. Only the first {} hits will be converted", nhits, gind, - maxHitsInModule); + maxHitsInModules_); - nhits = std::min(nhits, maxHitsInModule); + nhits = std::min(nhits, maxHitsInModules_); LogDebug("SiPixelRecHitFromSoAAlpaka") << "in det " << gind << "conv " << nhits << " hits from " << dsv.size() << " legacy clusters" << ' ' << lc << ',' << fc; @@ -179,11 +177,13 @@ void SiPixelRecHitFromSoAAlpaka::produce(edm::StreamID streamID, iEvent.emplace(rechitsPutToken_, std::move(output)); } -using SiPixelRecHitFromSoAAlpakaPhase1 = SiPixelRecHitFromSoAAlpaka; -using SiPixelRecHitFromSoAAlpakaPhase2 = SiPixelRecHitFromSoAAlpaka; -using SiPixelRecHitFromSoAAlpakaHIonPhase1 = SiPixelRecHitFromSoAAlpaka; +using SiPixelRecHitFromSoAAlpakaPhase1 = SiPixelRecHitFromSoAAlpaka; +using SiPixelRecHitFromSoAAlpakaPhase2 = SiPixelRecHitFromSoAAlpaka; +using SiPixelRecHitFromSoAAlpakaHIonPhase1 = SiPixelRecHitFromSoAAlpaka; #include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpaka); +// Keeping these to ease the migration of the HLT menu DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaPhase1); DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaPhase2); DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaHIonPhase1); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc index 2872cedd14aeb..7d59f331ce1b0 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc @@ -1,7 +1,6 @@ #include #include #include -#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" #include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" #include 
"HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernel.h b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernel.h index 2fc1404a03bb7..a36b3a065abc6 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernel.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernel.h @@ -19,7 +19,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { namespace pixelgpudetails { using namespace cms::alpakatools; - + using namespace ALPAKA_ACCELERATOR_NAMESPACE::reco; template class PixelRecHitKernel { public: @@ -33,11 +33,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { using ParamsOnDevice = pixelCPEforDevice::ParamsOnDeviceT; - TrackingRecHitsSoACollection makeHitsAsync(SiPixelDigisSoACollection const& digis_d, - SiPixelClustersSoACollection const& clusters_d, - BeamSpotPOD const* bs_d, - ParamsOnDevice const* cpeParams, - Queue queue) const; + reco::TrackingRecHitsSoACollection makeHitsAsync(SiPixelDigisSoACollection const& digis_d, + SiPixelClustersSoACollection const& clusters_d, + BeamSpotPOD const* bs_d, + ParamsOnDevice const* cpeParams, + Queue queue) const; }; } // namespace pixelgpudetails } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc index 020e81fa2ebcf..8bd80708652ed 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc @@ -26,44 +26,21 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { using namespace cms::alpakatools; - template - class setHitsLayerStart { - public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, - uint32_t const* __restrict__ hitsModuleStart, - pixelCPEforDevice::ParamsOnDeviceT const* __restrict__ cpeParams, - uint32_t* __restrict__ hitsLayerStart) const { - 
ALPAKA_ASSERT_ACC(0 == hitsModuleStart[0]); - - for (int32_t i : cms::alpakatools::uniform_elements(acc, TrackerTraits::numberOfLayers + 1)) { - hitsLayerStart[i] = hitsModuleStart[cpeParams->layerGeometry().layerStart[i]]; -#ifdef GPU_DEBUG - int old = i == 0 ? 0 : hitsModuleStart[cpeParams->layerGeometry().layerStart[i - 1]]; - printf("LayerStart %d/%d at module %d: %d - %d\n", - i, - TrackerTraits::numberOfLayers, - cpeParams->layerGeometry().layerStart[i], - hitsLayerStart[i], - hitsLayerStart[i] - old); -#endif - } - } - }; + + using namespace ALPAKA_ACCELERATOR_NAMESPACE::reco; namespace pixelgpudetails { template - TrackingRecHitsSoACollection PixelRecHitKernel::makeHitsAsync( + TrackingRecHitsSoACollection PixelRecHitKernel::makeHitsAsync( SiPixelDigisSoACollection const& digis_d, SiPixelClustersSoACollection const& clusters_d, BeamSpotPOD const* bs_d, pixelCPEforDevice::ParamsOnDeviceT const* cpeParams, Queue queue) const { using namespace pixelRecHits; - auto nHits = clusters_d.nClusters(); - auto offsetBPIX2 = clusters_d.offsetBPIX2(); - TrackingRecHitsSoACollection hits_d(queue, nHits, offsetBPIX2, clusters_d->clusModuleStart()); + TrackingRecHitsSoACollection hits_d(queue, clusters_d); int activeModulesWithDigis = digis_d.nModules(); @@ -91,40 +68,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { #ifdef GPU_DEBUG alpaka::wait(queue); #endif - - // assuming full warp of threads is better than a smaller number... 
- if (nHits) { - const auto workDiv1D = cms::alpakatools::make_workdiv(1, 32); - alpaka::exec(queue, - workDiv1D, - setHitsLayerStart{}, - clusters_d->clusModuleStart(), - cpeParams, - hits_d.view().hitsLayerStart().data()); - constexpr auto nLayers = TrackerTraits::numberOfLayers; - - // Use a view since it's runtime sized and can't use the implicit definition - // see HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h:100 - typename TrackingRecHitSoA::PhiBinnerView hrv_d; - hrv_d.assoc = &(hits_d.view().phiBinner()); - hrv_d.offSize = -1; - hrv_d.offStorage = nullptr; - hrv_d.contentSize = nHits; - hrv_d.contentStorage = hits_d.view().phiBinnerStorage(); - - cms::alpakatools::fillManyFromVector(&(hits_d.view().phiBinner()), - hrv_d, - nLayers, - hits_d.view().iphi(), - hits_d.view().hitsLayerStart().data(), - nHits, - (uint32_t)256, - queue); - -#ifdef GPU_DEBUG - alpaka::wait(queue); -#endif - } } #ifdef GPU_DEBUG diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h index 5f1ca57667cf2..4dfdea37fdc85 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h @@ -22,7 +22,7 @@ #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" //#define GPU_DEBUG - +//#define ONLY_TRIPLETS_IN_HOLE namespace ALPAKA_ACCELERATOR_NAMESPACE { namespace pixelRecHits { @@ -36,15 +36,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { uint32_t numElements, uint32_t nonEmptyModules, SiPixelClustersSoAConstView clusters, - TrackingRecHitSoAView hits) const { + ::reco::TrackingRecHitView hits) const { ALPAKA_ASSERT_ACC(cpeParams); // outer loop: one block per module for (uint32_t module : cms::alpakatools::independent_groups(acc, nonEmptyModules)) { +#ifdef ONLY_TRIPLETS_IN_HOLE // This is necessary only once - consider moving it somewhere else. 
// Copy the average geometry corrected by the beamspot. if (0 == module) { - auto& agc = hits.averageGeometry(); auto const& ag = cpeParams->averageGeometry(); auto nLadders = TrackerTraits::numberOfLaddersInBarrel; @@ -62,6 +62,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { agc.endCapZ[1] = ag.endCapZ[1] - bs->z; } } +#endif // ONLY_TRIPLETS_IN_HOLE // to be moved in common namespace... using pixelClustering::invalidModuleId; @@ -91,9 +92,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { #endif auto& clusParams = alpaka::declareSharedVar(acc); + for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += maxHitsInIter) { auto first = clusters[1 + module].moduleStart(); - int nClusInIter = alpaka::math::min(acc, maxHitsInIter, endClus - startClus); int lastClus = startClus + nClusInIter; ALPAKA_ASSERT_ACC(nClusInIter <= nclus); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc index 47dd5289538bb..3b7d2912a5363 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc @@ -49,7 +49,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const device::EDGetToken tBeamSpot; const device::EDGetToken tokenClusters_; const device::EDGetToken tokenDigi_; - const device::EDPutToken> tokenHit_; + const device::EDPutToken tokenHit_; const pixelgpudetails::PixelRecHitKernel Algo_; }; diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index 7a6ba09108379..5bdec4b10cb2d 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -45,23 +45,15 @@ src = "siPixelClustersPreSplittingAlpakaSerial" ) -from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaPhase1_cfi import siPixelRecHitFromSoAAlpakaPhase1 
as _siPixelRecHitFromSoAAlpakaPhase1 -from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaPhase2_cfi import siPixelRecHitFromSoAAlpakaPhase2 as _siPixelRecHitFromSoAAlpakaPhase2 -from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaHIonPhase1_cfi import siPixelRecHitFromSoAAlpakaHIonPhase1 as _siPixelRecHitFromSoAAlpakaHIonPhase1 +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpaka_cfi import siPixelRecHitFromSoAAlpaka as _siPixelRecHitFromSoAAlpaka -(alpaka & ~phase2_tracker).toReplaceWith(siPixelRecHitsPreSplitting, _siPixelRecHitFromSoAAlpakaPhase1.clone( +alpaka.toReplaceWith(siPixelRecHitsPreSplitting, _siPixelRecHitFromSoAAlpaka.clone( pixelRecHitSrc = cms.InputTag('siPixelRecHitsPreSplittingAlpaka'), src = cms.InputTag('siPixelClustersPreSplitting')) ) -(alpaka & phase2_tracker).toReplaceWith(siPixelRecHitsPreSplitting, _siPixelRecHitFromSoAAlpakaPhase2.clone( - pixelRecHitSrc = cms.InputTag('siPixelRecHitsPreSplittingAlpaka'), - src = cms.InputTag('siPixelClustersPreSplitting')) -) - -(alpaka & pp_on_AA & ~phase2_tracker).toReplaceWith(siPixelRecHitsPreSplitting, _siPixelRecHitFromSoAAlpakaHIonPhase1.clone( - pixelRecHitSrc = cms.InputTag('siPixelRecHitsPreSplittingAlpaka'), - src = cms.InputTag('siPixelClustersPreSplitting')) +(alpaka & pp_on_AA & ~phase2_tracker).toModify(siPixelRecHitsPreSplitting, + maxHitsInModules = cms.uint32(2048) ) diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc index b290aabc194d1..a9e4b49dc8ecb 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc @@ -10,6 +10,7 @@ #include "HeterogeneousCore/AlpakaInterface/interface/memory.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" +// #define ONLY_TRIPLETS_IN_HOLE //----------------------------------------------------------------------------- //! 
The constructor. //----------------------------------------------------------------------------- @@ -42,24 +43,14 @@ void PixelCPEFastParamsHost::fillParamsForDevice() { buffer_->commonParams().theThicknessB = m_DetParams.front().theThickness; buffer_->commonParams().theThicknessE = m_DetParams.back().theThickness; - buffer_->commonParams().numberOfLaddersInBarrel = TrackerTraits::numberOfLaddersInBarrel; LogDebug("PixelCPEFastParamsHost") << "thickness " << buffer_->commonParams().theThicknessB << ' ' << buffer_->commonParams().theThicknessE; +#ifdef ONLY_TRIPLETS_IN_HOLE // zero average geometry memset(&buffer_->averageGeometry(), 0, sizeof(pixelTopology::AverageGeometryT)); - // zero layer geometry - memset(&buffer_->layerGeometry(), 0, sizeof(pixelCPEforDevice::LayerGeometryT)); - - uint32_t nLayers = 0; - uint32_t oldLayer = 0; - uint32_t oldLadder = 0; - float rl = 0; - float zl = 0; - float miz = 500, mxz = 0; - float pl = 0; - int nl = 0; +#endif assert(m_DetParams.size() <= TrackerTraits::numberOfModules); @@ -84,33 +75,6 @@ void PixelCPEFastParamsHost::fillParamsForDevice() { auto thickness = g.isBarrel ? buffer_->commonParams().theThicknessB : buffer_->commonParams().theThicknessE; assert(thickness == p.theThickness); - auto ladder = ttopo_.pxbLadder(p.theDet->geographicalId()); - if (oldLayer != g.layer) { - oldLayer = g.layer; - LogDebug("PixelCPEFastParamsHost") << "new layer at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) - << g.layer << " starting at " << g.rawId << '\n' - << "old layer had " << nl << " ladders"; - nl = 0; - - assert(nLayers <= TrackerTraits::numberOfLayers); - buffer_->layerGeometry().layerStart[nLayers] = i; - ++nLayers; - } - if (oldLadder != ladder) { - oldLadder = ladder; - LogDebug("PixelCPEFastParamsHost") << "new ladder at " << i - << (g.isBarrel ? " B " : (g.isPosZ ? 
" E+ " : " E- ")) << ladder - << " starting at " << g.rawId << '\n' - << "old ladder ave z,r,p mz " << zl / 8.f << " " << rl / 8.f << " " << pl / 8.f - << ' ' << miz << ' ' << mxz; - rl = 0; - zl = 0; - pl = 0; - miz = 500; - mxz = 0; - nl++; - } - g.shiftX = 0.5f * p.lorentzShiftInCmX; g.shiftY = 0.5f * p.lorentzShiftInCmY; g.chargeWidthX = p.lorentzShiftInCmX * p.widthLAFractionX; @@ -127,12 +91,6 @@ void PixelCPEFastParamsHost::fillParamsForDevice() { auto rr = pixelCPEforDevice::Rotation(p.theDet->surface().rotation()); g.frame = pixelCPEforDevice::Frame(vv.x(), vv.y(), vv.z(), rr); - zl += vv.z(); - miz = std::min(miz, std::abs(vv.z())); - mxz = std::max(mxz, std::abs(vv.z())); - rl += vv.perp(); - pl += vv.phi(); // (not obvious) - // errors ..... ClusterParamGeneric cp; @@ -295,8 +253,8 @@ void PixelCPEFastParamsHost::fillParamsForDevice() { } } // loop over det - // store last module - buffer_->layerGeometry().layerStart[nLayers] = m_DetParams.size(); +#ifdef ONLY_TRIPLETS_IN_HOLE + // compute ladder baricenter (only in global z) for the barrel constexpr int numberOfModulesInLadder = TrackerTraits::numberOfModulesInLadder; constexpr int numberOfLaddersInBarrel = TrackerTraits::numberOfLaddersInBarrel; @@ -307,8 +265,6 @@ void PixelCPEFastParamsHost::fillParamsForDevice() { constexpr int firstEndcapPos = TrackerTraits::firstEndcapPos; constexpr int firstEndcapNeg = TrackerTraits::firstEndcapNeg; - // compute ladder baricenter (only in global z) for the barrel - // auto& aveGeom = buffer_->averageGeometry(); int il = 0; for (int im = 0, nm = numberOfModulesInBarrel; im < nm; ++im) { @@ -344,6 +300,7 @@ void PixelCPEFastParamsHost::fillParamsForDevice() { // correct for outer ring being closer aveGeom.endCapZ[0] -= TrackerTraits::endcapCorrection; aveGeom.endCapZ[1] += TrackerTraits::endcapCorrection; + #ifdef EDM_ML_DEBUG for (int jl = 0, nl = numberOfLaddersInBarrel; jl < nl; ++jl) { LogDebug("PixelCPEFastParamsHost") << jl << ':' << aveGeom.ladderR[jl] << 
'/' @@ -354,12 +311,7 @@ void PixelCPEFastParamsHost::fillParamsForDevice() { } LogDebug("PixelCPEFastParamsHost") << aveGeom.endCapZ[0] << ' ' << aveGeom.endCapZ[1]; #endif // EDM_ML_DEBUG - - // fill ladders geometry - memcpy(buffer_->layerGeometry().layer, - pixelTopology::layer.data(), - pixelTopology::layer.size()); - buffer_->layerGeometry().maxModuleStride = pixelTopology::maxModuleStride; +#endif //ONLY_TRIPLETS_IN_HOLE } template diff --git a/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc b/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc index 72dd8b0283460..af99e53b96934 100644 --- a/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc +++ b/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc @@ -48,7 +48,7 @@ #include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" #include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" -#include "DataFormats/TrackSoA/interface/alpaka/TrackUtilities.h" +#include "DataFormats/TrackSoA/interface/TracksSoA.h" #include "DataFormats/TrackSoA/interface/TracksHost.h" #include "DataFormats/VertexSoA/interface/ZVertexHost.h" @@ -146,7 +146,7 @@ struct L2TauNNProducerAlpakaCacheData { class L2TauNNProducerAlpaka : public edm::stream::EDProducer> { public: - using TracksHost = pixelTrack::TracksHostPhase1; + using TracksHost = reco::TracksHost; struct caloRecHitCollections { const HBHERecHitCollection* hbhe; @@ -575,7 +575,6 @@ void L2TauNNProducerAlpaka::selectGoodTracksAndVertices(const ZVertexHost& patav const TracksHost& patatracks_tsoa, std::vector& trkGood, std::vector& vtxGood) { - using patatrackHelpers = TracksUtilities; const auto maxTracks = patatracks_tsoa.view().metadata().size(); const int nv = patavtx_soa.view().nvFinal(); trkGood.clear(); @@ -589,8 +588,8 @@ void L2TauNNProducerAlpaka::selectGoodTracksAndVertices(const ZVertexHost& patav std::vector nTrkAssociated(nv, 0); for (int32_t trk_idx = 0; trk_idx < maxTracks; ++trk_idx) { - auto nHits = 
patatrackHelpers::nHits(patatracks_tsoa.view(), trk_idx); - if (nHits == 0) { + auto n_hits = nHits(patatracks_tsoa.view(), trk_idx); + if (n_hits == 0) { break; } int vtx_ass_to_track = patavtx_soa.view()[trk_idx].idv(); @@ -602,7 +601,7 @@ void L2TauNNProducerAlpaka::selectGoodTracksAndVertices(const ZVertexHost& patav pTSquaredSum[vtx_ass_to_track] += patatrackPt * patatrackPt; } } - if (nHits > 0 and quality[trk_idx] >= pixelTrack::Quality::loose) { + if (n_hits > 0 and quality[trk_idx] >= pixelTrack::Quality::loose) { trkGood.push_back(trk_idx); } } @@ -627,7 +626,7 @@ std::pair L2TauNNProducerAlpaka::impactParameter(int it, /* dxy and dz */ riemannFit::Vector5d ipar, opar; riemannFit::Matrix5d icov, ocov; - TracksUtilities::copyToDense(patatracks_tsoa.view(), ipar, icov, it); + copyToDense(patatracks_tsoa.view(), ipar, icov, it); riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); float sp = std::sin(patatrackPhi); @@ -657,7 +656,7 @@ void L2TauNNProducerAlpaka::fillPatatracks(tensorflow::Tensor& cellGridMatrix, const reco::BeamSpot& beamspot, const MagneticField* magfi) { using NNInputs = L2TauTagNNv1::NNInputs; - using patatrackHelpers = TracksUtilities; + float deta, dphi; int eta_idx = 0; int phi_idx = 0; @@ -685,10 +684,10 @@ void L2TauNNProducerAlpaka::fillPatatracks(tensorflow::Tensor& cellGridMatrix, const float patatrackEta = patatracks_tsoa.const_view()[it].eta(); const float patatrackCharge = reco::charge(patatracks_tsoa.const_view(), it); const float patatrackChi2OverNdof = patatracks_tsoa.view()[it].chi2(); - const auto nHits = patatrackHelpers::nHits(patatracks_tsoa.const_view(), it); - if (nHits <= 0) + const auto n_hits = nHits(patatracks_tsoa.const_view(), it); + if (n_hits <= 0) continue; - const int patatrackNdof = 2 * std::min(6, nHits) - 5; + const int patatrackNdof = 2 * std::min(6, n_hits) - 5; const int vtx_idx_assTrk = 
patavtx_soa.view()[it].idv(); if (reco::deltaR2(patatrackEta, patatrackPhi, tauEta, tauPhi) < dR2_max) { diff --git a/RecoTracker/Configuration/python/customizePixelTracksForTriplets.py b/RecoTracker/Configuration/python/customizePixelTracksForTriplets.py index da3233278643a..4a599277044a2 100644 --- a/RecoTracker/Configuration/python/customizePixelTracksForTriplets.py +++ b/RecoTracker/Configuration/python/customizePixelTracksForTriplets.py @@ -1,12 +1,58 @@ import FWCore.ParameterSet.Config as cms def customizePixelTracksForTriplets(process): + + from HLTrigger.Configuration.common import producers_by_type, esproducers_by_type + names = ['CAHitNtupletAlpakaPhase1@alpaka','CAHitNtupletAlpakaPhase2@alpaka'] + + for name in names: + producers = producers_by_type(process, name) + for producer in producers: + producer.minHitsPerNtuplet = 3 + + if name == 'CAHitNtupletAlpakaPhase1@alpaka': + + producer.avgHitsPerTrack = 4.5 + producer.avgCellsPerHit = 27 + producer.avgCellsPerCell = 0.071 + producer.avgTracksPerCell = 0.127 + producer.maxNumberOfDoublets = str(512*1024) # this is actually low, should be ~630k, keeping the same for a fair comparison with master + producer.maxNumberOfTuples = str(32 * 1024) # this is on spot (µ+5*σ = 31.8k) + + producer.geometry.pairGraph = [ 0, 1, 0, 4, 0, + 7, 1, 2, 1, 4, + 1, 7, 4, 5, 7, + 8, 2, 3, 2, 4, + 2, 7, 5, 6, 8, + 9, 0, 2, 1, 3, + 0, 5, 0, 8, + 4, 6, 7, 9 ] + producer.geometry.startingPairs = [i for i in range(8)] + [13, 14, 15, 16, 17, 18, 19] + producer.geometry.phiCuts = [522, 730, 730, 522, 626, + 626, 522, 522, 626, 626, + 626, 522, 522, 522, 522, + 522, 522, 522, 522] + producer.geometry.minZ = [-20., 0., -30., -22., 10., + -30., -70., -70., -22., 15., + -30, -70., -70., -20., -22., + 0, -30., -70., -70.] + producer.geometry.maxZ = [20., 30., 0., 22., 30., + -10., 70., 70., 22., 30., + -15., 70., 70., 20., 22., + 30., 0., 70., 70.] 
+ producer.geometry.maxR = [20., 9., 9., 20., 7., + 7., 5., 5., 20., 6., + 6., 5., 5., 20., 20., + 9., 9., 9., 9.] + + elif name == 'CAHitNtupletAlpakaPhase2@alpaka': + + producer.avgHitsPerTrack = 6.5 + producer.avgCellsPerHit = 6 # actually this is the same, quads has the same graph at the moment + producer.avgCellsPerCell = 0.151 + producer.avgTracksPerCell = 0.130 + producer.maxNumberOfDoublets = str(5*512*1024) # could be lowered to 1.4M, keeping the same for a fair comparison with master + producer.maxNumberOfTuples = str(256 * 1024) # could be lowered to 120k, same as above + + return process - from HLTrigger.Configuration.common import producers_by_type - producers = ['CAHitNtupletCUDA','CAHitNtupletCUDAPhase1','CAHitNtupletCUDAPhase2','CAHitNtupletAlpakaPhase1@alpaka','CAHitNtupletAlpakaPhase2@alpaka'] - for name in producers: - for producer in producers_by_type(process, name): - producer.includeJumpingForwardDoublets = True - producer.minHitsPerNtuplet = 3 - - return process diff --git a/RecoTracker/PixelSeeding/BuildFile.xml b/RecoTracker/PixelSeeding/BuildFile.xml index 7bc10578b4448..79350db5f5f5d 100644 --- a/RecoTracker/PixelSeeding/BuildFile.xml +++ b/RecoTracker/PixelSeeding/BuildFile.xml @@ -23,7 +23,10 @@ + + + diff --git a/RecoTracker/PixelSeeding/interface/CAGeometryDevice.h b/RecoTracker/PixelSeeding/interface/CAGeometryDevice.h new file mode 100644 index 0000000000000..4cc5c3ffcaccf --- /dev/null +++ b/RecoTracker/PixelSeeding/interface/CAGeometryDevice.h @@ -0,0 +1,16 @@ +#ifndef RecoTracker_PixelSeeding_interface_CAGeometryDevice_H +#define RecoTracker_PixelSeeding_interface_CAGeometryDevice_H + +#include + +#include + +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace reco { + template + using CAGeometryDevice = PortableDeviceMultiCollection; +} +#endif // 
RecoTracker_PixelSeeding_interface_CAGeometryDevice_H diff --git a/RecoTracker/PixelSeeding/interface/CAGeometryHost.h b/RecoTracker/PixelSeeding/interface/CAGeometryHost.h new file mode 100644 index 0000000000000..0a231b80ea6bf --- /dev/null +++ b/RecoTracker/PixelSeeding/interface/CAGeometryHost.h @@ -0,0 +1,15 @@ +#ifndef RecoTracker_PixelSeeding_interface_CAGeometryHost_H +#define RecoTracker_PixelSeeding_interface_CAGeometryHost_H + +#include + +#include + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace reco { + using CAGeometryHost = PortableHostMultiCollection; +} +#endif // RecoTracker_PixelSeeding_interface_CAGeometryHost_H diff --git a/RecoTracker/PixelSeeding/interface/CAGeometrySoA.h b/RecoTracker/PixelSeeding/interface/CAGeometrySoA.h new file mode 100644 index 0000000000000..e86b0da1ac711 --- /dev/null +++ b/RecoTracker/PixelSeeding/interface/CAGeometrySoA.h @@ -0,0 +1,58 @@ +#ifndef RecoTracker_PixelSeeding_interface_CAGeometry_h +#define RecoTracker_PixelSeeding_interface_CAGeometry_h + +#include + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/GeometrySurface/interface/SOARotation.h" + +namespace reco { + + // struct RZMap + // { + // // in cm + // static constexpr float rmin = 0.f; + // static constexpr float rmax = 120.f; + // static constexpr float zlim = 300.f; + // static constexpr float zran = 600.f; + + // static constexpr uint16_t binr = uint16_t(rmax) * 5; + // static constexpr uint16_t binz = uint16_t(zlim) * 5; + + // static constexpr uint16_t binz = uint16_t(zlim) * 5; + + // // bin = 1 + int (fNbins*(x-fXmin)/(fXmax-fXmin) ); + // } + + using GraphNode = std::array; + using DetFrame = SOAFrame; + + GENERATE_SOA_LAYOUT(CAModulesLayout, SOA_COLUMN(DetFrame, detFrame)) + + GENERATE_SOA_LAYOUT(CALayersLayout, + SOA_COLUMN(uint32_t, layerStarts), + 
SOA_COLUMN(float, caThetaCut), + SOA_COLUMN(float, caDCACut)) + + GENERATE_SOA_LAYOUT(CAGraphLayout, + SOA_COLUMN(GraphNode, graph), + SOA_COLUMN(bool, startingPair), + SOA_COLUMN(int16_t, phiCuts), + SOA_COLUMN(float, minz), + SOA_COLUMN(float, maxz), + SOA_COLUMN(float, maxr)) + + using CALayersSoA = CALayersLayout<>; + using CALayersSoAView = CALayersSoA::View; + using CALayersSoAConstView = CALayersSoA::ConstView; + + using CAGraphSoA = CAGraphLayout<>; + using CAGraphSoAView = CAGraphSoA::View; + using CAGraphSoAConstView = CAGraphSoA::ConstView; + + using CAModulesSoA = CAModulesLayout<>; + using CAModulesView = CAModulesSoA::View; + using CAModulesConstView = CAModulesSoA::ConstView; + +} // namespace reco +#endif // RecoTracker_PixelSeeding_interface_CAGeometry_h diff --git a/RecoTracker/PixelSeeding/interface/CAPairDevice.h b/RecoTracker/PixelSeeding/interface/CAPairDevice.h new file mode 100644 index 0000000000000..e4197b4269bd7 --- /dev/null +++ b/RecoTracker/PixelSeeding/interface/CAPairDevice.h @@ -0,0 +1,17 @@ +#ifndef RecoTracker_PixelSeeding_interface_CAPairDevice_H +#define RecoTracker_PixelSeeding_interface_CAPairDevice_H + +#include + +#include + +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "RecoTracker/PixelSeeding/interface/CAPairSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace caStructures { + template + using CAPairDevice = PortableDeviceCollection; +} + +#endif // RecoTracker_PixelSeeding_interface_CAPairDevice_H diff --git a/RecoTracker/PixelSeeding/interface/CAPairHost.h b/RecoTracker/PixelSeeding/interface/CAPairHost.h new file mode 100644 index 0000000000000..718345850cf19 --- /dev/null +++ b/RecoTracker/PixelSeeding/interface/CAPairHost.h @@ -0,0 +1,15 @@ +#ifndef RecoTracker_PixelSeeding_interface_CAPairHost_h +#define RecoTracker_PixelSeeding_interface_CAPairHost_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableHostCollection.h" 
+#include "RecoTracker/PixelSeeding/interface/CAPairSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace caStructures { + using CAPairHost = PortableHostCollection; +} +#endif // RecoTracker_PixelSeeding_interface_CAPairHost_h diff --git a/RecoTracker/PixelSeeding/interface/CAPairSoA.h b/RecoTracker/PixelSeeding/interface/CAPairSoA.h new file mode 100644 index 0000000000000..13d0d1420992d --- /dev/null +++ b/RecoTracker/PixelSeeding/interface/CAPairSoA.h @@ -0,0 +1,20 @@ +#ifndef RecoTracker_PixelSeeding_interface_CAPairSoA_h +#define RecoTracker_PixelSeeding_interface_CAPairSoA_h + +#include + +#include + +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +namespace caStructures { + + GENERATE_SOA_LAYOUT(CAPairLayout, SOA_COLUMN(uint32_t, inner), SOA_COLUMN(uint32_t, outer)) + + using CAPairSoA = CAPairLayout<>; + using CAPairSoAView = CAPairSoA::View; + using CAPairSoAConstView = CAPairSoA::ConstView; + +} // namespace caStructures + +#endif // RecoTracker_PixelSeeding_interface_CAPairSoA_h diff --git a/RecoTracker/PixelSeeding/interface/alpaka/CAGeometrySoACollection.h b/RecoTracker/PixelSeeding/interface/alpaka/CAGeometrySoACollection.h new file mode 100644 index 0000000000000..3b92053e5ee95 --- /dev/null +++ b/RecoTracker/PixelSeeding/interface/alpaka/CAGeometrySoACollection.h @@ -0,0 +1,26 @@ +#ifndef RecoTracker_PixelSeeding_interface_CAGeometrySoACollection_h +#define RecoTracker_PixelSeeding_interface_CAGeometrySoACollection_h + +#include + +#include + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometryDevice.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometryHost.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::reco { + + using 
::reco::CAGeometryDevice; + using ::reco::CAGeometryHost; + using CAGeometrySoACollection = + std::conditional_t, CAGeometryHost, CAGeometryDevice>; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::reco + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(reco::CAGeometrySoACollection, reco::CAGeometryHost); + +#endif // RecoTracker_PixelSeeding_interface_CAGeometrySoACollection_h diff --git a/RecoTracker/PixelSeeding/interface/alpaka/CAPairSoACollection.h b/RecoTracker/PixelSeeding/interface/alpaka/CAPairSoACollection.h new file mode 100644 index 0000000000000..22d79b7d82c50 --- /dev/null +++ b/RecoTracker/PixelSeeding/interface/alpaka/CAPairSoACollection.h @@ -0,0 +1,26 @@ +#ifndef RecoTracker_PixelSeeding_interface_CAPairSoACollection_h +#define RecoTracker_PixelSeeding_interface_CAPairSoACollection_h + +#include + +#include + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "RecoTracker/PixelSeeding/interface/CAPairDevice.h" +#include "RecoTracker/PixelSeeding/interface/CAPairHost.h" +#include "RecoTracker/PixelSeeding/interface/CAPairSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using ::caStructures::CAPairDevice; + using ::caStructures::CAPairHost; + using CAPairSoACollection = + std::conditional_t, CAPairHost, CAPairDevice>; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(CAPairSoACollection, ::caStructures::CAPairHost); + +#endif // RecoTracker_PixelSeeding_interface_CAPairSoACollection_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc index ef9584ba50373..60317b3da3fd6 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/BrokenLineFit.dev.cc @@ -1,6 +1,6 @@ -//#define BROKENLINE_DEBUG -//#define 
BL_DUMP_HITS - +// #define BROKENLINE_DEBUG +// #define BL_DUMP_HITS +// #define GPU_DEBUG #include #include @@ -8,31 +8,26 @@ #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/debug.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" #include "RecoTracker/PixelTrackFitting/interface/alpaka/BrokenLine.h" #include "HelixFit.h" -template -using Tuples = typename reco::TrackSoA::HitContainer; -template -using OutputSoAView = reco::TrackSoAView; -template -using TupleMultiplicity = caStructures::TupleMultiplicityT; - -// #define BL_DUMP_HITS +using OutputSoAView = reco::TrackSoAView; +using TupleMultiplicity = caStructures::GenericContainer; +using Tuples = caStructures::SequentialContainer; namespace ALPAKA_ACCELERATOR_NAMESPACE { - template + + template class Kernel_BLFastFit { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - Tuples const *__restrict__ foundNtuplets, - TupleMultiplicity const *__restrict__ tupleMultiplicity, - TrackingRecHitSoAConstView hh, - pixelCPEforDevice::ParamsOnDeviceT const *__restrict__ cpeParams, - typename TrackerTraits::tindex_type *__restrict__ ptkids, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + Tuples const *__restrict__ foundNtuplets, + TupleMultiplicity const *__restrict__ tupleMultiplicity, + ::reco::TrackingRecHitConstView hh, + ::reco::CAModulesConstView cm, + typename caStructures::tindex_type *__restrict__ ptkids, double *__restrict__ phits, float *__restrict__ phits_ge, double *__restrict__ pfast_fit, @@ -40,7 +35,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { uint32_t nHitsH, int32_t offset) const { constexpr uint32_t hitsInFit = N; - constexpr auto invalidTkId = std::numeric_limits::max(); + constexpr auto invalidTkId = std::numeric_limits::max(); 
ALPAKA_ASSERT_ACC(hitsInFit <= nHitsL); ALPAKA_ASSERT_ACC(nHitsL <= nHitsH); @@ -55,7 +50,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { ALPAKA_ASSERT_ACC(totTK >= 0); #ifdef BROKENLINE_DEBUG - const uint32_t threadIdx(alpaka::getIdx(acc)[0u]); if (cms::alpakatools::once_per_grid(acc)) { printf("%d total Ntuple\n", tupleMultiplicity->size()); printf("%d Ntuple of size %d/%d for %d hits to fit\n", totTK, nHitsL, nHitsH, hitsInFit); @@ -115,7 +109,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { float ge[6]; #ifdef YERR_FROM_DC - auto const &dp = cpeParams->detParams(hh.detectorIndex(hit)); + auto const &dp = cm->detParams(hh.detectorIndex(hit)); auto status = hh[hit].chargeAndStatus().status; int qbin = CPEFastParametrisation::kGenErrorQBins - 1 - status.qBin; ALPAKA_ASSERT_ACC(qbin >= 0 && qbin < 5); @@ -136,7 +130,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { yerr = nok ? hh[hit].yerrLocal() : yerr; dp.frame.toGlobal(hh[hit].xerrLocal(), 0, yerr, ge); #else - cpeParams->detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge); + auto const &frame = cm.detFrame(hh.detectorIndex(hit)); + frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge); #endif #ifdef BL_DUMP_HITS @@ -174,12 +169,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template struct Kernel_BLFit { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TupleMultiplicity const *__restrict__ tupleMultiplicity, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TupleMultiplicity const *__restrict__ tupleMultiplicity, double bField, - OutputSoAView results_view, - typename TrackerTraits::tindex_type const *__restrict__ ptkids, + OutputSoAView results_view, + typename caStructures::tindex_type const *__restrict__ ptkids, double *__restrict__ phits, float *__restrict__ phits_ge, double *__restrict__ pfast_fit) const { @@ -190,7 +184,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { ALPAKA_ASSERT_ACC(results_view.eta()); 
ALPAKA_ASSERT_ACC(results_view.chi2()); ALPAKA_ASSERT_ACC(pfast_fit); - constexpr auto invalidTkId = std::numeric_limits::max(); + + constexpr auto invalidTkId = std::numeric_limits::max(); // same as above... // look in bin for this hit multiplicity @@ -200,7 +195,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { break; auto tkid = ptkids[local_idx]; - ALPAKA_ASSERT_ACC(tkid < TrackerTraits::maxNumberOfTuples); + ALPAKA_ASSERT_ACC(int(tkid) < tupleMultiplicity->capacity()); riemannFit::Map3xNd hits(phits + local_idx); riemannFit::Map4d fast_fit(pfast_fit + local_idx); @@ -215,8 +210,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { brokenline::lineFit(acc, hits_ge, fast_fit, bField, data, line); brokenline::circleFit(acc, hits, hits_ge, fast_fit, bField, data, circle); - TracksUtilities::copyFromCircle( - results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid); + reco::copyFromCircle(results_view, circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid); results_view[tkid].pt() = float(bField) / float(std::abs(circle.par(2))); results_view[tkid].eta() = alpaka::math::asinh(acc, line.par(0)); results_view[tkid].chi2() = (circle.chi2 + line.chi2) / (2 * N - 5); @@ -246,14 +240,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { }; template - void HelixFit::launchBrokenLineKernels( - const TrackingRecHitSoAConstView &hv, - pixelCPEforDevice::ParamsOnDeviceT const *cpeParams, - uint32_t hitsInFit, - uint32_t maxNumberOfTuples, - Queue &queue) { + void HelixFit::launchBrokenLineKernels(const ::reco::TrackingRecHitConstView &hv, + const ::reco::CAModulesConstView &cm, + uint32_t hitsInFit, + uint32_t maxNumberOfTuples, + Queue &queue) { ALPAKA_ASSERT_ACC(tuples_); +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Starting HelixFit::launchBrokenLineKernels" << std::endl; +#endif + uint32_t blockSize = 64; uint32_t numberOfBlocks = cms::alpakatools::divide_up_by(maxNumberOfConcurrentFits_, blockSize); const WorkDiv1D workDivTriplets = 
cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); @@ -261,7 +259,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // Fit internals auto tkidDevice = - cms::alpakatools::make_device_buffer(queue, maxNumberOfConcurrentFits_); + cms::alpakatools::make_device_buffer(queue, maxNumberOfConcurrentFits_); auto hitsDevice = cms::alpakatools::make_device_buffer( queue, maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double)); auto hits_geDevice = cms::alpakatools::make_device_buffer( @@ -274,11 +272,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDivTriplets, - Kernel_BLFastFit<3, TrackerTraits>{}, + Kernel_BLFastFit<3>{}, tuples_, tupleMultiplicity_, hv, - cpeParams, + cm, tkidDevice.data(), hitsDevice.data(), hits_geDevice.data(), @@ -286,7 +284,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { 3, 3, offset); - +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Kernel_BLFastFit(3) -> done! " << std::endl; +#endif alpaka::exec(queue, workDivTriplets, Kernel_BLFit<3, TrackerTraits>{}, @@ -297,12 +298,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { hitsDevice.data(), hits_geDevice.data(), fast_fit_resultsDevice.data()); +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Kernel_BLFit(3) -> done! 
" << std::endl; +#endif if (fitNas4_) { // fit all as 4 riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrack, 1>([this, &hv, - &cpeParams, + &cm, &tkidDevice, &hitsDevice, &hits_geDevice, @@ -312,11 +317,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { &workDivQuadsPenta](auto i) { alpaka::exec(queue, workDivQuadsPenta, - Kernel_BLFastFit<4, TrackerTraits>{}, + Kernel_BLFastFit<4>{}, tuples_, tupleMultiplicity_, hv, - cpeParams, + cm, tkidDevice.data(), hitsDevice.data(), hits_geDevice.data(), @@ -336,11 +341,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { hits_geDevice.data(), fast_fit_resultsDevice.data()); }); - } else { riemannFit::rolling_fits<4, TrackerTraits::maxHitsOnTrackForFullFit, 1>([this, &hv, - &cpeParams, + &cm, &tkidDevice, &hitsDevice, &hits_geDevice, @@ -350,11 +354,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { &workDivQuadsPenta](auto i) { alpaka::exec(queue, workDivQuadsPenta, - Kernel_BLFastFit{}, + Kernel_BLFastFit{}, tuples_, tupleMultiplicity_, hv, - cpeParams, + cm, tkidDevice.data(), hitsDevice.data(), hits_geDevice.data(), @@ -373,6 +377,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { hitsDevice.data(), hits_geDevice.data(), fast_fit_resultsDevice.data()); +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Kernel_BLFastFit(" << i << ") and Kernel_BLFit(" << i << ") -> done! 
" << std::endl; +#endif }); static_assert(TrackerTraits::maxHitsOnTrackForFullFit < TrackerTraits::maxHitsOnTrack); @@ -380,11 +388,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { //Fit all the rest using the maximum from previous call alpaka::exec(queue, workDivQuadsPenta, - Kernel_BLFastFit{}, + Kernel_BLFastFit{}, tuples_, tupleMultiplicity_, hv, - cpeParams, + cm, tkidDevice.data(), hitsDevice.data(), hits_geDevice.data(), diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h b/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h index 01b32bd228a49..3c4fcf506e139 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CACell.h @@ -1,7 +1,9 @@ #ifndef RecoTracker_PixelSeeding_plugins_alpaka_CACell_h #define RecoTracker_PixelSeeding_plugins_alpaka_CACell_h -// #define ONLY_TRIPLETS_IN_HOLE +// #define GPU_DEBUG +// #define CA_DEBUG +// #define CA_WARNINGS #include #include @@ -16,177 +18,114 @@ #include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "RecoTracker/PixelSeeding/interface/CircleEq.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" +#include "RecoTracker/PixelSeeding/interface/CAPairSoA.h" #include "CAStructures.h" namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace ::caStructures; + template - class CACellT { + class CACell { public: - using PtrAsInt = unsigned long long; - - static constexpr auto maxCellsPerHit = TrackerTraits::maxCellsPerHit; - using OuterHitOfCellContainer = caStructures::OuterHitOfCellContainerT; - using OuterHitOfCell = caStructures::OuterHitOfCellT; - using CellNeighbors = caStructures::CellNeighborsT; - using CellTracks = caStructures::CellTracksT; - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - using CellTracksVector = caStructures::CellTracksVectorT; - - using HitsConstView = TrackingRecHitSoAConstView; - using hindex_type = typename 
TrackerTraits::hindex_type; - using tindex_type = typename TrackerTraits::tindex_type; + ALPAKA_FN_ACC ALPAKA_FN_INLINE void init(const HitsConstView& hh, + int layerPairId, + uint8_t theInnerLayer, + uint8_t theOuterLayer, + hindex_type innerHitId, + hindex_type outerHitId) { + theInnerHitId_ = innerHitId; + theOuterHitId_ = outerHitId; + theLayerPairId_ = layerPairId; + theInnerLayer_ = theInnerLayer; + theOuterLayer_ = theOuterLayer; + theStatus_ = 0; + theFishboneId_ = invalidHitId; + + // optimization that depends on access pattern + theInnerZ_ = hh[innerHitId].zGlobal(); + theInnerR_ = hh[innerHitId].rGlobal(); + } + + using hindex_type = ::caStructures::hindex_type; + using tindex_type = ::caStructures::tindex_type; + static constexpr auto invalidHitId = std::numeric_limits::max(); using TmpTuple = cms::alpakatools::VecArray; + using HitContainer = caStructures::SequentialContainer; + using CellToCell = caStructures::GenericContainer; + using CellToTracks = caStructures::GenericContainer; + using CAPairSoAView = caStructures::CAPairSoAView; - using HitContainer = typename reco::TrackSoA::HitContainer; using Quality = ::pixelTrack::Quality; static constexpr auto bad = ::pixelTrack::Quality::bad; enum class StatusBit : uint16_t { kUsed = 1, kInTrack = 2, kKilled = 1 << 15 }; - CACellT() = default; + CACell() = default; - ALPAKA_FN_ACC ALPAKA_FN_INLINE void init(CellNeighborsVector& cellNeighbors, - CellTracksVector& cellTracks, - const HitsConstView& hh, - int layerPairId, - hindex_type innerHitId, - hindex_type outerHitId) { - theInnerHitId = innerHitId; - theOuterHitId = outerHitId; - theLayerPairId_ = layerPairId; - theStatus_ = 0; - theFishboneId = invalidHitId; + constexpr unsigned int inner_hit_id() const { return theInnerHitId_; } + constexpr unsigned int outer_hit_id() const { return theOuterHitId_; } - // optimization that depends on access pattern - theInnerZ = hh[innerHitId].zGlobal(); - theInnerR = hh[innerHitId].rGlobal(); - - // link to default 
empty - theOuterNeighbors = &cellNeighbors[0]; - theTracks = &cellTracks[0]; - ALPAKA_ASSERT_ACC(outerNeighbors().empty()); - ALPAKA_ASSERT_ACC(tracks().empty()); - } + ALPAKA_FN_ACC ALPAKA_FN_INLINE void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); } - template - ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) int addOuterNeighbor( - const TAcc& acc, typename TrackerTraits::cindex_type t, CellNeighborsVector& cellNeighbors) { - // use smart cache - if (outerNeighbors().empty()) { - auto i = cellNeighbors.extend(acc); // maybe wasted.... - if (i > 0) { - cellNeighbors[i].reset(); - alpaka::mem_fence(acc, alpaka::memory_scope::Grid{}); -#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED - theOuterNeighbors = &cellNeighbors[i]; -#else - auto zero = (PtrAsInt)(&cellNeighbors[0]); - alpaka::atomicCas(acc, - (PtrAsInt*)(&theOuterNeighbors), - zero, - (PtrAsInt)(&cellNeighbors[i]), - alpaka::hierarchy::Blocks{}); // if fails we cannot give "i" back... -#endif - } else - return -1; - } - alpaka::mem_fence(acc, alpaka::memory_scope::Grid{}); - return outerNeighbors().push_back(acc, t); - } + ALPAKA_FN_ACC ALPAKA_FN_INLINE int16_t layerPairId() const { return theLayerPairId_; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE int16_t innerLayer() const { return theInnerLayer_; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE int16_t outerLayer() const { return theOuterLayer_; } - template - ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) int addTrack(TAcc const& acc, - tindex_type t, - CellTracksVector& cellTracks) { - if (tracks().empty()) { - auto i = cellTracks.extend(acc); // maybe wasted.... 
- if (i > 0) { - cellTracks[i].reset(); - alpaka::mem_fence(acc, alpaka::memory_scope::Grid{}); -#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED - theTracks = &cellTracks[i]; -#else - auto zero = (PtrAsInt)(&cellTracks[0]); - alpaka::atomicCas(acc, - (PtrAsInt*)(&theTracks), - zero, - (PtrAsInt)(&cellTracks[i]), - alpaka::hierarchy::Blocks{}); // if fails we cannot give "i" back... + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); } -#endif - } else - return -1; - } - alpaka::mem_fence(acc, alpaka::memory_scope::Grid{}); - return tracks().push_back(acc, t); - } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_x(const HitsConstView& hh) const { return hh[theInnerHitId_].xGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_x(const HitsConstView& hh) const { return hh[theOuterHitId_].xGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_y(const HitsConstView& hh) const { return hh[theInnerHitId_].yGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_y(const HitsConstView& hh) const { return hh[theOuterHitId_].yGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_z(const HitsConstView& hh) const { return theInnerZ_; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_z(const HitsConstView& hh) const { return hh[theOuterHitId_].zGlobal(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_r(const HitsConstView& hh) const { return theInnerR_; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_r(const HitsConstView& hh) const { return hh[theOuterHitId_].rGlobal(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE CellTracks& tracks() { return *theTracks; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE CellTracks const& tracks() const { return *theTracks; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE CellNeighbors& outerNeighbors() { return *theOuterNeighbors; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE CellNeighbors const& outerNeighbors() const { return 
*theOuterNeighbors; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_x(const HitsConstView& hh) const { return hh[theInnerHitId].xGlobal(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_x(const HitsConstView& hh) const { return hh[theOuterHitId].xGlobal(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_y(const HitsConstView& hh) const { return hh[theInnerHitId].yGlobal(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_y(const HitsConstView& hh) const { return hh[theOuterHitId].yGlobal(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_z(const HitsConstView& hh) const { return theInnerZ; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_z(const HitsConstView& hh) const { return hh[theOuterHitId].zGlobal(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_r(const HitsConstView& hh) const { return theInnerR; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_r(const HitsConstView& hh) const { return hh[theOuterHitId].rGlobal(); } - - ALPAKA_FN_ACC ALPAKA_FN_INLINE auto inner_iphi(const HitsConstView& hh) const { return hh[theInnerHitId].iphi(); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE auto outer_iphi(const HitsConstView& hh) const { return hh[theOuterHitId].iphi(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE auto inner_iphi(const HitsConstView& hh) const { return hh[theInnerHitId_].iphi(); } + ALPAKA_FN_ACC ALPAKA_FN_INLINE auto outer_iphi(const HitsConstView& hh) const { return hh[theOuterHitId_].iphi(); } ALPAKA_FN_ACC ALPAKA_FN_INLINE float inner_detIndex(const HitsConstView& hh) const { - return hh[theInnerHitId].detectorIndex(); + return hh[theInnerHitId_].detectorIndex(); } ALPAKA_FN_ACC ALPAKA_FN_INLINE float outer_detIndex(const HitsConstView& hh) const { - return hh[theOuterHitId].detectorIndex(); + return hh[theOuterHitId_].detectorIndex(); } - constexpr unsigned int inner_hit_id() const { return theInnerHitId; } - constexpr unsigned int outer_hit_id() const { return theOuterHitId; } + ALPAKA_FN_ACC ALPAKA_FN_INLINE auto fishboneId() const { return theFishboneId_; } + ALPAKA_FN_ACC 
ALPAKA_FN_INLINE bool hasFishbone() const { return theFishboneId_ != invalidHitId; } ALPAKA_FN_ACC void print_cell() const { - printf("printing cell: on layerPair: %d, innerHitId: %d, outerHitId: %d \n", + printf("printing cell: on layerPair: %d, innerLayer: %d, outerLayer: %d, innerHitId: %d, outerHitId: %d \n", theLayerPairId_, - theInnerHitId, - theOuterHitId); + theInnerLayer_, + theOuterLayer_, + theInnerHitId_, + theOuterHitId_); } - ALPAKA_FN_ACC bool check_alignment(const HitsConstView& hh, - CACellT const& otherCell, - const float ptmin, - const float hardCurvCut, - const float caThetaCutBarrel, - const float caThetaCutForward, - const float dcaCutInnerTriplet, - const float dcaCutOuterTriplet) const { - // detIndex of the layerStart for the Phase1 Pixel Detector: - // [BPX1, BPX2, BPX3, BPX4, FP1, FP2, FP3, FN1, FN2, FN3, LAST_VALID] - // [ 0, 96, 320, 672, 1184, 1296, 1408, 1520, 1632, 1744, 1856] - auto ri = inner_r(hh); - auto zi = inner_z(hh); - - auto ro = outer_r(hh); - auto zo = outer_z(hh); - - auto r1 = otherCell.inner_r(hh); - auto z1 = otherCell.inner_z(hh); - auto isBarrel = otherCell.outer_detIndex(hh) < TrackerTraits::last_barrel_detIndex; - // TODO tune CA cuts below (theta and dca) - bool aligned = areAlignedRZ(r1, z1, ri, zi, ro, zo, ptmin, isBarrel ? caThetaCutBarrel : caThetaCutForward); - return (aligned && dcaCut(hh, - otherCell, - otherCell.inner_detIndex(hh) < TrackerTraits::last_bpix1_detIndex ? dcaCutInnerTriplet - : dcaCutOuterTriplet, - hardCurvCut)); + ALPAKA_FN_ACC ALPAKA_FN_INLINE void setFishbone(Acc2D const& acc, hindex_type id, float z, const HitsConstView& hh) { + // make it deterministic: use the farther apart (in z) + auto old = theFishboneId_; + while ( + old != + alpaka::atomicCas( + acc, + &theFishboneId_, + old, + (invalidHitId == old || std::abs(z - theInnerZ_) > std::abs(hh[old].zGlobal() - theInnerZ_)) ? 
id : old, + alpaka::hierarchy::Blocks{})) + old = theFishboneId_; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) static bool areAlignedRZ( + ALPAKA_FN_ACC ALPAKA_FN_INLINE static bool areAlignedRZ( float r1, float z1, float ri, float zi, float ro, float zo, const float ptmin, const float thetaCut) { float radius_diff = std::abs(r1 - ro); float distance_13_squared = radius_diff * radius_diff + (z1 - zo) * (z1 - zo); @@ -199,7 +138,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } ALPAKA_FN_ACC ALPAKA_FN_INLINE bool dcaCut(const HitsConstView& hh, - CACellT const& otherCell, + CACell const& otherCell, const float region_origin_radius_plus_tolerance, const float maxCurv) const { auto x1 = otherCell.inner_x(hh); @@ -219,127 +158,115 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature()); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE __attribute__((always_inline)) static bool dcaCutH( - float x1, - float y1, - float x2, - float y2, - float x3, - float y3, - const float region_origin_radius_plus_tolerance, - const float maxCurv) { - CircleEq eq(x1, y1, x2, y2, x3, y3); - - if (std::abs(eq.curvature()) > maxCurv) - return false; - - return std::abs(eq.dca0()) < region_origin_radius_plus_tolerance * std::abs(eq.curvature()); - } - - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hole0(const HitsConstView& hh, CACellT const& innerCell) const { - using namespace phase1PixelTopology; - - int p = innerCell.inner_iphi(hh); - if (p < 0) - p += std::numeric_limits::max(); - p = (max_ladder_bpx0 * p) / std::numeric_limits::max(); - p %= max_ladder_bpx0; - auto il = first_ladder_bpx0 + p; - auto r0 = hh.averageGeometry().ladderR[il]; - auto ri = innerCell.inner_r(hh); - auto zi = innerCell.inner_z(hh); - auto ro = outer_r(hh); - auto zo = outer_z(hh); - auto z0 = zi + (r0 - ri) * (zo - zi) / (ro - ri); - auto z_in_ladder = std::abs(z0 - hh.averageGeometry().ladderZ[il]); - auto z_in_module = z_in_ladder - 
module_length_bpx0 * int(z_in_ladder / module_length_bpx0); - auto gap = z_in_module < module_tolerance_bpx0 || z_in_module > (module_length_bpx0 - module_tolerance_bpx0); - return gap; - } - - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hole4(const HitsConstView& hh, CACellT const& innerCell) const { - using namespace phase1PixelTopology; - - int p = outer_iphi(hh); - if (p < 0) - p += std::numeric_limits::max(); - p = (max_ladder_bpx4 * p) / std::numeric_limits::max(); - p %= max_ladder_bpx4; - auto il = first_ladder_bpx4 + p; - auto r4 = hh.averageGeometry().ladderR[il]; - auto ri = innerCell.inner_r(hh); - auto zi = innerCell.inner_z(hh); - auto ro = outer_r(hh); - auto zo = outer_z(hh); - auto z4 = zo + (r4 - ro) * (zo - zi) / (ro - ri); - auto z_in_ladder = std::abs(z4 - hh.averageGeometry().ladderZ[il]); - auto z_in_module = z_in_ladder - module_length_bpx4 * int(z_in_ladder / module_length_bpx4); - auto gap = z_in_module < module_tolerance_bpx4 || z_in_module > (module_length_bpx4 - module_tolerance_bpx4); - auto holeP = z4 > hh.averageGeometry().ladderMaxZ[il] && z4 < hh.averageGeometry().endCapZ[0]; - auto holeN = z4 < hh.averageGeometry().ladderMinZ[il] && z4 > hh.averageGeometry().endCapZ[1]; - return gap || holeP || holeN; - } - // trying to free the track building process from hardcoded layers, leaving // the visit of the graph based on the neighborhood connections between cells. 
- template - ALPAKA_FN_ACC ALPAKA_FN_INLINE void find_ntuplets(TAcc const& acc, - const HitsConstView& hh, - CACellT* __restrict__ cells, - CellTracksVector& cellTracks, + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void find_ntuplets(Acc1D const& acc, + const ::reco::CAGraphSoAConstView& cc, + CACell* __restrict__ cells, HitContainer& foundNtuplets, + CellToCell const* __restrict__ cellNeighborsHisto, + CellToTracks* cellTracksHisto, + uint32_t* nCellTracks, + CAPairSoAView ct, cms::alpakatools::AtomicPairCounter& apc, Quality* __restrict__ quality, TmpTuple& tmpNtuplet, - const unsigned int minHitsPerNtuplet, - bool startAt0) const { + const unsigned int minHitsPerNtuplet) const { // the building process for a track ends if: // it has no right neighbor // it has no compatible neighbor // the ntuplets is then saved if the number of hits it contains is greater // than a threshold - if constexpr (DEPTH <= 0) { - printf("ERROR: CACellT::find_ntuplets reached full depth!\n"); + printf("ERROR: CACell::find_ntuplets reached full depth!\n"); ALPAKA_ASSERT_ACC(false); } else { auto doubletId = this - cells; - tmpNtuplet.push_back_unsafe(doubletId); + tmpNtuplet.push_back_unsafe(doubletId); // if we move this to be safe we could parallelize further below? ALPAKA_ASSERT_ACC(tmpNtuplet.size() <= int(TrackerTraits::maxHitsOnTrack - 3)); bool last = true; - for (unsigned int otherCell : outerNeighbors()) { + auto const* __restrict__ bin = cellNeighborsHisto->begin(doubletId); + auto nInBin = cellNeighborsHisto->size(doubletId); + + for (auto idx = 0u; idx < nInBin; idx++) { + // FIXME implement alpaka::ldg and use it here? or is it const* __restrict__ enough? + unsigned int otherCell = bin[idx]; if (cells[otherCell].isKilled()) - continue; // killed by earlyFishbone + continue; +#ifdef CA_DEBUG + printf("Doublet no. 
%d %d doubletId: %ld -> %d (isKilled %d) (%d,%d) -> (%d,%d) %d %d\n", + tmpNtuplet.size(), + idx, + doubletId, + otherCell, + cells[otherCell].isKilled(), + this->inner_hit_id(), + this->outer_hit_id(), + cells[otherCell].inner_hit_id(), + cells[otherCell].outer_hit_id(), + idx, + nInBin); +#endif + last = false; - cells[otherCell].template find_ntuplets( - acc, hh, cells, cellTracks, foundNtuplets, apc, quality, tmpNtuplet, minHitsPerNtuplet, startAt0); + cells[otherCell].template find_ntuplets(acc, + cc, + cells, + foundNtuplets, + cellNeighborsHisto, + cellTracksHisto, + nCellTracks, + ct, + apc, + quality, + tmpNtuplet, + minHitsPerNtuplet); } if (last) { // if long enough save... if ((unsigned int)(tmpNtuplet.size()) >= minHitsPerNtuplet - 1) { -#ifdef ONLY_TRIPLETS_IN_HOLE - // triplets accepted only pointing to the hole - if (tmpNtuplet.size() >= 3 || (startAt0 && hole4(hh, cells[tmpNtuplet[0]])) || - ((!startAt0) && hole0(hh, cells[tmpNtuplet[0]]))) -#endif { hindex_type hits[TrackerTraits::maxDepth + 2]; auto nh = 0U; constexpr int maxFB = 2; // for the time being let's limit this int nfb = 0; for (auto c : tmpNtuplet) { - hits[nh++] = cells[c].theInnerHitId; + hits[nh++] = cells[c].theInnerHitId_; if (nfb < maxFB && cells[c].hasFishbone()) { ++nfb; - hits[nh++] = cells[c].theFishboneId; // Fishbone hit is always outer than inner hit + hits[nh++] = cells[c].theFishboneId_; // Fishbone hit is always outer than inner hit } } ALPAKA_ASSERT_ACC(nh < TrackerTraits::maxHitsOnTrack); - hits[nh] = theOuterHitId; + hits[nh] = theOuterHitId_; auto it = foundNtuplets.bulkFill(acc, apc, hits, nh + 1); +#ifdef CA_DEBUG + printf("track n. %d nhits %d with cells: ", it, nh + 1); +#endif if (it >= 0) { // if negative is overflow.... 
- for (auto c : tmpNtuplet) - cells[c].addTrack(acc, it, cellTracks); + for (auto c : tmpNtuplet) { +#ifdef CA_DEBUG + printf("%d - ", c); +#endif + auto t_ind = alpaka::atomicAdd(acc, nCellTracks, 1u, alpaka::hierarchy::Blocks{}); + + if (t_ind >= uint32_t(ct.metadata().size())) { +#ifdef CA_WARNINGS + printf("Warning!!!! Too many cell->tracks associations (limit = %d)!\n", ct.metadata().size()); +#endif + alpaka::atomicSub(acc, nCellTracks, 1u, alpaka::hierarchy::Blocks{}); + break; + } + cellTracksHisto->count(acc, c); + + ct[t_ind].inner() = c; //cell + ct[t_ind].outer() = it; //track + } +#ifdef CA_DEBUG + printf("\n"); +#endif quality[it] = bad; // initialize to bad } } @@ -350,43 +277,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } } - // Cell status management - ALPAKA_FN_ACC ALPAKA_FN_INLINE void kill() { theStatus_ |= uint16_t(StatusBit::kKilled); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isKilled() const { return theStatus_ & uint16_t(StatusBit::kKilled); } - - ALPAKA_FN_ACC ALPAKA_FN_INLINE int16_t layerPairId() const { return theLayerPairId_; } - - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool unused() const { return 0 == (uint16_t(StatusBit::kUsed) & theStatus_); } - ALPAKA_FN_ACC ALPAKA_FN_INLINE void setStatusBits(StatusBit mask) { theStatus_ |= uint16_t(mask); } - - template - ALPAKA_FN_ACC ALPAKA_FN_INLINE void setFishbone(TAcc const& acc, hindex_type id, float z, const HitsConstView& hh) { - // make it deterministic: use the farther apart (in z) - auto old = theFishboneId; - while (old != - alpaka::atomicCas( - acc, - &theFishboneId, - old, - (invalidHitId == old || std::abs(z - theInnerZ) > std::abs(hh[old].zGlobal() - theInnerZ)) ? 
id : old, - alpaka::hierarchy::Blocks{})) - old = theFishboneId; - } - ALPAKA_FN_ACC ALPAKA_FN_INLINE auto fishboneId() const { return theFishboneId; } - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool hasFishbone() const { return theFishboneId != invalidHitId; } - private: - CellNeighbors* theOuterNeighbors; - CellTracks* theTracks; - int16_t theLayerPairId_; + uint8_t theInnerLayer_; + uint8_t theOuterLayer_; uint16_t theStatus_; // tbd - float theInnerZ; - float theInnerR; - hindex_type theInnerHitId; - hindex_type theOuterHitId; - hindex_type theFishboneId; + float theInnerZ_; + float theInnerR_; + hindex_type theInnerHitId_; + hindex_type theOuterHitId_; + hindex_type theFishboneId_; }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h index e7e1554fa7a5b..96924f76932ec 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAFishbone.h @@ -17,108 +17,162 @@ #include "CACell.h" #include "CAStructures.h" +//#define GPU_DEBUG + namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { - template - using CellNeighbors = caStructures::CellNeighborsT; - template - using CellTracks = caStructures::CellTracksT; - template - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - template - using CellTracksVector = caStructures::CellTracksVectorT; - template - using OuterHitOfCell = caStructures::OuterHitOfCellT; - template - using HitsConstView = typename CACellT::HitsConstView; + using HitToCell = caStructures::GenericContainer; + using CellToTracks = caStructures::GenericContainer; template class CAFishbone { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const& acc, - HitsConstView hh, - CACellT* cells, + ALPAKA_FN_ACC void operator()(Acc2D const& acc, + HitsConstView hh, + CACell* cells, uint32_t const* __restrict__ nCells, - OuterHitOfCell const* isOuterHitOfCellWrap, - int32_t 
nHits, + HitToCell const* __restrict__ outerHitHisto, + CellToTracks const* __restrict__ cellTracksHisto, + uint32_t outerHits, bool checkTrack) const { - constexpr auto maxCellsPerHit = CACellT::maxCellsPerHit; - - int32_t layer2Offset = isOuterHitOfCellWrap->offset; - // if there are no hits outside of the BPIX1, there is nothing to do - if (nHits <= layer2Offset) - return; - - auto const isOuterHitOfCell = isOuterHitOfCellWrap->container; - - float x[maxCellsPerHit], y[maxCellsPerHit], z[maxCellsPerHit], n[maxCellsPerHit]; - uint32_t cc[maxCellsPerHit]; - uint16_t d[maxCellsPerHit]; - uint8_t l[maxCellsPerHit]; - // outermost parallel loop, using all grid elements along the slower dimension (Y or 0 in a 2D grid) - for (uint32_t idy : cms::alpakatools::uniform_elements_y(acc, nHits - layer2Offset)) { - auto const& vc = isOuterHitOfCell[idy]; - auto size = vc.size(); + for (uint32_t idy : cms::alpakatools::uniform_elements_y(acc, outerHits)) { + uint32_t size = outerHitHisto->size(idy); + if (size < 2) continue; + + auto const* __restrict__ bin = outerHitHisto->begin(idy); + // if alligned kill one of the two. // in principle one could try to relax the cut (only in r-z?) 
for jumping-doublets - auto const& c0 = cells[vc[0]]; + auto const& c0 = cells[bin[0]]; auto xo = c0.outer_x(hh); auto yo = c0.outer_y(hh); auto zo = c0.outer_z(hh); - auto sg = 0; - for (int32_t ic = 0; ic < size; ++ic) { - auto& ci = cells[vc[ic]]; + + for (uint32_t ic : cms::alpakatools::independent_group_elements_x(acc, size)) { + unsigned int otherCell = bin[ic]; + auto& ci = cells[otherCell]; + if (ci.unused()) continue; // for triplets equivalent to next - if (checkTrack && ci.tracks().empty()) + if (checkTrack && cellTracksHisto->size(otherCell) == 0) continue; - cc[sg] = vc[ic]; - l[sg] = ci.layerPairId(); - d[sg] = ci.inner_detIndex(hh); - x[sg] = ci.inner_x(hh) - xo; - y[sg] = ci.inner_y(hh) - yo; - z[sg] = ci.inner_z(hh) - zo; - n[sg] = x[sg] * x[sg] + y[sg] * y[sg] + z[sg] * z[sg]; - ++sg; - } - if (sg < 2) - continue; - // innermost parallel loop, using the block elements along the faster dimension (X or 1 in a 2D grid) - for (uint32_t ic : cms::alpakatools::independent_group_elements_x(acc, sg - 1)) { - auto& ci = cells[cc[ic]]; - for (auto jc = ic + 1; (int)jc < sg; ++jc) { - auto& cj = cells[cc[jc]]; - // must be different detectors (in the same layer) - // if (d[ic]==d[jc]) continue; - auto cos12 = x[ic] * x[jc] + y[ic] * y[jc] + z[ic] * z[jc]; + float x1 = (ci.inner_x(hh) - xo); + float y1 = (ci.inner_y(hh) - yo); + float z1 = (ci.inner_z(hh) - zo); + float n1 = x1 * x1 + y1 * y1 + z1 * z1; + + for (auto jc = ic + 1; jc < size; ++jc) { + unsigned int nextCell = bin[jc]; + auto& cj = cells[nextCell]; + if (cj.unused()) + continue; + if (checkTrack && cellTracksHisto->size(nextCell) == 0) + continue; +#ifdef GPU_DEBUG + printf("xx = %.2f yo = %.2f zo = %.2f xi = %.2f yi = %.2f zi = %.2f xj = %.2f yj = %.2f zj = %.2f\n", + xo, + yo, + zo, + ci.inner_x(hh), + ci.inner_y(hh), + ci.inner_z(hh), + cj.inner_x(hh), + cj.inner_y(hh), + cj.inner_z(hh)); +#endif + + if (ci.inner_detIndex(hh) == cj.inner_detIndex(hh)) + continue; + + float x2 = 
(cj.inner_x(hh) - xo); + float y2 = (cj.inner_y(hh) - yo); + float z2 = (cj.inner_z(hh) - zo); + float n2 = x2 * x2 + y2 * y2 + z2 * z2; + + auto cos12 = x1 * x2 + y1 * y2 + z1 * z2; - if (d[ic] != d[jc] && cos12 * cos12 >= 0.99999f * (n[ic] * n[jc])) { + if (cos12 * cos12 >= 0.99999f * (n1 * n2)) { // alligned: kill farthest (prefer consecutive layers) // if same layer prefer farthest (longer level arm) and make space for intermediate hit - bool sameLayer = l[ic] == l[jc]; - if (n[ic] > n[jc]) { + bool sameLayer = int(ci.layerPairId()) == int(cj.layerPairId()); + if (n1 > n2) { if (sameLayer) { cj.kill(); // closest ci.setFishbone(acc, cj.inner_hit_id(), cj.inner_z(hh), hh); +#ifdef GPU_DEBUG + printf( + "n1>n2 lic = %d ljc = %d dic = %.2f djc = %.2f cell %d kill %d cos = %.7f n1 = %.3f n2 = %.3f " + "same\n", + int(ci.layerPairId()), + int(cj.layerPairId()), + ci.inner_detIndex(hh), + cj.inner_detIndex(hh), + bin[ic], + bin[jc], + cos12 * cos12 / (n1 * n2), + n1, + n2); +#endif } else { ci.kill(); // farthest +#ifdef GPU_DEBUG + printf( + "n1>n2 lic = %d ljc = %d dic = %.2f djc = %.2f cell %d kill %d cos = %.7f n1 = %.3f n2 = %.3f " + "diff\n", + int(ci.layerPairId()), + int(cj.layerPairId()), + ci.inner_detIndex(hh), + cj.inner_detIndex(hh), + bin[jc], + bin[ic], + cos12 * cos12 / (n1 * n2), + n1, + n2); +#endif // break; // removed to improve reproducibility, keep it for reference and tests } } else { if (!sameLayer) { cj.kill(); // farthest +#ifdef GPU_DEBUG + printf( + "n2>n1 lic = %d ljc = %d dic = %.2f djc = %.2f cell %d kill %d cos = %.7f n1 = %.3f n2 = %.3f " + "diff\n", + int(ci.layerPairId()), + int(cj.layerPairId()), + ci.inner_detIndex(hh), + cj.inner_detIndex(hh), + bin[ic], + bin[jc], + cos12 * cos12 / (n1 * n2), + n1, + n2); +#endif } else { ci.kill(); // closest cj.setFishbone(acc, ci.inner_hit_id(), ci.inner_z(hh), hh); +#ifdef GPU_DEBUG + printf( + "n2>n1 lic = %d ljc = %d dic = %.2f djc = %.2f cell %d kill %d cos = %.7f n1 = %.3f n2 = %.3f " + 
"same\n", + int(ci.layerPairId()), + int(cj.layerPairId()), + ci.inner_detIndex(hh), + cj.inner_detIndex(hh), + bin[jc], + bin[ic], + cos12 * cos12 / (n1 * n2), + n1, + n2); +#endif // break; // removed to improve reproducibility, keep it for reference and tests } } - } + } // cos } // cj } // ci } // hits diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc index 1055cf26b368b..59a8c19a0390f 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc @@ -1,5 +1,8 @@ #include +#include +#include "CommonTools/Utils/interface/FormulaEvaluator.h" + #include "DataFormats/TrackSoA/interface/TracksHost.h" #include "DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h" #include "DataFormats/TrackSoA/interface/TracksDevice.h" @@ -20,46 +23,221 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" #include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h" -#include "RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h" +#include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h" +#include "RecoTracker/PixelSeeding/interface/alpaka/CAGeometrySoACollection.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometryHost.h" #include "CAHitNtupletGenerator.h" +#include "HeterogeneousCore/AlpakaCore/interface/MoveToDeviceCache.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" + +// #define GPU_DEBUG + +namespace reco { + struct CAGeometryParams { + 
//Constructor from ParameterSet + CAGeometryParams(edm::ParameterSet const& iConfig) + : caThetaCuts_(iConfig.getParameter>("caThetaCuts")), + caDCACuts_(iConfig.getParameter>("caDCACuts")), + pairGraph_(iConfig.getParameter>("pairGraph")), + startingPairs_(iConfig.getParameter>("startingPairs")), + phiCuts_(iConfig.getParameter>("phiCuts")), + minZ_(iConfig.getParameter>("minZ")), + maxZ_(iConfig.getParameter>("maxZ")), + maxR_(iConfig.getParameter>("maxR")) {} + + // Layers params + const std::vector caThetaCuts_; + const std::vector caDCACuts_; + + // Cells params + const std::vector pairGraph_; + const std::vector startingPairs_; + const std::vector phiCuts_; + const std::vector minZ_; + const std::vector maxZ_; + const std::vector maxR_; + + mutable edm::ESGetToken tokenGeometry_; + mutable edm::ESGetToken tokenTopology_; + }; + +} // namespace reco + namespace ALPAKA_ACCELERATOR_NAMESPACE { + template - class CAHitNtupletAlpaka : public stream::EDProducer<> { - using HitsConstView = TrackingRecHitSoAConstView; - using HitsOnDevice = TrackingRecHitsSoACollection; - using HitsOnHost = TrackingRecHitHost; + class CAHitNtupletAlpaka + : public stream::EDProducer, + edm::RunCache>> { + using HitsConstView = ::reco::TrackingRecHitConstView; + using HitsOnDevice = reco::TrackingRecHitsSoACollection; + using HitsOnHost = ::reco::TrackingRecHitHost; - using TkSoAHost = TracksHost; - using TkSoADevice = TracksSoACollection; + using TkSoAHost = ::reco::TracksHost; + using TkSoADevice = reco::TracksSoACollection; using Algo = CAHitNtupletGenerator; + using CAGeometryCache = cms::alpakatools::MoveToDeviceCache; + using Rotation = SOARotation; + using Frame = SOAFrame; + public: - explicit CAHitNtupletAlpaka(const edm::ParameterSet& iConfig); + explicit CAHitNtupletAlpaka(const edm::ParameterSet& iConfig, const ::reco::CAGeometryParams* iCache); ~CAHitNtupletAlpaka() override = default; + void produce(device::Event& iEvent, const device::EventSetup& es) override; + + 
static void globalEndJob(::reco::CAGeometryParams const*) { /* Do nothing */ }; + static void globalEndRun(edm::Run const& iRun, + edm::EventSetup const&, + RunContext const* iContext) { /* Do nothing */ }; + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + static std::shared_ptr globalBeginRun(edm::Run const& iRun, + edm::EventSetup const& iSetup, + GlobalCache const* iCache) { + assert(iCache->minZ_.size() == iCache->maxZ_.size()); + assert(iCache->minZ_.size() == iCache->maxR_.size()); + assert(iCache->minZ_.size() == iCache->phiCuts_.size()); + + assert(iCache->caThetaCuts_.size() == iCache->caDCACuts_.size()); + + int n_layers = iCache->caThetaCuts_.size(); + int n_pairs = iCache->pairGraph_.size() / 2; + int n_modules = 0; + +#ifdef GPU_DEBUG + std::cout << "No. Layers to be used = " << n_layers << std::endl; + std::cout << "No. Pairs to be used = " << n_pairs << std::endl; +#endif + + assert(int(n_pairs) == int(iCache->minZ_.size())); + assert(int(*std::max_element(iCache->startingPairs_.begin(), iCache->startingPairs_.end())) <= n_pairs); + assert(int(*std::max_element(iCache->pairGraph_.begin(), iCache->pairGraph_.end())) < n_layers); + + const auto& trackerGeometry = iSetup.getData(iCache->tokenGeometry_); + const auto& trackerTopology = iSetup.getData(iCache->tokenTopology_); + auto const& dets = trackerGeometry.dets(); + +#ifdef GPU_DEBUG + auto subSystem = 1; + auto subSystemName = GeomDetEnumerators::tkDetEnum[subSystem]; + auto subSystemOffset = trackerGeometry.offsetDU(subSystemName); + std::cout + << "=========================================================================================================" + << std::endl; + std::cout << " ===================== Subsystem: " << subSystemName << std::endl; + subSystemName = GeomDetEnumerators::tkDetEnum[++subSystem]; + subSystemOffset = trackerGeometry.offsetDU(subSystemName); +#endif + + auto oldLayer = 0u; + auto layerCount = 0; + + std::vector layerStarts(n_layers + 1); 
+ //^ why n_layers + 1? This is a cumulative sum of the number + // of modules each layer has. And we need the extra spot + // at the end to hold the total number of modules. + + for (auto& det : dets) { + DetId detid = det->geographicalId(); +#ifdef GPU_DEBUG + if (n_modules >= int(subSystemOffset)) { + subSystemName = GeomDetEnumerators::tkDetEnum[++subSystem]; + subSystemOffset = trackerGeometry.offsetDU(subSystemName); + std::cout << " ===================== Subsystem: " << subSystemName << std::endl; + } +#endif + + auto layer = trackerTopology.layer(detid); + + if (layer != oldLayer) { + layerStarts[layerCount++] = n_modules; + + if (layerCount > n_layers + 1) + break; + + oldLayer = layer; +#ifdef GPU_DEBUG + std::cout << " > New layer at module : " << n_modules << " (detId: " << detid << ")" << std::endl; +#endif + } + + n_modules++; + } + + reco::CAGeometryHost product{{{n_layers + 1, n_pairs, n_modules}}, cms::alpakatools::host()}; + + auto layerSoA = product.view(); + auto cellSoA = product.view<::reco::CAGraphSoA>(); + auto modulesSoA = product.view<::reco::CAModulesSoA>(); + + for (int i = 0; i < n_modules; ++i) { + auto det = dets[i]; + auto vv = det->surface().position(); + auto rr = Rotation(det->surface().rotation()); + modulesSoA[i].detFrame() = Frame(vv.x(), vv.y(), vv.z(), rr); + } + + for (int i = 0; i < n_layers; ++i) { + layerSoA.layerStarts()[i] = layerStarts[i]; + layerSoA.caThetaCut()[i] = iCache->caThetaCuts_[i]; + layerSoA.caDCACut()[i] = iCache->caDCACuts_[i]; + } + + layerSoA.layerStarts()[n_layers] = layerStarts[n_layers]; + + for (int i = 0; i < n_pairs; ++i) { + cellSoA.graph()[i] = {{uint32_t(iCache->pairGraph_[2 * i]), uint32_t(iCache->pairGraph_[2 * i + 1])}}; + cellSoA.phiCuts()[i] = iCache->phiCuts_[i]; + cellSoA.minz()[i] = iCache->minZ_[i]; + cellSoA.maxz()[i] = iCache->maxZ_[i]; + cellSoA.maxr()[i] = iCache->maxR_[i]; + cellSoA.startingPair()[i] = false; + } + + for (const unsigned int& i : iCache->startingPairs_) + 
cellSoA.startingPair()[i] = true; + + return std::make_shared(std::move(product)); + } + + static std::unique_ptr<::reco::CAGeometryParams> initializeGlobalCache(edm::ParameterSet const& iConfig) { + return std::make_unique<::reco::CAGeometryParams>(iConfig.getParameterSet("geometry")); + } + private: const edm::ESGetToken tokenField_; - const device::ESGetToken, PixelCPEFastParamsRecord> cpeToken_; const device::EDGetToken tokenHit_; const device::EDPutToken tokenTrack_; + const ::reco::FormulaEvaluator maxNumberOfDoublets_; + const ::reco::FormulaEvaluator maxNumberOfTuples_; + Algo deviceAlgo_; }; template - CAHitNtupletAlpaka::CAHitNtupletAlpaka(const edm::ParameterSet& iConfig) + CAHitNtupletAlpaka::CAHitNtupletAlpaka(const edm::ParameterSet& iConfig, + const ::reco::CAGeometryParams* iCache) : EDProducer(iConfig), tokenField_(esConsumes()), - cpeToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CPE")))), tokenHit_(consumes(iConfig.getParameter("pixelRecHitSrc"))), tokenTrack_(produces()), - deviceAlgo_(iConfig) {} + maxNumberOfDoublets_(iConfig.getParameter("maxNumberOfDoublets")), + maxNumberOfTuples_(iConfig.getParameter("maxNumberOfTuples")), + deviceAlgo_(iConfig) { + iCache->tokenGeometry_ = esConsumes(); + iCache->tokenTopology_ = esConsumes(); + } template void CAHitNtupletAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -67,10 +245,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingAlpaka")); - std::string cpe = "PixelCPEFastParams"; - cpe += TrackerTraits::nameModifier; - desc.add("CPE", cpe); - Algo::fillPSetDescription(desc); descriptions.addWithDefaultLabel(desc); } @@ -79,11 +253,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { void CAHitNtupletAlpaka::produce(device::Event& iEvent, const device::EventSetup& es) { auto bf = 1. 
/ es.getData(tokenField_).inverseBzAtOriginInGeV(); - auto& fcpe = es.getData(cpeToken_); - + auto const& geometry = runCache()->get(iEvent.queue()); auto const& hits = iEvent.get(tokenHit_); - iEvent.emplace(tokenTrack_, deviceAlgo_.makeTuplesAsync(hits, fcpe.const_buffer().data(), bf, iEvent.queue())); + std::array nHitsV = {{double(hits.nHits())}}; + std::array emptyV; + + uint32_t const maxTuples = maxNumberOfTuples_.evaluate(nHitsV, emptyV); + uint32_t const maxDoublets = maxNumberOfDoublets_.evaluate(nHitsV, emptyV); + + iEvent.emplace(tokenTrack_, + deviceAlgo_.makeTuplesAsync(hits, geometry, bf, maxDoublets, maxTuples, iEvent.queue())); } using CAHitNtupletAlpakaPhase1 = CAHitNtupletAlpaka; diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc index 326fa9f73dd1b..4e205ba95f6b3 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc @@ -1,5 +1,5 @@ -//#define GPU_DEBUG -//#define DUMP_GPU_TK_TUPLES +// #define GPU_DEBUG +// #define DUMP_GPU_TK_TUPLES #include #include @@ -26,6 +26,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { using namespace caHitNtupletGenerator; using namespace caPixelDoublets; + using namespace caStructures; using namespace pixelTopology; using namespace pixelTrack; @@ -36,16 +37,58 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { //Common Params void fillDescriptionsCommon(edm::ParameterSetDescription& desc) { - // 87 cm/GeV = 1/(3.8T * 0.3) - // take less than radius given by the hardPtCut and reject everything below - // auto hardCurvCut = 1.f/(0.35 * 87.f); + desc.add("cellZ0Cut", 12.0f)->setComment("Z0 cut for cells"); + desc.add("cellPtCut", 0.5f)->setComment("Preliminary pT cut at cell building level."); + + //// Pixel Cluster Cuts (@cell level) + desc.add("dzdrFact", 8.0f * 0.0285f / 0.015f); + desc.add("minYsizeB1", 1) + ->setComment("Cut on inner hit 
cluster size (in Y) for barrel-forward cells. Barrel 1 cut."); + desc.add("minYsizeB2", 1) + ->setComment("Cut on inner hit cluster size (in Y) for barrel-forward cells. Barrel 2 cut."); + desc.add("maxDYsize12", 28) + ->setComment("Cut on cluster size differences (in Y) for barrel-forward cells. Barrel 1-2 cells."); + desc.add("maxDYsize", 20) + ->setComment("Cut on cluster size differences (in Y) for barrel-forward cells. Other barrel cells."); + desc.add("maxDYPred", 20) + ->setComment("Cut on cluster size differences (in Y) for barrel-forward cells. Barrel-forward cells."); + + // Container sizes + // + // maxNumberOfDoublets and maxNumberOfTuples may be defined at runtime depending on the number of hits. + // This is done via a FormulaEvaluator expecting 'x' as nHits. + // e.g. : maxNumberOfDoublets = cms.string( '0.00022*pow(x,2) + 0.53*x + 10000' ) + // will compute maxNumberOfDoublets for each event as + // + // maxNumberOfDoublets = 2.2e-4 * nHits^2 + 0.53 * nHits + 10000 + // + // this may also be simply a constant (as for the default parameters) + // + // maxNumberOfDoublets = cms.string(str(512*1024)) + // + + desc.add("maxNumberOfDoublets", std::to_string(pixelTopology::Phase1::maxNumberOfDoublets)) + ->setComment( + "Max nummber of doublets (cells) as a string. The string will be parsed to a TFormula, depending on " + "nHits (labeled 'x'), \n and evaluated for each event. May also be a constant."); + desc.add("maxNumberOfTuples", std::to_string(pixelTopology::Phase1::maxNumberOfTuples)) + ->setComment("Max nummber of tuples as a string. Same behavior as maxNumberOfDoublets."); + desc.add("avgHitsPerTrack", 5.0f)->setComment("Number of hits per track. Average per track."); + desc.add("avgCellsPerHit", 25.0f) + ->setComment("Number of cells for which an hit is the outer hit. Average per hit."); + desc.add("avgCellsPerCell", 2.0f) + ->setComment("Number of cells connected to another cell. 
Average per cell."); + desc.add("avgTracksPerCell", 1.0f) + ->setComment("Number of tracks to which a cell belongs. Average per cell."); + + // nTuplet Cuts and Params desc.add("ptmin", 0.9f)->setComment("Cut on minimum pt"); - desc.add("CAThetaCutBarrel", 0.002f)->setComment("Cut on RZ alignement for Barrel"); - desc.add("CAThetaCutForward", 0.003f)->setComment("Cut on RZ alignment for Forward"); + //// p [GeV/c] = B [T] * R [m] * 0.3 (factor from conversion from J to GeV and q = e = 1.6 * 10e-19 C) + //// 87 cm/GeV = 1/(3.8T * 0.3) + //// take less than radius given by the hardPtCut and reject everything below desc.add("hardCurvCut", 1.f / (0.35 * 87.f)) ->setComment("Cut on minimum curvature, used in DCA ntuplet selection"); - desc.add("dcaCutInnerTriplet", 0.15f)->setComment("Cut on origin radius when the inner hit is on BPix1"); - desc.add("dcaCutOuterTriplet", 0.25f)->setComment("Cut on origin radius when the outer hit is on BPix1"); + desc.add("earlyFishbone", true); desc.add("lateFishbone", false); desc.add("fillStatistics", false); @@ -54,9 +97,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { ->setComment("Maximum number of hits in a tuple to clean also if the shared hit is on bpx1"); desc.add("fitNas4", false)->setComment("fit only 4 hits out of N"); - desc.add("doClusterCut", true); - desc.add("doZ0Cut", true); - desc.add("doPtCut", true); + desc.add("useRiemannFit", false)->setComment("true for Riemann, false for BrokenLine"); desc.add("doSharedHitCut", true)->setComment("Sharing hit nTuples cleaning"); desc.add("dupPassThrough", false)->setComment("Do not reject duplicate"); @@ -64,16 +105,39 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } AlgoParams makeCommonParams(edm::ParameterSet const& cfg) { - return AlgoParams({cfg.getParameter("minHitsForSharingCut"), - cfg.getParameter("useRiemannFit"), - cfg.getParameter("fitNas4"), - cfg.getParameter("includeJumpingForwardDoublets"), - cfg.getParameter("earlyFishbone"), - cfg.getParameter("lateFishbone"), - 
cfg.getParameter("fillStatistics"), - cfg.getParameter("doSharedHitCut"), - cfg.getParameter("dupPassThrough"), - cfg.getParameter("useSimpleTripletCleaner")}); + return AlgoParams({ + + // Container sizes + (float)cfg.getParameter("avgHitsPerTrack"), + (float)cfg.getParameter("avgCellsPerHit"), + (float)cfg.getParameter("avgCellsPerCell"), + (float)cfg.getParameter("avgTracksPerCell"), + + // Algo params + (uint16_t)cfg.getParameter("minHitsPerNtuplet"), + (uint16_t)cfg.getParameter("minHitsForSharingCut"), + (float)cfg.getParameter("ptmin"), + (float)cfg.getParameter("hardCurvCut"), + (float)cfg.getParameter("cellZ0Cut"), + (float)cfg.getParameter("cellPtCut"), + + // Pixel Cluster Cut Params + (float)cfg.getParameter("dzdrFact"), + (int16_t)cfg.getParameter("minYsizeB1"), + (int16_t)cfg.getParameter("minYsizeB2"), + (int16_t)cfg.getParameter("maxDYsize12"), + (int16_t)cfg.getParameter("maxDYsize"), + (int16_t)cfg.getParameter("maxDYPred"), + + // Flags + cfg.getParameter("useRiemannFit"), + cfg.getParameter("fitNas4"), + cfg.getParameter("earlyFishbone"), + cfg.getParameter("lateFishbone"), + cfg.getParameter("fillStatistics"), + cfg.getParameter("doSharedHitCut"), + cfg.getParameter("dupPassThrough"), + cfg.getParameter("useSimpleTripletCleaner")}); } //This is needed to have the partial specialization for isPhase1Topology/isPhase2Topology @@ -82,17 +146,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template struct TopologyCuts> { - static constexpr CAParamsT makeCACuts(edm::ParameterSet const& cfg) { - return CAParamsT{{cfg.getParameter("maxNumberOfDoublets"), - cfg.getParameter("minHitsPerNtuplet"), - (float)cfg.getParameter("ptmin"), - (float)cfg.getParameter("CAThetaCutBarrel"), - (float)cfg.getParameter("CAThetaCutForward"), - (float)cfg.getParameter("hardCurvCut"), - (float)cfg.getParameter("dcaCutInnerTriplet"), - (float)cfg.getParameter("dcaCutOuterTriplet")}}; - }; - static constexpr ::pixelTrack::QualityCutsT makeQualityCuts(edm::ParameterSet const& 
pset) { auto coeff = pset.getParameter>("chi2Coeff"); auto ptMax = pset.getParameter("chi2MaxPt"); @@ -117,18 +170,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template struct TopologyCuts> { - static constexpr CAParamsT makeCACuts(edm::ParameterSet const& cfg) { - return CAParamsT{{cfg.getParameter("maxNumberOfDoublets"), - cfg.getParameter("minHitsPerNtuplet"), - (float)cfg.getParameter("ptmin"), - (float)cfg.getParameter("CAThetaCutBarrel"), - (float)cfg.getParameter("CAThetaCutForward"), - (float)cfg.getParameter("hardCurvCut"), - (float)cfg.getParameter("dcaCutInnerTriplet"), - (float)cfg.getParameter("dcaCutOuterTriplet")}, - {(bool)cfg.getParameter("includeFarForwards")}}; - } - static constexpr ::pixelTrack::QualityCutsT makeQualityCuts(edm::ParameterSet const& pset) { return ::pixelTrack::QualityCutsT{ static_cast(pset.getParameter("maxChi2")), @@ -139,22 +180,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } }; - //Cell Cuts, as they are the cuts have the same logic for Phase2 and Phase1 - //keeping them separate would allow further differentiation in the future - //moving them to TopologyCuts and using the same syntax - template - CellCutsT makeCellCuts(edm::ParameterSet const& cfg) { - return CellCutsT{cfg.getParameter("doClusterCut"), - cfg.getParameter("doZ0Cut"), - cfg.getParameter("doPtCut"), - cfg.getParameter("idealConditions"), - (float)cfg.getParameter("cellZ0Cut"), - (float)cfg.getParameter("cellPtCut"), - cfg.getParameter("minYsizeB1"), - cfg.getParameter("minYsizeB2"), - cfg.getParameter>("phiCuts")}; - } - } // namespace using namespace std; @@ -162,9 +187,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template CAHitNtupletGenerator::CAHitNtupletGenerator(const edm::ParameterSet& cfg) : m_params(makeCommonParams(cfg), - makeCellCuts(cfg), - TopologyCuts::makeQualityCuts(cfg.getParameterSet("trackQualityCuts")), - TopologyCuts::makeCACuts(cfg)) { + TopologyCuts::makeQualityCuts(cfg.getParameterSet("trackQualityCuts"))) { #ifdef 
DUMP_GPU_TK_TUPLES printf("TK: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n", "tid", @@ -198,12 +221,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { void CAHitNtupletGenerator::fillPSetDescription(edm::ParameterSetDescription& desc) { fillDescriptionsCommon(desc); - desc.add("maxNumberOfDoublets", pixelTopology::Phase1::maxNumberOfDoublets); - desc.add("idealConditions", true); - desc.add("includeJumpingForwardDoublets", false); - desc.add("cellZ0Cut", 12.0); - desc.add("cellPtCut", 0.5); - edm::ParameterSetDescription trackQualityCuts; trackQualityCuts.add("chi2MaxPt", 10.)->setComment("max pT used to determine the pT-dependent chi2 cut"); trackQualityCuts.add>("chi2Coeff", {0.9, 1.8})->setComment("chi2 at 1GeV and at ptMax above"); @@ -223,25 +240,58 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { "\"region " "cuts\" based on the fit results (pT, Tip, Zip)."); - desc.add("minYsizeB1", 1)->setComment("Min Y cluster size in pixel B1"); - desc.add("minYsizeB2", 1)->setComment("Min Y cluster size in pixel B2"); - - desc.add>( + edm::ParameterSetDescription geometryParams; + using namespace phase1PixelTopology; + // layers params + geometryParams + .add>("caDCACuts", + std::vector(std::begin(dcaCuts), std::begin(dcaCuts) + numberOfLayers)) + ->setComment("Cut on RZ alignement. One per layer, the layer being the middle one for a triplet."); + geometryParams + .add>("caThetaCuts", + std::vector(std::begin(thetaCuts), std::begin(thetaCuts) + numberOfLayers)) + ->setComment("Cut on origin radius. 
One per layer, the layer being the innermost one for a triplet."); + geometryParams.add>("startingPairs", {0u, 1u, 2u}) + ->setComment( + "The list of the ids of pairs from which the CA ntuplets building may start."); //TODO could be parsed via an expression + // cells params + geometryParams + .add>( + "pairGraph", + std::vector(std::begin(layerPairs), + std::begin(layerPairs) + (pixelTopology::Phase1::nPairsForQuadruplets * 2))) + ->setComment("CA graph"); + geometryParams + .add>( "phiCuts", - std::vector(std::begin(phase1PixelTopology::phicuts), std::end(phase1PixelTopology::phicuts))) + std::vector(std::begin(phicuts), std::begin(phicuts) + pixelTopology::Phase1::nPairsForQuadruplets)) ->setComment("Cuts in phi for cells"); + geometryParams + .add>( + "minZ", + std::vector(std::begin(minz), std::begin(minz) + pixelTopology::Phase1::nPairsForQuadruplets)) + ->setComment("Cuts in min z (on inner hit) for cells"); + geometryParams + .add>( + "maxZ", + std::vector(std::begin(maxz), std::begin(maxz) + pixelTopology::Phase1::nPairsForQuadruplets)) + ->setComment("Cuts in max z (on inner hit) for cells"); + geometryParams + .add>( + "maxR", + std::vector(std::begin(maxr), std::begin(maxr) + pixelTopology::Phase1::nPairsForQuadruplets)) + ->setComment("Cuts in max r for cells"); + + desc.add("geometry", geometryParams) + ->setComment( + "Quality cuts based on the results of the track fit:\n - apply cuts based on the fit results (pT, Tip, " + "Zip)."); } template <> void CAHitNtupletGenerator::fillPSetDescription(edm::ParameterSetDescription& desc) { fillDescriptionsCommon(desc); - desc.add("maxNumberOfDoublets", pixelTopology::HIonPhase1::maxNumberOfDoublets); - desc.add("idealConditions", false); - desc.add("includeJumpingForwardDoublets", false); - desc.add("cellZ0Cut", 10.0); - desc.add("cellPtCut", 0.0); - edm::ParameterSetDescription trackQualityCuts; trackQualityCuts.add("chi2MaxPt", 10.)->setComment("max pT used to determine the pT-dependent chi2 cut"); 
trackQualityCuts.add>("chi2Coeff", {0.9, 1.8})->setComment("chi2 at 1GeV and at ptMax above"); @@ -262,26 +312,61 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { "\"region " "cuts\" based on the fit results (pT, Tip, Zip)."); - desc.add("minYsizeB1", 36)->setComment("Min Y cluster size in pixel B1"); - desc.add("minYsizeB2", 28)->setComment("Min Y cluster size in pixel B2"); - - desc.add>( - "phiCuts", - std::vector(std::begin(phase1PixelTopology::phicuts), std::end(phase1PixelTopology::phicuts))) + edm::ParameterSetDescription geometryParams; + using namespace phase1PixelTopology; + // layers params + geometryParams + .add>("caDCACuts", + std::vector(std::begin(phase1HIonPixelTopology::dcaCuts), + std::begin(phase1HIonPixelTopology::dcaCuts) + numberOfLayers)) + ->setComment("Cut on RZ alignement. One per layer, the layer being the middle one for a triplet."); + geometryParams + .add>("caThetaCuts", + std::vector(std::begin(phase1HIonPixelTopology::thetaCuts), + std::begin(phase1HIonPixelTopology::thetaCuts) + numberOfLayers)) + ->setComment("Cut on origin radius. 
One per layer, the layer being the innermost one for a triplet."); + geometryParams.add>("startingPairs", {0u, 1u, 2u}) + ->setComment( + "The list of the ids of pairs from which the CA ntuplets building may start."); //TODO could be parsed via an expression + // cells params + geometryParams + .add>( + "pairGraph", + std::vector(std::begin(layerPairs), + std::begin(layerPairs) + (pixelTopology::Phase1::nPairsForQuadruplets * 2))) + ->setComment("CA graph"); + geometryParams + .add>("phiCuts", + std::vector(std::begin(phase1HIonPixelTopology::phicuts), + std::begin(phase1HIonPixelTopology::phicuts) + + pixelTopology::Phase1::nPairsForQuadruplets)) ->setComment("Cuts in phi for cells"); + geometryParams + .add>( + "minZ", + std::vector(std::begin(minz), std::begin(minz) + pixelTopology::Phase1::nPairsForQuadruplets)) + ->setComment("Cuts in min z (on inner hit) for cells"); + geometryParams + .add>( + "maxZ", + std::vector(std::begin(maxz), std::begin(maxz) + pixelTopology::Phase1::nPairsForQuadruplets)) + ->setComment("Cuts in max z (on inner hit) for cells"); + geometryParams + .add>( + "maxR", + std::vector(std::begin(maxr), std::begin(maxr) + pixelTopology::Phase1::nPairsForQuadruplets)) + ->setComment("Cuts in max r for cells"); + + desc.add("geometry", geometryParams) + ->setComment( + "Quality cuts based on the results of the track fit:\n - apply cuts based on the fit results (pT, Tip, " + "Zip)."); } template <> void CAHitNtupletGenerator::fillPSetDescription(edm::ParameterSetDescription& desc) { fillDescriptionsCommon(desc); - desc.add("maxNumberOfDoublets", pixelTopology::Phase2::maxNumberOfDoublets); - desc.add("idealConditions", false); - desc.add("includeFarForwards", true); - desc.add("includeJumpingForwardDoublets", true); - desc.add("cellZ0Cut", 7.5); - desc.add("cellPtCut", 0.85); - edm::ParameterSetDescription trackQualityCuts; trackQualityCuts.add("maxChi2", 5.)->setComment("Max normalized chi2"); trackQualityCuts.add("minPt", 
0.5)->setComment("Min pT in GeV"); @@ -292,23 +377,59 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { "Quality cuts based on the results of the track fit:\n - apply cuts based on the fit results (pT, Tip, " "Zip)."); - desc.add("minYsizeB1", 25)->setComment("Min Y cluster size in pixel B1"); - desc.add("minYsizeB2", 15)->setComment("Min Y cluster size in pixel B2"); - - desc.add>( - "phiCuts", - std::vector(std::begin(phase2PixelTopology::phicuts), std::end(phase2PixelTopology::phicuts))) + edm::ParameterSetDescription geometryParams; + using namespace phase2PixelTopology; + // layers params + geometryParams + .add>("caDCACuts", + std::vector(std::begin(dcaCuts), std::begin(dcaCuts) + numberOfLayers)) + ->setComment("Cut on RZ alignement. One per layer, the layer being the middle one for a triplet."); + geometryParams + .add>("caThetaCuts", + std::vector(std::begin(thetaCuts), std::begin(thetaCuts) + numberOfLayers)) + ->setComment("Cut on origin radius. One per layer, the layer being the innermost one for a triplet."); + geometryParams + .add>("startingPairs", + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}) + ->setComment( + "The list of the ids of pairs from which the CA ntuplets building may start."); //TODO could be parsed via an expression + // cells params + geometryParams + .add>( + "pairGraph", std::vector(std::begin(layerPairs), std::begin(layerPairs) + (nPairs * 2))) + ->setComment("CA graph"); + geometryParams + .add>("phiCuts", std::vector(std::begin(phicuts), std::begin(phicuts) + nPairs)) ->setComment("Cuts in phi for cells"); + geometryParams.add>("minZ", std::vector(std::begin(minz), std::begin(minz) + nPairs)) + ->setComment("Cuts in min z (on inner hit) for cells"); + geometryParams.add>("maxZ", std::vector(std::begin(maxz), std::begin(maxz) + nPairs)) + ->setComment("Cuts in max z (on inner hit) for cells"); + geometryParams.add>("maxR", std::vector(std::begin(maxr), 
std::begin(maxr) + nPairs)) + ->setComment("Cuts in max r for cells"); + + desc.add("geometry", geometryParams) + ->setComment( + "Quality cuts based on the results of the track fit:\n - apply cuts based on the fit results (pT, Tip, " + "Zip)."); } template - TracksSoACollection CAHitNtupletGenerator::makeTuplesAsync( - HitsOnDevice const& hits_d, ParamsOnDevice const* cpeParams, float bfield, Queue& queue) const { + reco::TracksSoACollection CAHitNtupletGenerator::makeTuplesAsync(HitsOnDevice const& hits_d, + CAGeometryOnDevice const& geometry_d, + float bfield, + uint32_t nDoublets, + uint32_t nTracks, + Queue& queue) const { using HelixFit = HelixFit; - using TrackSoA = TracksSoACollection; using GPUKernels = CAHitNtupletGeneratorKernels; + using TrackHitSoA = ::reco::TrackHitSoA; + using HitContainer = caStructures::HitContainerT; - TrackSoA tracks(queue); + const int32_t H = m_params.algoParams_.avgHitsPerTrack_; + + reco::TracksSoACollection tracks({{int(nTracks), int(nTracks * H)}}, queue); // Don't bother if less than 2 this if (hits_d.view().metadata().size() < 2) { @@ -317,19 +438,38 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::memset(queue, ntracks_d, 0); return tracks; } - GPUKernels kernels(m_params, hits_d.view().metadata().size(), hits_d.offsetBPIX2(), queue); - - kernels.buildDoublets(hits_d.view(), hits_d.offsetBPIX2(), queue); - kernels.launchKernels(hits_d.view(), hits_d.offsetBPIX2(), tracks.view(), queue); - - HelixFit fitter(bfield, m_params.fitNas4_); - fitter.allocate(kernels.tupleMultiplicity(), tracks.view()); - if (m_params.useRiemannFit_) { - fitter.launchRiemannKernels( - hits_d.view(), cpeParams, hits_d.view().metadata().size(), TrackerTraits::maxNumberOfQuadruplets, queue); + GPUKernels kernels( + m_params, hits_d.nHits(), hits_d.offsetBPIX2(), nDoublets, nTracks, geometry_d.view().metadata().size(), queue); + + kernels.prepareHits(hits_d.view(), hits_d.view<::reco::HitModuleSoA>(), geometry_d.view(), queue); + 
kernels.buildDoublets(hits_d.view(), + geometry_d.view<::reco::CAGraphSoA>(), + geometry_d.view<::reco::CALayersSoA>(), + hits_d.offsetBPIX2(), + queue); + kernels.launchKernels(hits_d.view(), + hits_d.offsetBPIX2(), + geometry_d.view().metadata().size(), + tracks.view(), + tracks.view(), + geometry_d.view<::reco::CALayersSoA>(), + geometry_d.view<::reco::CAGraphSoA>(), + queue); + + HelixFit fitter(bfield, m_params.algoParams_.fitNas4_); + fitter.allocate(kernels.tupleMultiplicity(), tracks.view(), kernels.hitContainer()); + if (m_params.algoParams_.useRiemannFit_) { + fitter.launchRiemannKernels(hits_d.view(), + geometry_d.view<::reco::CAModulesSoA>(), + hits_d.view().metadata().size(), + TrackerTraits::maxNumberOfQuadruplets, + queue); } else { - fitter.launchBrokenLineKernels( - hits_d.view(), cpeParams, hits_d.view().metadata().size(), TrackerTraits::maxNumberOfQuadruplets, queue); + fitter.launchBrokenLineKernels(hits_d.view(), + geometry_d.view<::reco::CAModulesSoA>(), + hits_d.view().metadata().size(), + TrackerTraits::maxNumberOfQuadruplets, + queue); } kernels.classifyTuples(hits_d.view(), tracks.view(), queue); #ifdef GPU_DEBUG diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.h index ec3273a89dee6..efffa56aaddc6 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.h @@ -6,13 +6,15 @@ #include "DataFormats/SiPixelDetId/interface/PixelSubdetector.h" #include "DataFormats/TrackSoA/interface/TrackDefinitions.h" #include "DataFormats/TrackSoA/interface/alpaka/TracksSoACollection.h" +#include "DataFormats/TrackSoA/interface/TracksHost.h" +#include "DataFormats/TrackSoA/interface/TracksDevice.h" #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" #include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" #include 
"FWCore/Framework/interface/EventSetup.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" +#include "RecoTracker/PixelSeeding/interface/alpaka/CAGeometrySoACollection.h" #include "CACell.h" #include "CAHitNtupletGeneratorKernels.h" @@ -27,32 +29,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template class CAHitNtupletGenerator { public: - using HitsView = TrackingRecHitSoAView; - using HitsConstView = TrackingRecHitSoAConstView; - using HitsOnDevice = TrackingRecHitsSoACollection; - using HitsOnHost = TrackingRecHitHost; - using hindex_type = typename TrackingRecHitSoA::hindex_type; - - using HitToTuple = caStructures::HitToTupleT; - using TupleMultiplicity = caStructures::TupleMultiplicityT; - using OuterHitOfCell = caStructures::OuterHitOfCellT; - - using CACell = CACellT; - using TkSoAHost = TracksHost; - using TkSoADevice = TracksSoACollection; - using HitContainer = typename reco::TrackSoA::HitContainer; - using Tuple = HitContainer; - - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - using CellTracksVector = caStructures::CellTracksVectorT; + using HitsView = ::reco::TrackingRecHitView; + using HitsConstView = ::reco::TrackingRecHitConstView; + using HitsOnDevice = reco::TrackingRecHitsSoACollection; + using HitsOnHost = ::reco::TrackingRecHitHost; + using TkSoADevice = reco::TracksSoACollection; using Quality = ::pixelTrack::Quality; using QualityCuts = ::pixelTrack::QualityCutsT; using Params = caHitNtupletGenerator::ParamsT; using Counters = caHitNtupletGenerator::Counters; - using ParamsOnDevice = pixelCPEforDevice::ParamsOnDeviceT; + using CAGeometryOnDevice = reco::CAGeometrySoACollection; public: CAHitNtupletGenerator(const edm::ParameterSet& cfg); @@ -68,17 +57,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // void 
endJob(); TkSoADevice makeTuplesAsync(HitsOnDevice const& hits_d, - ParamsOnDevice const* cpeParams, + CAGeometryOnDevice const& params_d, float bfield, + uint32_t maxDoublets, + uint32_t maxTuples, Queue& queue) const; private: - void buildDoublets(const HitsConstView& hh, Queue& queue) const; - - void hitNtuplets(const HitsConstView& hh, const edm::EventSetup& es, bool useRiemannFit, Queue& queue); - - void launchKernels(const HitsConstView& hh, bool useRiemannFit, Queue& queue) const; - Params m_params; }; diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc index 2d778f5e6e9de..c7558009fe5b2 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc @@ -17,97 +17,215 @@ #include "CAHitNtupletGeneratorKernels.h" #include "CAHitNtupletGeneratorKernelsImpl.h" -//#define GPU_DEBUG -//#define NTUPLE_DEBUG +// #define GPU_DEBUG +// #define NTUPLE_DEBUG +// #define CA_STATS namespace ALPAKA_ACCELERATOR_NAMESPACE { template CAHitNtupletGeneratorKernels::CAHitNtupletGeneratorKernels(Params const ¶ms, - uint32_t nhits, + uint32_t nHits, uint32_t offsetBPIX2, + uint32_t maxDoublets, + uint32_t maxTuples, + uint16_t nLayers, Queue &queue) - : m_params(params), - ////////////////////////////////////////////////////////// - // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER) - ////////////////////////////////////////////////////////// - counters_{cms::alpakatools::make_device_buffer(queue)}, - - // workspace - device_hitToTuple_{cms::alpakatools::make_device_buffer(queue)}, - device_hitToTupleStorage_{ - cms::alpakatools::make_device_buffer(queue, nhits + 1)}, - device_tupleMultiplicity_{cms::alpakatools::make_device_buffer(queue)}, - - // NB: In legacy, device_theCells_ and device_isOuterHitOfCell_ were allocated inside buildDoublets - 
device_theCells_{ - cms::alpakatools::make_device_buffer(queue, m_params.caParams_.maxNumberOfDoublets_)}, - // in principle we can use "nhits" to heuristically dimension the workspace... - device_isOuterHitOfCell_{ - cms::alpakatools::make_device_buffer(queue, std::max(1u, nhits - offsetBPIX2))}, - isOuterHitOfCell_{cms::alpakatools::make_device_buffer(queue)}, - - device_theCellNeighbors_{cms::alpakatools::make_device_buffer(queue)}, - device_theCellTracks_{cms::alpakatools::make_device_buffer(queue)}, - // NB: In legacy, cellStorage_ was allocated inside buildDoublets - cellStorage_{cms::alpakatools::make_device_buffer( - queue, - TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors) + - TrackerTraits::maxNumOfActiveDoublets * sizeof(CellTracks))}, - device_cellCuts_{cms::alpakatools::make_device_buffer(queue)}, - device_theCellNeighborsContainer_{reinterpret_cast(cellStorage_.data())}, - device_theCellTracksContainer_{reinterpret_cast( - cellStorage_.data() + TrackerTraits::maxNumOfActiveDoublets * sizeof(CellNeighbors))}, - - // NB: In legacy, device_storage_ was allocated inside allocateOnGPU - device_storage_{ - cms::alpakatools::make_device_buffer(queue, 3u)}, - device_hitTuple_apc_{reinterpret_cast(device_storage_.data())}, - device_hitToTuple_apc_{reinterpret_cast(device_storage_.data() + 1)}, - device_nCells_{ - cms::alpakatools::make_device_view(queue, *reinterpret_cast(device_storage_.data() + 2))} { + : m_params(params) { + ////////////////////////////////////////////////////////// + // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER) + ////////////////////////////////////////////////////////// + + counters_ = cms::alpakatools::make_device_buffer(queue); + // Here we define the OneToMany maps and the histograms + // allocating the buffers and defining the views. 
+ // For each map/histo, we need: + // - a buffer for the offsets sized as the number of ones + 1 + // (with the last bin holding the total number of ones) + // - a buffer for the content/storage itself sized as the number of many + + auto const &algoParams = m_params.algoParams_; + int outerHits = + nHits - offsetBPIX2; // the number of hits that may be used as outer hits for a cell (so not on bpix1) + + // These hold the max number of associations needed + int nHitsToTracks = std::max(int(maxTuples * algoParams.avgHitsPerTrack_), 1); + int nHitsToCells = std::max(int(outerHits * algoParams.avgCellsPerHit_), 1); + int nCellsToCells = std::max(int(maxDoublets * algoParams.avgCellsPerCell_), 1); + int nCellsToTracks = std::max(int(maxDoublets * algoParams.avgTracksPerCell_), 1); + #ifdef GPU_DEBUG - std::cout << "Allocation for tuple building. N hits " << nhits << std::endl; + std::cout << "Allocation for tuple building with: " << std::endl; + std::cout << "- nHits = " << nHits << std::endl; + std::cout << "- maxDoublets = " << maxTuples << std::endl; + std::cout << "- maxTracks = " << maxDoublets << std::endl; + + std::cout << "- nCellsToCells = " << nCellsToCells << std::endl; + std::cout << "- nHitsToCells = " << nHitsToCells << std::endl; + std::cout << "- nCellsToTracks = " << nCellsToTracks << std::endl; + std::cout << "- nHitsToTracks = " << nHitsToTracks << std::endl; #endif - alpaka::memset(queue, counters_, 0); - alpaka::memset(queue, device_nCells_, 0); - alpaka::memset(queue, cellStorage_, 0); - - auto cellCuts_h = cms::alpakatools::make_host_view(m_params.cellCuts_); - alpaka::memcpy(queue, device_cellCuts_, cellCuts_h); + // Hits -> Track + device_hitToTuple_ = cms::alpakatools::make_device_buffer(queue); + device_hitToTupleStorage_ = cms::alpakatools::make_device_buffer(queue, nHitsToTracks); + device_hitToTupleOffsets_ = cms::alpakatools::make_device_buffer(queue, nHits + 1); + device_hitToTupleView_ = {device_hitToTuple_->data(),
device_hitToTupleOffsets_->data(), + device_hitToTupleStorage_->data(), + int(nHits + 1), + nHitsToTracks}; - [[maybe_unused]] TupleMultiplicity *tupleMultiplicityDeviceData = device_tupleMultiplicity_.data(); - using TM = cms::alpakatools::OneToManyAssocRandomAccess; - TM *tm = device_tupleMultiplicity_.data(); - TM::template launchZero(tm, queue); - TupleMultiplicity::template launchZero(tupleMultiplicityDeviceData, queue); + HitToTuple::template launchZero(device_hitToTupleView_, queue); - device_hitToTupleView_.assoc = device_hitToTuple_.data(); - device_hitToTupleView_.offStorage = device_hitToTupleStorage_.data(); - device_hitToTupleView_.offSize = nhits + 1; + // (Outer) Hits-> Cells + device_hitToCell_ = cms::alpakatools::make_device_buffer(queue); + device_hitToCellStorage_ = cms::alpakatools::make_device_buffer(queue, nHitsToCells); + device_hitToCellOffsets_ = cms::alpakatools::make_device_buffer(queue, outerHits + 1); + device_hitToCellView_ = {device_hitToCell_->data(), + device_hitToCellOffsets_->data(), + device_hitToCellStorage_->data(), + outerHits + 1, + nHitsToCells}; + + HitToCell::template launchZero(device_hitToCellView_, queue); + + // Hits Phi Histograms: one histogram per layer + device_hitPhiHist_ = cms::alpakatools::make_device_buffer(queue); + device_phiBinnerStorage_ = cms::alpakatools::make_device_buffer(queue, nHits); + device_hitPhiView_ = {device_hitPhiHist_->data(), nullptr, device_phiBinnerStorage_->data(), -1, int(nHits)}; + // This will hold where each layer starts in the hit soa + device_layerStarts_ = cms::alpakatools::make_device_buffer(queue, nLayers + 1); + + // Cell -> Neighbor Cells + device_cellToNeighbors_ = cms::alpakatools::make_device_buffer(queue); + device_cellToNeighborsStorage_ = + cms::alpakatools::make_device_buffer(queue, nCellsToCells); + device_cellToNeighborsOffsets_ = + cms::alpakatools::make_device_buffer(queue, maxDoublets + 1); + device_cellToNeighborsView_ = {device_cellToNeighbors_->data(), + 
device_cellToNeighborsOffsets_->data(), + device_cellToNeighborsStorage_->data(), + int(maxDoublets + 1), + nCellsToCells}; + + CellToCell::template launchZero(device_cellToNeighborsView_, queue); + + // Cell -> Tracks + device_cellToTracks_ = cms::alpakatools::make_device_buffer(queue); + device_cellToTracksStorage_ = + cms::alpakatools::make_device_buffer(queue, nCellsToTracks); + device_cellToTracksOffsets_ = + cms::alpakatools::make_device_buffer(queue, maxDoublets + 1); + device_cellToTracksView_ = {device_cellToTracks_->data(), + device_cellToTracksOffsets_->data(), + device_cellToTracksStorage_->data(), + int(maxDoublets + 1), + nCellsToTracks}; + + CellToTrack::template launchZero(device_cellToTracksView_, queue); + + // Track -> Hits + // - This is a OneToManyAssocSequential since each bin is filled + // in one go: all the hits forming a track are pushed together. + device_hitContainer_ = cms::alpakatools::make_device_buffer(queue); + device_hitContainerStorage_ = + cms::alpakatools::make_device_buffer(queue, nHitsToTracks); + device_hitContainerOffsets_ = + cms::alpakatools::make_device_buffer(queue, maxTuples + 1); + device_hitContainerView_ = {device_hitContainer_->data(), + device_hitContainerOffsets_->data(), + device_hitContainerStorage_->data(), + int(maxTuples + 1), + nHitsToTracks}; + + HitContainer::template launchZero(device_hitContainerView_, queue); + + // No.Hits -> Track (track multiplicity) + device_tupleMultiplicity_ = cms::alpakatools::make_device_buffer(queue); + device_tupleMultiplicityStorage_ = + cms::alpakatools::make_device_buffer(queue, maxTuples); + device_tupleMultiplicityOffsets_ = + cms::alpakatools::make_device_buffer(queue, TrackerTraits::maxHitsOnTrack + 1); + device_tupleMultiplicityView_ = { + device_tupleMultiplicity_->data(), + device_tupleMultiplicityOffsets_->data(), + device_tupleMultiplicityStorage_->data(), + int(TrackerTraits::maxHitsOnTrack + 1), //TODO: this could become configurable with some work + 
int(maxTuples)}; + TupleMultiplicity::template launchZero(device_tupleMultiplicityView_, queue); + + // Structures and Counters Storage + device_simpleCells_ = cms::alpakatools::make_device_buffer(queue, maxDoublets); + + device_extraStorage_ = + cms::alpakatools::make_device_buffer(queue, 5u); + device_hitTuple_apc_ = reinterpret_cast(device_extraStorage_->data()); + device_nCells_ = + cms::alpakatools::make_device_view(queue, *reinterpret_cast(device_extraStorage_->data() + 2)); + device_nTriplets_ = + cms::alpakatools::make_device_view(queue, *reinterpret_cast(device_extraStorage_->data() + 3)); + device_nCellTracks_ = + cms::alpakatools::make_device_view(queue, *reinterpret_cast(device_extraStorage_->data() + 4)); + + deviceTriplets_ = CAPairSoACollection(maxDoublets * algoParams.avgCellsPerCell_, queue); + deviceTracksCells_ = CAPairSoACollection(nCellsToTracks, queue); + + //TODO: if doStats? + alpaka::memset(queue, *counters_, 0); + + alpaka::memset(queue, *device_nCells_, 0); + alpaka::memset(queue, *device_nTriplets_, 0); + alpaka::memset(queue, *device_nCellTracks_, 0); + + maxNumberOfDoublets_ = maxDoublets; - HitToTuple::template launchZero(device_hitToTupleView_, queue); #ifdef GPU_DEBUG + alpaka::wait(queue); std::cout << "Allocations for CAHitNtupletGeneratorKernels: done!" 
<< std::endl; #endif } + template + void CAHitNtupletGeneratorKernels::prepareHits(const HitsConstView &hh, + const HitModulesConstView &mm, + const reco::CALayersSoAConstView &ll, + Queue &queue) { + using namespace caHitNtupletGeneratorKernels; + + const auto workDiv1D = cms::alpakatools::make_workdiv(1, ll.metadata().size() - 1); + alpaka::exec(queue, workDiv1D, SetHitsLayerStart{}, mm, ll, this->device_layerStarts_->data()); + + cms::alpakatools::fillManyFromVector(device_hitPhiHist_->data(), + device_hitPhiView_, + TrackerTraits::numberOfLayers, // could be ll.metadata().size() - 1 + hh.iphi(), + this->device_layerStarts_->data(), + hh.metadata().size(), + (uint32_t)256, + queue); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "CAHitNtupletGeneratorKernels -> Hits prepared (layer starts and histo) -> DONE!" << std::endl; +#endif + } + template void CAHitNtupletGeneratorKernels::launchKernels(const HitsConstView &hh, uint32_t offsetBPIX2, + uint16_t nLayers, TkSoAView &tracks_view, + TkHitsSoAView &tracks_hits_view, + const reco::CALayersSoAConstView &ll, + const reco::CAGraphSoAConstView &cc, Queue &queue) { using namespace caPixelDoublets; using namespace caHitNtupletGeneratorKernels; - // zero tuples - HitContainer::template launchZero(&(tracks_view.hitIndices()), queue); - uint32_t nhits = hh.metadata().size(); - + auto const maxDoublets = this->maxNumberOfDoublets_; + auto const maxTuples = tracks_view.metadata().size(); #ifdef NTUPLE_DEBUG std::cout << "start tuple building. 
N hits " << nhits << std::endl; if (nhits < 2) @@ -121,10 +239,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const auto nthTot = 64; const auto stride = 4; auto blockSize = nthTot / stride; - auto numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize); + auto numberOfBlocks = cms::alpakatools::divide_up_by(3 * maxDoublets / 4, blockSize); const auto rescale = numberOfBlocks / 65536; blockSize *= (rescale + 1); - numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize); + numberOfBlocks = cms::alpakatools::divide_up_by(3 * maxDoublets / 4, blockSize); assert(numberOfBlocks < 65536); assert(blockSize > 0 && 0 == blockSize % 16); const Vec2D blks{numberOfBlocks, 1u}; @@ -134,17 +252,42 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, kernelConnectWorkDiv, Kernel_connect{}, - this->device_hitTuple_apc_, - this->device_hitToTuple_apc_, // needed only to be reset, ready for next kernel + this->device_hitTuple_apc_, // needed only to be reset, ready for next kernel hh, - this->device_theCells_.data(), - this->device_nCells_.data(), - this->device_theCellNeighbors_.data(), - this->isOuterHitOfCell_.data(), - this->m_params.caParams_); + ll, + this->deviceTriplets_->view(), + this->device_simpleCells_->data(), + this->device_nCells_->data(), + this->device_nTriplets_->data(), + this->device_hitToCell_->data(), + this->device_cellToNeighbors_->data(), + this->m_params.algoParams_); + + CellToCell::template launchFinalize(this->device_cellToNeighborsView_, queue); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Kernel_connect -> Done!" 
<< std::endl; +#endif + + auto threadsPerBlock = 1024; + auto blocks = cms::alpakatools::divide_up_by(maxDoublets * m_params.algoParams_.avgCellsPerCell_, threadsPerBlock); + auto workDiv1D = cms::alpakatools::make_workdiv(blocks, threadsPerBlock); + + alpaka::exec(queue, + workDiv1D, + Kernel_fillGenericPair{}, + this->deviceTriplets_->view(), + this->device_nTriplets_->data(), + this->device_cellToNeighbors_->data()); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "cellToNeighbors -> Filled!" << std::endl; +#endif // do not run the fishbone if there are hits only in BPIX1 - if (this->m_params.earlyFishbone_ and nhits > offsetBPIX2) { + if (this->m_params.algoParams_.earlyFishbone_ and nhits > offsetBPIX2) { const auto nthTot = 128; const auto stride = 16; const auto blockSize = nthTot / stride; @@ -156,96 +299,152 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { fishboneWorkDiv, CAFishbone{}, hh, - this->device_theCells_.data(), - this->device_nCells_.data(), - this->isOuterHitOfCell_.data(), - nhits, + this->device_simpleCells_->data(), + this->device_nCells_->data(), + this->device_hitToCell_->data(), + this->device_cellToTracks_->data(), + nhits - offsetBPIX2, false); +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Early fishbone -> Done!" 
<< std::endl; +#endif } blockSize = 64; - numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize); - auto workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); + numberOfBlocks = cms::alpakatools::divide_up_by(3 * maxDoublets / 4, blockSize); + workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); alpaka::exec(queue, workDiv1D, Kernel_find_ntuplets{}, - hh, + cc, tracks_view, - this->device_theCells_.data(), - this->device_nCells_.data(), - this->device_theCellTracks_.data(), + this->device_hitContainer_->data(), + this->device_cellToNeighbors_->data(), + this->device_cellToTracks_->data(), + this->deviceTracksCells_->view(), + this->device_simpleCells_->data(), + this->device_nCellTracks_->data(), + this->device_nTriplets_->data(), + this->device_nCells_->data(), this->device_hitTuple_apc_, - this->m_params.caParams_); + this->m_params.algoParams_); + #ifdef GPU_DEBUG alpaka::wait(queue); + std::cout << "Kernel_find_ntuplets -> Done!" 
<< std::endl; #endif - if (this->m_params.doStats_) + CellToTracks::template launchFinalize(this->device_cellToTracksView_, queue); + + blocks = cms::alpakatools::divide_up_by(maxDoublets * m_params.algoParams_.avgCellsPerCell_, threadsPerBlock); + workDiv1D = cms::alpakatools::make_workdiv(blocks, threadsPerBlock); + + alpaka::exec(queue, + workDiv1D, + Kernel_fillGenericPair{}, + this->deviceTracksCells_->view(), + this->device_nCellTracks_->data(), + this->device_cellToTracks_->data()); + + if (this->m_params.algoParams_.doStats_) alpaka::exec(queue, workDiv1D, Kernel_mark_used{}, - this->device_theCells_.data(), - this->device_nCells_.data()); + this->device_simpleCells_->data(), + this->device_cellToTracks_->data(), + this->device_nCells_->data()); #ifdef GPU_DEBUG alpaka::wait(queue); #endif blockSize = 128; - numberOfBlocks = cms::alpakatools::divide_up_by(HitContainer{}.totOnes(), blockSize); + numberOfBlocks = cms::alpakatools::divide_up_by(maxTuples + 1, blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); - alpaka::exec( - queue, workDiv1D, typename HitContainer::finalizeBulk{}, this->device_hitTuple_apc_, &tracks_view.hitIndices()); + alpaka::exec(queue, + workDiv1D, + typename HitContainer::finalizeBulk{}, + this->device_hitTuple_apc_, + this->device_hitContainer_->data()); #ifdef GPU_DEBUG alpaka::wait(queue); #endif - alpaka::exec(queue, workDiv1D, Kernel_fillHitDetIndices{}, tracks_view, hh); + alpaka::exec(queue, + workDiv1D, + Kernel_fillHitDetIndices{}, + tracks_view, + tracks_hits_view, + this->device_hitContainer_->data(), + hh); #ifdef GPU_DEBUG alpaka::wait(queue); + std::cout << "Kernel_fillHitDetIndices -> done!" 
<< std::endl; #endif - alpaka::exec(queue, workDiv1D, Kernel_fillNLayers{}, tracks_view, this->device_hitTuple_apc_); + alpaka::exec(queue, + workDiv1D, + Kernel_fillNLayers{}, + tracks_view, + tracks_hits_view, + this->device_layerStarts_->data(), + nLayers, + this->device_hitTuple_apc_); #ifdef GPU_DEBUG alpaka::wait(queue); + std::cout << "Kernel_fillNLayers -> done!" << std::endl; #endif // remove duplicates (tracks that share a doublet) - numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize); + numberOfBlocks = cms::alpakatools::divide_up_by(3 * maxDoublets / 4, blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); alpaka::exec(queue, workDiv1D, Kernel_earlyDuplicateRemover{}, - this->device_theCells_.data(), - this->device_nCells_.data(), + this->device_simpleCells_->data(), + this->device_nCells_->data(), + this->device_cellToTracks_->data(), tracks_view, - this->m_params.dupPassThrough_); + this->m_params.algoParams_.dupPassThrough_); #ifdef GPU_DEBUG alpaka::wait(queue); + std::cout << "Kernel_earlyDuplicateRemover -> done!" << std::endl; #endif blockSize = 128; - numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfTuples / 4, blockSize); + numberOfBlocks = cms::alpakatools::divide_up_by(3 * maxTuples / 4, blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); alpaka::exec(queue, workDiv1D, Kernel_countMultiplicity{}, tracks_view, - this->device_tupleMultiplicity_.data()); - TupleMultiplicity::template launchFinalize(this->device_tupleMultiplicity_.data(), queue); + this->device_hitContainer_->data(), + this->device_tupleMultiplicity_->data()); + GenericContainer::template launchFinalize(this->device_tupleMultiplicityView_, queue); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Kernel_countMultiplicity -> done!" 
<< std::endl; +#endif workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); - alpaka::exec( - queue, workDiv1D, Kernel_fillMultiplicity{}, tracks_view, this->device_tupleMultiplicity_.data()); + alpaka::exec(queue, + workDiv1D, + Kernel_fillMultiplicity{}, + tracks_view, + this->device_hitContainer_->data(), + this->device_tupleMultiplicity_->data()); #ifdef GPU_DEBUG alpaka::wait(queue); + std::cout << "Kernel_fillMultiplicity -> done!" << std::endl; #endif // do not run the fishbone if there are hits only in BPIX1 - if (this->m_params.lateFishbone_ and nhits > offsetBPIX2) { + if (this->m_params.algoParams_.lateFishbone_ and nhits > offsetBPIX2) { const auto nthTot = 128; const auto stride = 16; const auto blockSize = nthTot / stride; @@ -258,73 +457,35 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { workDiv2D, CAFishbone{}, hh, - this->device_theCells_.data(), - this->device_nCells_.data(), - this->isOuterHitOfCell_.data(), - nhits, + this->device_simpleCells_->data(), + this->device_nCells_->data(), + this->device_hitToCell_->data(), + this->device_cellToTracks_->data(), + nhits - offsetBPIX2, true); } #ifdef GPU_DEBUG + std::cout << "lateFishbone -> done!" 
<< std::endl; alpaka::wait(queue); #endif } template void CAHitNtupletGeneratorKernels::buildDoublets(const HitsConstView &hh, + const ::reco::CAGraphSoAConstView &cc, + const ::reco::CALayersSoAConstView &ll, uint32_t offsetBPIX2, Queue &queue) { using namespace caPixelDoublets; - using CACell = CACellT; - using OuterHitOfCell = typename CACell::OuterHitOfCell; - using CellNeighbors = typename CACell::CellNeighbors; - using CellTracks = typename CACell::CellTracks; - using OuterHitOfCellContainer = typename CACell::OuterHitOfCellContainer; + using namespace caHitNtupletGeneratorKernels; auto nhits = hh.metadata().size(); + const auto maxDoublets = this->maxNumberOfDoublets_; #ifdef NTUPLE_DEBUG std::cout << "building Doublets out of " << nhits << " Hits" << std::endl; #endif -#ifdef GPU_DEBUG - alpaka::wait(queue); -#endif - - // in principle we can use "nhits" to heuristically dimension the workspace... - ALPAKA_ASSERT_ACC(this->device_isOuterHitOfCell_.data()); - - alpaka::exec( - queue, - cms::alpakatools::make_workdiv(1, 1), - [] ALPAKA_FN_ACC(Acc1D const &acc, - OuterHitOfCell *isOuterHitOfCell, - OuterHitOfCellContainer *container, - int32_t const *offset) { - // this code runs on the device - isOuterHitOfCell->container = container; - isOuterHitOfCell->offset = *offset; - }, - this->isOuterHitOfCell_.data(), - this->device_isOuterHitOfCell_.data(), - &hh.offsetBPIX2()); - - { - int threadsPerBlock = 128; - // at least one block! 
- int blocks = std::max(1u, cms::alpakatools::divide_up_by(nhits - offsetBPIX2, threadsPerBlock)); - const auto workDiv1D = cms::alpakatools::make_workdiv(blocks, threadsPerBlock); - - alpaka::exec(queue, - workDiv1D, - InitDoublets{}, - this->isOuterHitOfCell_.data(), - nhits, - this->device_theCellNeighbors_.data(), - this->device_theCellNeighborsContainer_, - this->device_theCellTracks_.data(), - this->device_theCellTracksContainer_); - } - #ifdef GPU_DEBUG alpaka::wait(queue); #endif @@ -332,31 +493,53 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (0 == nhits) return; // protect against empty events - // take all layer pairs into account - auto nActualPairs = this->m_params.nPairs(); - const int stride = 4; - const int threadsPerBlock = TrackerTraits::getDoubletsFromHistoMaxBlockSize / stride; + int threadsPerBlock = TrackerTraits::getDoubletsFromHistoMaxBlockSize / stride; int blocks = (4 * nhits + threadsPerBlock - 1) / threadsPerBlock; const Vec2D blks{blocks, 1u}; const Vec2D thrs{threadsPerBlock, stride}; const auto workDiv2D = cms::alpakatools::make_workdiv(blks, thrs); +#ifdef GPU_DEBUG + std::cout << "nActualPairs = " << cc.metadata().size() << std::endl; + std::cout << blocks << " - " << threadsPerBlock << " - " << stride << std::endl; +#endif alpaka::exec(queue, workDiv2D, GetDoubletsFromHisto{}, - this->device_theCells_.data(), - this->device_nCells_.data(), - this->device_theCellNeighbors_.data(), - this->device_theCellTracks_.data(), + maxDoublets, + this->device_simpleCells_->data(), + this->device_nCells_->data(), hh, - this->isOuterHitOfCell_.data(), - nActualPairs, - this->m_params.caParams_.maxNumberOfDoublets_, - this->m_params.cellCuts_); + cc, + ll, + this->device_layerStarts_->data(), + this->device_hitPhiHist_->data(), + this->device_hitToCell_->data(), + this->m_params.algoParams_); + + HitToCell::template launchFinalize(this->device_hitToCellView_, queue); + + threadsPerBlock = 512; + blocks = 
cms::alpakatools::divide_up_by(maxDoublets, threadsPerBlock); + auto workDiv1D = cms::alpakatools::make_workdiv(blocks, threadsPerBlock); #ifdef GPU_DEBUG alpaka::wait(queue); + std::cout << "GetDoubletsFromHisto -> done!" << std::endl; +#endif + + alpaka::exec(queue, + workDiv1D, + FillDoubletsHisto{}, + this->device_simpleCells_->data(), + this->device_nCells_->data(), + offsetBPIX2, + this->device_hitToCell_->data()); + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "FillDoubletsHisto -> done!" << std::endl; #endif } @@ -366,146 +549,206 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Queue &queue) { using namespace caHitNtupletGeneratorKernels; +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Starting CAHitNtupletGeneratorKernels::classifyTuples" << std::endl; +#endif + uint32_t nhits = hh.metadata().size(); auto blockSize = 64; - + auto const maxDoublets = this->maxNumberOfDoublets_; + auto const maxTuples = tracks_view.metadata().size(); // classify tracks based on kinematics - auto numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfQuadruplets / 4, blockSize); + auto numberOfBlocks = cms::alpakatools::divide_up_by(3 * maxTuples / 4, blockSize); auto workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); - alpaka::exec( - queue, workDiv1D, Kernel_classifyTracks{}, tracks_view, this->m_params.qualityCuts_); + alpaka::exec(queue, + workDiv1D, + Kernel_classifyTracks{}, + tracks_view, + this->device_hitContainer_->data(), + this->m_params.qualityCuts_); +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Kernel_classifyTracks -> done!" 
<< std::endl; +#endif - if (this->m_params.lateFishbone_) { + if (this->m_params.algoParams_.lateFishbone_) { // apply fishbone cleaning to good tracks - numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize); + numberOfBlocks = cms::alpakatools::divide_up_by(3 * maxDoublets / 4, blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); alpaka::exec(queue, workDiv1D, Kernel_fishboneCleaner{}, - this->device_theCells_.data(), - this->device_nCells_.data(), + this->device_simpleCells_->data(), + this->device_nCells_->data(), + this->device_cellToTracks_->data(), tracks_view); } - +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Kernel_fishboneCleaner -> done!" << std::endl; +#endif // mark duplicates (tracks that share a doublet) - numberOfBlocks = cms::alpakatools::divide_up_by(3 * m_params.caParams_.maxNumberOfDoublets_ / 4, blockSize); + numberOfBlocks = cms::alpakatools::divide_up_by(3 * maxDoublets / 4, blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); alpaka::exec(queue, workDiv1D, Kernel_fastDuplicateRemover{}, - this->device_theCells_.data(), - this->device_nCells_.data(), + this->device_simpleCells_->data(), + this->device_nCells_->data(), + this->device_cellToTracks_->data(), tracks_view, - this->m_params.dupPassThrough_); + this->m_params.algoParams_.dupPassThrough_); #ifdef GPU_DEBUG alpaka::wait(queue); + std::cout << "Kernel_fastDuplicateRemover -> done!" 
<< std::endl; #endif - if (this->m_params.doSharedHitCut_ || this->m_params.doStats_) { + if (this->m_params.algoParams_.doSharedHitCut_ || this->m_params.algoParams_.doStats_) { // fill hit->track "map" - numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfQuadruplets / 4, blockSize); + numberOfBlocks = cms::alpakatools::divide_up_by(3 * maxTuples / 4, blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); alpaka::exec(queue, workDiv1D, Kernel_countHitInTracks{}, tracks_view, - this->device_hitToTuple_.data()); //CHECK + this->device_hitContainer_->data(), + this->device_hitToTuple_->data()); - HitToTuple::template launchFinalize(this->device_hitToTupleView_, queue); - alpaka::exec( - queue, workDiv1D, Kernel_fillHitInTracks{}, tracks_view, this->device_hitToTuple_.data()); + GenericContainer::template launchFinalize(this->device_hitToTupleView_, queue); + alpaka::exec(queue, + workDiv1D, + Kernel_fillHitInTracks{}, + tracks_view, + this->device_hitContainer_->data(), + this->device_hitToTuple_->data()); #ifdef GPU_DEBUG alpaka::wait(queue); + std::cout << "Kernel_countHitInTracks -> done!" << std::endl; #endif } - if (this->m_params.doSharedHitCut_) { + if (this->m_params.algoParams_.doSharedHitCut_) { // mark duplicates (tracks that share at least one hit) - numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfQuadruplets / 4, - blockSize); // TODO: Check if correct + numberOfBlocks = cms::alpakatools::divide_up_by(3 * maxTuples / 4, blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); alpaka::exec(queue, workDiv1D, Kernel_rejectDuplicate{}, tracks_view, - this->m_params.minHitsForSharingCut_, - this->m_params.dupPassThrough_, - this->device_hitToTuple_.data()); + this->m_params.algoParams_.dupPassThrough_, + this->device_hitToTuple_->data()); +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Kernel_rejectDuplicate -> done!" 
<< std::endl; +#endif alpaka::exec(queue, workDiv1D, Kernel_sharedHitCleaner{}, hh, + this->device_layerStarts_->data(), tracks_view, - this->m_params.minHitsForSharingCut_, - this->m_params.dupPassThrough_, - this->device_hitToTuple_.data()); - - if (this->m_params.useSimpleTripletCleaner_) { - // (typename HitToTuple{}::capacity(), - numberOfBlocks = cms::alpakatools::divide_up_by(HitToTuple{}.capacity(), blockSize); + this->m_params.algoParams_.minHitsForSharingCut_, + this->m_params.algoParams_.dupPassThrough_, + this->device_hitToTuple_->data()); +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Kernel_sharedHitCleaner -> done!" << std::endl; +#endif + if (this->m_params.algoParams_.useSimpleTripletCleaner_) { + numberOfBlocks = + cms::alpakatools::divide_up_by(int(nhits * this->m_params.algoParams_.avgHitsPerTrack_) + 1, blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); alpaka::exec(queue, workDiv1D, Kernel_simpleTripletCleaner{}, tracks_view, - this->m_params.minHitsForSharingCut_, - this->m_params.dupPassThrough_, - this->device_hitToTuple_.data()); + this->m_params.algoParams_.dupPassThrough_, + this->device_hitToTuple_->data()); +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "Kernel_simpleTripletCleaner -> done!" << std::endl; +#endif } else { - numberOfBlocks = cms::alpakatools::divide_up_by(HitToTuple{}.capacity(), blockSize); + numberOfBlocks = + cms::alpakatools::divide_up_by(int(nhits * this->m_params.algoParams_.avgHitsPerTrack_) + 1, blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); alpaka::exec(queue, workDiv1D, Kernel_tripletCleaner{}, tracks_view, - this->m_params.minHitsForSharingCut_, - this->m_params.dupPassThrough_, - this->device_hitToTuple_.data()); - } + this->m_params.algoParams_.dupPassThrough_, + this->device_hitToTuple_->data()); #ifdef GPU_DEBUG - alpaka::wait(queue); + alpaka::wait(queue); + std::cout << "Kernel_tripletCleaner -> done!" 
<< std::endl; #endif + } } - if (this->m_params.doStats_) { - numberOfBlocks = - cms::alpakatools::divide_up_by(std::max(nhits, m_params.caParams_.maxNumberOfDoublets_), blockSize); + if (this->m_params.algoParams_.doStats_) { + numberOfBlocks = cms::alpakatools::divide_up_by(std::max(nhits, maxDoublets), blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); alpaka::exec(queue, workDiv1D, Kernel_checkOverflows{}, tracks_view, - this->device_tupleMultiplicity_.data(), - this->device_hitToTuple_.data(), + this->device_hitContainer_->data(), + this->device_tupleMultiplicity_->data(), + this->device_hitToTuple_->data(), this->device_hitTuple_apc_, - this->device_theCells_.data(), - this->device_nCells_.data(), - this->device_theCellNeighbors_.data(), - this->device_theCellTracks_.data(), - this->isOuterHitOfCell_.data(), + this->device_simpleCells_->data(), + this->device_nCells_->data(), + this->device_nTriplets_->data(), + this->device_nCellTracks_->data(), + this->deviceTriplets_->view(), + this->deviceTracksCells_->view(), nhits, - this->m_params.caParams_.maxNumberOfDoublets_, - this->counters_.data()); + this->maxNumberOfDoublets_, + this->m_params.algoParams_, + this->counters_->data()); } - if (this->m_params.doStats_) { +#ifdef CA_STATS + alpaka::wait(queue); + workDiv1D = cms::alpakatools::make_workdiv(1, 1); + alpaka::exec(queue, + workDiv1D, + Kernel_printSizes{}, + hh, + tracks_view, + this->device_nCells_->data(), + this->device_nTriplets_->data(), + this->device_nCellTracks_->data()); + + alpaka::wait(queue); +#endif + if (this->m_params.algoParams_.doStats_) { // counters (add flag???) 
- numberOfBlocks = cms::alpakatools::divide_up_by(HitToTuple{}.capacity(), blockSize); + numberOfBlocks = + cms::alpakatools::divide_up_by(int(nhits * this->m_params.algoParams_.avgHitsPerTrack_) + 1, blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); alpaka::exec(queue, workDiv1D, Kernel_doStatsForHitInTracks{}, - this->device_hitToTuple_.data(), - this->counters_.data()); + this->device_hitToTuple_->data(), + this->counters_->data()); - numberOfBlocks = cms::alpakatools::divide_up_by(3 * TrackerTraits::maxNumberOfQuadruplets / 4, blockSize); + numberOfBlocks = cms::alpakatools::divide_up_by(3 * maxTuples / 4, blockSize); workDiv1D = cms::alpakatools::make_workdiv(numberOfBlocks, blockSize); - alpaka::exec( - queue, workDiv1D, Kernel_doStatsForTracks{}, tracks_view, this->counters_.data()); + alpaka::exec(queue, + workDiv1D, + Kernel_doStatsForTracks{}, + tracks_view, + this->device_hitContainer_->data(), + this->counters_->data()); + + auto workDiv1D = cms::alpakatools::make_workdiv(1, 1); + alpaka::exec(queue, workDiv1D, Kernel_printCounters{}, this->counters_->data()); } #ifdef GPU_DEBUG alpaka::wait(queue); @@ -524,7 +767,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Kernel_print_found_ntuplets{}, hh, tracks_view, - this->device_hitToTuple_.data(), + this->device_hitContainer_->data(), + this->device_hitToTuple_->data(), k, k + 500, iev); @@ -535,7 +779,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { Kernel_print_found_ntuplets{}, hh, tracks_view, - this->device_hitToTuple_.data(), + this->device_hitToTuple_->data(), 20000, 1000000, iev); @@ -550,7 +794,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template void CAHitNtupletGeneratorKernels::printCounters() { auto workDiv1D = cms::alpakatools::make_workdiv(1,1); - alpaka::exec(queue_, workDiv1D, Kernel_printCounters{}, this->counters_.data()); + alpaka::exec(queue_, workDiv1D, Kernel_printCounters{}, this->counters_->data()); } */ diff --git 
a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h index 6999169a46d73..2634c1479b3f1 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h @@ -1,8 +1,8 @@ #ifndef RecoTracker_PixelSeeding_plugins_alpaka_CAHitNtupletGeneratorKernels_h #define RecoTracker_PixelSeeding_plugins_alpaka_CAHitNtupletGeneratorKernels_h -//#define GPU_DEBUG -//#define DUMP_GPU_TK_TUPLES +// #define GPU_DEBUG +// #define DUMP_GPU_TK_TUPLES #include @@ -16,100 +16,50 @@ #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" +#include "RecoTracker/PixelSeeding/interface/alpaka/CAPairSoACollection.h" #include "CACell.h" #include "CAPixelDoublets.h" #include "CAStructures.h" namespace ALPAKA_ACCELERATOR_NAMESPACE { - namespace caHitNtupletGenerator { - - //Configuration params common to all topologies, for the algorithms - struct AlgoParams { - const uint32_t minHitsForSharingCut_; - const bool useRiemannFit_; - const bool fitNas4_; - const bool includeJumpingForwardDoublets_; - const bool earlyFishbone_; - const bool lateFishbone_; - const bool doStats_; - const bool doSharedHitCut_; - const bool dupPassThrough_; - const bool useSimpleTripletCleaner_; - }; - //CAParams - struct CACommon { - const uint32_t maxNumberOfDoublets_; - const uint32_t minHitsPerNtuplet_; - const float ptmin_; - const float CAThetaCutBarrel_; - const float CAThetaCutForward_; - const float hardCurvCut_; - const float dcaCutInnerTriplet_; - const float dcaCutOuterTriplet_; - }; - - template - struct CAParamsT : public CACommon { - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool startingLayerPair(int16_t pid) const { return false; 
}; - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool startAt0(int16_t pid) const { return false; }; - }; + using namespace ::caStructures; - template - struct CAParamsT> : public CACommon { - /// Is is a starting layer pair? - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool startingLayerPair(int16_t pid) const { - return minHitsPerNtuplet_ > 3 ? pid < 3 : pid < 8 || pid > 12; - } - - /// Is this a pair with inner == 0? - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool startAt0(int16_t pid) const { - ALPAKA_ASSERT_ACC( - (pixelTopology::Phase1::layerPairs[pid * 2] == 0) == - (pid < 3 || pid == 13 || pid == 15 || pid == 16)); // to be 100% sure it's working, may be removed - return pixelTopology::Phase1::layerPairs[pid * 2] == 0; - } - }; + namespace caHitNtupletGenerator { - template - struct CAParamsT> : public CACommon { - const bool includeFarForwards_; - /// Is is a starting layer pair? - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool startingLayerPair(int16_t pid) const { - return pid < 33; // in principle one could remove 5,6,7 23, 28 and 29 - } - - /// Is this a pair with inner == 0 - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool startAt0(int16_t pid) const { - ALPAKA_ASSERT_ACC((pixelTopology::Phase2::layerPairs[pid * 2] == 0) == ((pid < 3) | (pid >= 23 && pid < 28))); - return pixelTopology::Phase2::layerPairs[pid * 2] == 0; - } + //Counters + struct Counters { + unsigned long long nEvents; + unsigned long long nHits; + unsigned long long nCells; + unsigned long long nTuples; + unsigned long long nFitTracks; + unsigned long long nLooseTracks; + unsigned long long nGoodTracks; + unsigned long long nUsedHits; + unsigned long long nDupHits; + unsigned long long nFishCells; + unsigned long long nKilledCells; + unsigned long long nEmptyCells; + unsigned long long nZeroTrackCells; }; - //Full list of params = algo params + ca params + cell params + quality cuts + //Full list of params = algo params + quality cuts //Generic template template - struct ParamsT : public AlgoParams { - // one should define the params for 
its own pixelTopology - // not defining anything here - inline uint32_t nPairs() const { return 0; } - }; + struct ParamsT {}; template - struct ParamsT> : public AlgoParams { + struct ParamsT> { using TT = TrackerTraits; using QualityCuts = ::pixelTrack::QualityCutsT; //track quality cuts - using CellCuts = caPixelDoublets::CellCutsT; //cell building cuts - using CAParams = CAParamsT; //params to be used on device - ParamsT(AlgoParams const& commonCuts, - CellCuts const& cellCuts, - QualityCuts const& cutsCuts, - CAParams const& caParams) - : AlgoParams(commonCuts), cellCuts_(cellCuts), qualityCuts_(cutsCuts), caParams_(caParams) {} + ParamsT(AlgoParams const& commonCuts, QualityCuts const& qualityCuts) + : algoParams_(commonCuts), qualityCuts_(qualityCuts) {} - const CellCuts cellCuts_; + const AlgoParams algoParams_; const QualityCuts qualityCuts_{// polynomial coefficients for the pT-dependent chi2 cut {0.68177776, 0.74609577, -0.08035491, 0.00315399}, // max pT used to determine the chi2 cut @@ -128,22 +78,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { 0.3, // pT > 0.3 GeV 12.0 // |Zip| < 12.0 cm }}; - const CAParams caParams_; - /// Compute the number of pairs - inline uint32_t nPairs() const { - // take all layer pairs into account - uint32_t nActualPairs = TT::nPairs; - if (not includeJumpingForwardDoublets_) { - // exclude forward "jumping" layer pairs - nActualPairs = TT::nPairsForTriplets; - } - if (caParams_.minHitsPerNtuplet_ > 3) { - // for quadruplets, exclude all "jumping" layer pairs - nActualPairs = TT::nPairsForQuadruplets; - } - - return nActualPairs; - } }; // Params Phase1 @@ -151,125 +85,162 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { struct ParamsT> : public AlgoParams { using TT = TrackerTraits; using QualityCuts = ::pixelTrack::QualityCutsT; - using CellCuts = caPixelDoublets::CellCutsT; - using CAParams = CAParamsT; - ParamsT(AlgoParams const& commonCuts, - CellCuts const& cellCuts, - QualityCuts const& qualityCuts, - CAParams const& 
caParams) - : AlgoParams(commonCuts), cellCuts_(cellCuts), qualityCuts_(qualityCuts), caParams_(caParams) {} + ParamsT(AlgoParams const& commonCuts, QualityCuts const& qualityCuts) + : algoParams_(commonCuts), qualityCuts_(qualityCuts) {} // quality cuts - const CellCuts cellCuts_; + const AlgoParams algoParams_; const QualityCuts qualityCuts_{5.0f, /*chi2*/ 0.9f, /* pT in Gev*/ 0.4f, /*zip in cm*/ 12.0f /*tip in cm*/}; - const CAParams caParams_; - - inline uint32_t nPairs() const { - // take all layer pairs into account - uint32_t nActualPairs = TT::nPairsMinimal; - if (caParams_.includeFarForwards_) { - // considera far forwards (> 11 & > 23) - nActualPairs = TT::nPairsFarForwards; - } - if (includeJumpingForwardDoublets_) { - // include jumping forwards - nActualPairs = TT::nPairs; - } - - return nActualPairs; - } }; // Params Phase1 - // counters - struct Counters { - unsigned long long nEvents; - unsigned long long nHits; - unsigned long long nCells; - unsigned long long nTuples; - unsigned long long nFitTracks; - unsigned long long nLooseTracks; - unsigned long long nGoodTracks; - unsigned long long nUsedHits; - unsigned long long nDupHits; - unsigned long long nFishCells; - unsigned long long nKilledCells; - unsigned long long nEmptyCells; - unsigned long long nZeroTrackCells; - }; - - using Quality = ::pixelTrack::Quality; - } // namespace caHitNtupletGenerator - template class CAHitNtupletGeneratorKernels { public: using TrackerTraits = TTTraits; - using QualityCuts = ::pixelTrack::QualityCutsT; - using CellCuts = caPixelDoublets::CellCutsT; + + using SimpleCell = CACell; using Params = caHitNtupletGenerator::ParamsT; - using CAParams = caHitNtupletGenerator::CAParamsT; using Counters = caHitNtupletGenerator::Counters; - - using HitsView = TrackingRecHitSoAView; - using HitsConstView = TrackingRecHitSoAConstView; - using TkSoAView = reco::TrackSoAView; - - using HitToTuple = caStructures::template HitToTupleT; - using TupleMultiplicity = 
caStructures::template TupleMultiplicityT; - struct Testttt { - TupleMultiplicity tm; - }; - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - using CellNeighbors = caStructures::CellNeighborsT; - using CellTracksVector = caStructures::CellTracksVectorT; - using CellTracks = caStructures::CellTracksT; - using OuterHitOfCellContainer = caStructures::OuterHitOfCellContainerT; - using OuterHitOfCell = caStructures::OuterHitOfCellT; - - using CACell = CACellT; - + // Track qualities using Quality = ::pixelTrack::Quality; - using HitContainer = typename reco::TrackSoA::HitContainer; + using QualityCuts = ::pixelTrack::QualityCutsT; - CAHitNtupletGeneratorKernels(Params const& params, uint32_t nhits, uint32_t offsetBPIX2, Queue& queue); + // Histograms + + using PhiBinner = caStructures::PhiBinnerT; //the traits here define the number of layer/histograms + using PhiBinnerStorageType = typename PhiBinner::index_type; + using PhiBinnerView = typename PhiBinner::View; + + using HitToTuple = caStructures::GenericContainer; + using HitContainer = caStructures::SequentialContainer; + using TupleMultiplicity = caStructures::GenericContainer; + using HitToCell = caStructures::GenericContainer; + using CellToCell = caStructures::GenericContainer; + using CellToTrack = caStructures::GenericContainer; + + using GenericContainer = caStructures::GenericContainer; + using GenericContainerStorage = typename GenericContainer::index_type; + using GenericContainerView = typename GenericContainer::View; + using DeviceGenericContainerBuffer = std::optional>; + using DeviceGenericStorageBuffer = + std::optional>; + using DeviceGenericOffsetsBuffer = + std::optional>; + + using SequentialContainer = caStructures::SequentialContainer; + using SequentialContainerStorage = typename SequentialContainer::index_type; + using SequentialContainerView = typename SequentialContainer::View; + using DeviceSequentialContainerBuffer = std::optional>; + using DeviceSequentialStorageBuffer = 
+ std::optional>; + using DeviceSequentialOffsetsBuffer = + std::optional>; + + CAHitNtupletGeneratorKernels(Params const& params, + uint32_t nHits, + uint32_t offsetBPIX2, + uint32_t nDoublets, + uint32_t nTracks, + uint16_t nLayers, + Queue& queue); ~CAHitNtupletGeneratorKernels() = default; - TupleMultiplicity const* tupleMultiplicity() const { return device_tupleMultiplicity_.data(); } - - void launchKernels(const HitsConstView& hh, uint32_t offsetBPIX2, TkSoAView& track_view, Queue& queue); + TupleMultiplicity const* tupleMultiplicity() const { return device_tupleMultiplicity_->data(); } + HitContainer const* hitContainer() const { return device_hitContainer_->data(); } + HitToCell const* hitToCell() const { return device_hitToCell_->data(); } + HitToTuple const* hitToTuple() const { return device_hitToTuple_->data(); } + CellToCell const* cellToCell() const { return device_cellToNeighbors_->data(); } + CellToTrack const* cellToTrack() const { return device_cellToTracks_->data(); } + + void prepareHits(const HitsConstView& hh, + const HitModulesConstView& mm, + const ::reco::CALayersSoAConstView& ll, + Queue& queue); + + void launchKernels(const HitsConstView& hh, + uint32_t offsetBPIX2, + uint16_t nLayers, + TkSoAView& track_view, + TkHitsSoAView& track_hits_view, + const ::reco::CALayersSoAConstView& ll, + const ::reco::CAGraphSoAConstView& cc, + Queue& queue); void classifyTuples(const HitsConstView& hh, TkSoAView& track_view, Queue& queue); - void buildDoublets(const HitsConstView& hh, uint32_t offsetBPIX2, Queue& queue); + void buildDoublets(const HitsConstView& hh, + const ::reco::CAGraphSoAConstView& cc, + const ::reco::CALayersSoAConstView& ll, + uint32_t offsetBPIX2, + Queue& queue); static void printCounters(); private: // params Params const& m_params; - cms::alpakatools::device_buffer counters_; - - // workspace - cms::alpakatools::device_buffer device_hitToTuple_; - cms::alpakatools::device_buffer device_hitToTupleStorage_; - typename 
HitToTuple::View device_hitToTupleView_; - cms::alpakatools::device_buffer device_tupleMultiplicity_; - cms::alpakatools::device_buffer device_theCells_; - cms::alpakatools::device_buffer device_isOuterHitOfCell_; - cms::alpakatools::device_buffer isOuterHitOfCell_; - cms::alpakatools::device_buffer device_theCellNeighbors_; - cms::alpakatools::device_buffer device_theCellTracks_; - cms::alpakatools::device_buffer cellStorage_; - cms::alpakatools::device_buffer device_cellCuts_; - CellNeighbors* device_theCellNeighborsContainer_; - CellTracks* device_theCellTracksContainer_; - cms::alpakatools::device_buffer device_storage_; + std::optional> counters_; + + // Hits->Track + DeviceGenericContainerBuffer device_hitToTuple_; + DeviceGenericStorageBuffer device_hitToTupleStorage_; + DeviceGenericOffsetsBuffer device_hitToTupleOffsets_; + GenericContainerView device_hitToTupleView_; + + // (Outer) Hits-> Cells + DeviceGenericContainerBuffer device_hitToCell_; + DeviceGenericStorageBuffer device_hitToCellStorage_; + DeviceGenericOffsetsBuffer device_hitToCellOffsets_; + GenericContainerView device_hitToCellView_; + + // Hits Phi Binner + std::optional> device_hitPhiHist_; + std::optional> device_phiBinnerStorage_; + PhiBinnerView device_hitPhiView_; + std::optional> device_layerStarts_; + + // Cells-> Neighbor Cells + DeviceGenericContainerBuffer device_cellToNeighbors_; + DeviceGenericStorageBuffer device_cellToNeighborsStorage_; + DeviceGenericOffsetsBuffer device_cellToNeighborsOffsets_; + GenericContainerView device_cellToNeighborsView_; + + // Cells-> Tracks + DeviceGenericContainerBuffer device_cellToTracks_; + DeviceGenericStorageBuffer device_cellToTracksStorage_; + DeviceGenericOffsetsBuffer device_cellToTracksOffsets_; + GenericContainerView device_cellToTracksView_; + + // Tracks->Hits + DeviceSequentialContainerBuffer device_hitContainer_; + DeviceGenericStorageBuffer device_hitContainerStorage_; + DeviceSequentialOffsetsBuffer device_hitContainerOffsets_; + 
SequentialContainerView device_hitContainerView_; + + // No.Hits -> Track (Multiplicity) + DeviceGenericContainerBuffer device_tupleMultiplicity_; + DeviceGenericStorageBuffer device_tupleMultiplicityStorage_; + DeviceGenericOffsetsBuffer device_tupleMultiplicityOffsets_; + GenericContainerView device_tupleMultiplicityView_; + + std::optional> device_simpleCells_; + + std::optional> + device_extraStorage_; cms::alpakatools::AtomicPairCounter* device_hitTuple_apc_; - cms::alpakatools::AtomicPairCounter* device_hitToTuple_apc_; - cms::alpakatools::device_view device_nCells_; + std::optional> device_nCells_; + std::optional> device_nTriplets_; + std::optional> device_nCellTracks_; + + std::optional deviceTriplets_; + std::optional deviceTracksCells_; + + // this could be inferred from the above buffers + // but seems cleaner to have a dedicate variable + uint32_t maxNumberOfDoublets_; }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h index e72d221f7e21c..ca6e3eefc83d9 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h @@ -1,8 +1,10 @@ #ifndef RecoTracker_PixelSeeding_plugins_alpaka_CAHitNtupletGeneratorKernelsImpl_h #define RecoTracker_PixelSeeding_plugins_alpaka_CAHitNtupletGeneratorKernelsImpl_h -//#define GPU_DEBUG -//#define NTUPLE_DEBUG +// #define GPU_DEBUG +// #define NTUPLE_DEBUG +// #define CA_DEBUG +// #define CA_WARNINGS // C++ includes #include @@ -22,6 +24,7 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" #include "FWCore/Utilities/interface/isFinite.h" +#include "RecoTracker/PixelSeeding/interface/CAPairSoA.h" // local includes #include "CACell.h" @@ -30,62 +33,92 @@ namespace 
ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { - constexpr uint32_t tkNotFound = std::numeric_limits::max(); + using namespace ::caStructures; + + constexpr uint32_t tkNotFound = std::numeric_limits::max(); constexpr float maxScore = std::numeric_limits::max(); constexpr float nSigma2 = 25.f; - // all of these below are mostly to avoid brining around the relative namespace - - template - using HitToTuple = caStructures::HitToTupleT; - - template - using TupleMultiplicity = caStructures::TupleMultiplicityT; - - template - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - - template - using CellTracksVector = caStructures::CellTracksVectorT; - - template - using OuterHitOfCell = caStructures::OuterHitOfCellT; + // all of these below are mostly to avoid carrying around the relative namespace using Quality = ::pixelTrack::Quality; + using TkSoAView = ::reco::TrackSoAView; + using TkHitSoAView = ::reco::TrackHitSoAView; template - using TkSoAView = reco::TrackSoAView; - - template - using HitContainer = typename reco::TrackSoA::HitContainer; + using QualityCuts = ::pixelTrack::QualityCutsT; - template - using HitsConstView = typename CACellT::HitsConstView; + using Counters = caHitNtupletGenerator::Counters; + using HitToTuple = caStructures::GenericContainer; + using HitContainer = caStructures::SequentialContainer; + using TupleMultiplicity = caStructures::GenericContainer; + using HitToCell = caStructures::GenericContainer; + using CellToCell = caStructures::GenericContainer; + using CellToTrack = caStructures::GenericContainer; - template - using QualityCuts = ::pixelTrack::QualityCutsT; + using namespace cms::alpakatools; - template - using CAParams = caHitNtupletGenerator::CAParamsT; + class SetHitsLayerStart { + public: + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + const reco::HitModuleSoAConstView &mm, + const reco::CALayersSoAConstView &ll, + uint32_t *__restrict__ hitsLayerStart) const { + ALPAKA_ASSERT_ACC(0 == 
mm.moduleStart()[0]); + + for (int32_t i : cms::alpakatools::uniform_elements(acc, ll.metadata().size())) { + hitsLayerStart[i] = mm.moduleStart()[ll.layerStarts()[i]]; +#ifdef GPU_DEBUG + int old = i == 0 ? 0 : mm.moduleStart()[ll.layerStarts()[i - 1]]; + printf("LayerStart %d/%d at module %d: %d - %d\n", + i, + ll.metadata().size() - 1, + ll.layerStarts()[i], + hitsLayerStart[i], + hitsLayerStart[i] - old); +#endif + } + } + }; - using Counters = caHitNtupletGenerator::Counters; + class Kernel_printSizes { + public: + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + HitsConstView hh, + TkSoAView tt, + uint32_t const *__restrict__ nCells, + uint32_t const *__restrict__ nTrips, + uint32_t const *__restrict__ nCellTracks) const { + if (cms::alpakatools::once_per_grid(acc)) + printf("nSizes:%d;%d;%d;%d;%d;%d;%d\n", + hh.metadata().size(), + hh.metadata().size() - hh.offsetBPIX2(), + *nCells, + *nTrips, + *nCellTracks, + tt.nTracks(), + tt.metadata().size()); + } + }; template class Kernel_checkOverflows { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TkSoAView tracks_view, - TupleMultiplicity const *tupleMultiplicity, - HitToTuple const *hitToTuple, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TkSoAView tracks_view, + HitContainer const *__restrict__ foundNtuplets, + TupleMultiplicity const *tupleMultiplicity, + HitToTuple const *hitToTuple, cms::alpakatools::AtomicPairCounter *apc, - CACellT const *__restrict__ cells, + CACell const *__restrict__ cells, uint32_t const *__restrict__ nCells, - CellNeighborsVector const *cellNeighbors, - CellTracksVector const *cellTracks, - OuterHitOfCell const *isOuterHitOfCell, + uint32_t const *__restrict__ nTrips, + uint32_t const *__restrict__ nCellTracks, + caStructures::CAPairSoAConstView cellCell, + caStructures::CAPairSoAConstView cellTrack, int32_t nHits, uint32_t maxNumberOfDoublets, + AlgoParams const ¶ms, Counters *counters) const { auto &c = *counters; // counters once per event @@ 
-108,48 +141,36 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { apc->get().first, apc->get().second, nHits); - if (apc->get().first < TrackerTraits::maxNumberOfQuadruplets) { - ALPAKA_ASSERT_ACC(tracks_view.hitIndices().size(apc->get().first) == 0); - ALPAKA_ASSERT_ACC(tracks_view.hitIndices().size() == apc->get().second); + if (apc->get().first < tracks_view.metadata().size()) { + ALPAKA_ASSERT_ACC(foundNtuplets->size(apc->get().first) == 0); + ALPAKA_ASSERT_ACC(foundNtuplets->size() == apc->get().second); } } - for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { - if (tracks_view.hitIndices().size(idx) > TrackerTraits::maxHitsOnTrack) // current real limit - printf("ERROR %d, %d\n", idx, tracks_view.hitIndices().size(idx)); - ALPAKA_ASSERT_ACC(ftracks_view.hitIndices().size(idx) <= TrackerTraits::maxHitsOnTrack); - for (auto ih = tracks_view.hitIndices().begin(idx); ih != tracks_view.hitIndices().end(idx); ++ih) + for (auto idx : cms::alpakatools::uniform_elements(acc, foundNtuplets->nOnes())) { + if (foundNtuplets->size(idx) > TrackerTraits::maxHitsOnTrack) // current real limit + printf("ERROR %d, %d\n", idx, foundNtuplets->size(idx)); + ALPAKA_ASSERT_ACC(foundNtuplets->size(idx) <= TrackerTraits::maxHitsOnTrack); + for (auto ih = foundNtuplets->begin(idx); ih != foundNtuplets->end(idx); ++ih) ALPAKA_ASSERT_ACC(int(*ih) < nHits); } #endif if (cms::alpakatools::once_per_grid(acc)) { - if (apc->get().first >= TrackerTraits::maxNumberOfQuadruplets) + if (apc->get().first >= uint32_t(tracks_view.metadata().size())) printf("Tuples overflow\n"); if (*nCells >= maxNumberOfDoublets) printf("Cells overflow\n"); - if (cellNeighbors && cellNeighbors->full()) - printf("cellNeighbors overflow %d %d \n", cellNeighbors->capacity(), cellNeighbors->size()); - if (cellTracks && cellTracks->full()) - printf("cellTracks overflow\n"); - if (int(hitToTuple->nOnes()) < nHits) - printf("ERROR hitToTuple overflow %d 
%d\n", hitToTuple->nOnes(), nHits); -#ifdef GPU_DEBUG - printf("size of cellNeighbors %d \n cellTracks %d \n hitToTuple %d \n", - cellNeighbors->size(), - cellTracks->size(), - hitToTuple->size()); -#endif + if (*nTrips >= uint32_t(cellCell.metadata().size())) + printf("Triplets overflow\n"); + if (*nCellTracks >= uint32_t(cellTrack.metadata().size())) + printf("TracksToCell overflow\n"); } for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) { auto const &thisCell = cells[idx]; if (thisCell.hasFishbone() && !thisCell.isKilled()) alpaka::atomicAdd(acc, &c.nFishCells, 1ull, alpaka::hierarchy::Blocks{}); - if (thisCell.outerNeighbors().full()) //++tooManyNeighbors[thisCell.theLayerPairId]; - printf("OuterNeighbors overflow %d in %d\n", idx, thisCell.layerPairId()); - if (thisCell.tracks().full()) //++tooManyTracks[thisCell.theLayerPairId]; - printf("Tracks overflow %d in %d\n", idx, thisCell.layerPairId()); if (thisCell.isKilled()) alpaka::atomicAdd(acc, &c.nKilledCells, 1ull, alpaka::hierarchy::Blocks{}); if (!thisCell.unused()) @@ -157,22 +178,17 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { if ((0 == hitToTuple->size(thisCell.inner_hit_id())) && (0 == hitToTuple->size(thisCell.outer_hit_id()))) alpaka::atomicAdd(acc, &c.nZeroTrackCells, 1ull, alpaka::hierarchy::Blocks{}); } - - // FIXME this loop was up to nHits - isOuterHitOfCell.offset in the CUDA version - for (auto idx : cms::alpakatools::uniform_elements(acc, nHits)) - if ((*isOuterHitOfCell).container[idx].full()) // ++tooManyOuterHitOfCell; - printf("OuterHitOfCell overflow %d\n", idx); } }; template class Kernel_fishboneCleaner { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - CACellT const *cells, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + CACell const *cells, uint32_t const *__restrict__ nCells, - TkSoAView tracks_view) const { + CellToTrack const *__restrict__ cellTracksHisto, + TkSoAView tracks_view) const { constexpr auto 
reject = Quality::dup; for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) { @@ -180,8 +196,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { if (!thisCell.isKilled()) continue; - for (auto it : thisCell.tracks()) - tracks_view[it].quality() = reject; + auto const *__restrict__ tracksOfCell = cellTracksHisto->begin(idx); + for (auto i = 0u; i < cellTracksHisto->size(idx); i++) + tracks_view[tracksOfCell[i]].quality() = reject; } } }; @@ -191,26 +208,30 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_earlyDuplicateRemover { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - CACellT const *cells, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + CACell const *cells, uint32_t const *__restrict__ nCells, - TkSoAView tracks_view, + CellToTrack const *__restrict__ cellTracksHisto, + TkSoAView tracks_view, bool dupPassThrough) const { // quality to mark rejected constexpr auto reject = Quality::edup; /// cannot be loose ALPAKA_ASSERT_ACC(nCells); for (auto idx : cms::alpakatools::uniform_elements(acc, *nCells)) { - auto const &thisCell = cells[idx]; - - if (thisCell.tracks().size() < 2) +#ifdef CA_SIZES + printf("cellTracksSizes;%d;%d;%d\n", idx, cT.size(), cT.capacity()); +#endif + if (cellTracksHisto->size(idx) < 2) continue; int8_t maxNl = 0; + auto const *__restrict__ tracksOfCell = cellTracksHisto->begin(idx); // find maxNl - for (auto it : thisCell.tracks()) { - auto nl = tracks_view[it].nLayers(); + for (auto i = 0u; i < cellTracksHisto->size(idx); i++) { + if (int(tracksOfCell[i]) > tracks_view.metadata().size()) + printf(">WARNING: %d %d %d %d\n", idx, i, int(tracksOfCell[i]), tracks_view.metadata().size()); + auto nl = tracks_view[tracksOfCell[i]].nLayers(); maxNl = std::max(nl, maxNl); } @@ -218,7 +239,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { // quad pass through (leave it here for tests) // maxNl = std::min(4, 
maxNl); - for (auto it : thisCell.tracks()) { + for (auto i = 0u; i < cellTracksHisto->size(idx); i++) { + auto it = tracksOfCell[i]; + + if (int(it) > tracks_view.metadata().size()) + printf(">WARNING: %d %d %d\n", i, it, tracks_view.metadata().size()); if (tracks_view[it].nLayers() < maxNl) tracks_view[it].quality() = reject; // no race: simple assignment of the same constant } @@ -230,11 +255,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_fastDuplicateRemover { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - CACellT const *__restrict__ cells, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + CACell const *__restrict__ cells, uint32_t const *__restrict__ nCells, - TkSoAView tracks_view, + CellToTrack const *__restrict__ cellTracksHisto, + TkSoAView tracks_view, bool dupPassThrough) const { // quality to mark rejected auto const reject = dupPassThrough ? Quality::loose : Quality::dup; @@ -244,19 +269,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { const auto ntNCells = (*nCells); for (auto idx : cms::alpakatools::uniform_elements(acc, ntNCells)) { - auto const &thisCell = cells[idx]; - if (thisCell.tracks().size() < 2) + if (cellTracksHisto->size(idx) < 2) continue; float mc = maxScore; - uint16_t im = tkNotFound; + uint32_t im = tkNotFound; auto score = [&](auto it) { return std::abs(reco::tip(tracks_view, it)); }; // full crazy combinatorics - int ntr = thisCell.tracks().size(); - for (int i = 0; i < ntr - 1; ++i) { - auto it = thisCell.tracks()[i]; + auto const *__restrict__ thisCellTracks = cellTracksHisto->begin(idx); + int ntr = cellTracksHisto->size(idx); + for (int i = 0; i < ntr - 1; i++) { + auto it = thisCellTracks[i]; auto qi = tracks_view[it].quality(); if (qi <= reject) continue; @@ -264,8 +289,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { auto e2opi = tracks_view[it].covariance()(9); auto cti = 
tracks_view[it].state()(3); auto e2cti = tracks_view[it].covariance()(12); - for (auto j = i + 1; j < ntr; ++j) { - auto jt = thisCell.tracks()[j]; + for (int j = i + 1; j < ntr; ++j) { + auto jt = thisCellTracks[j]; auto qj = tracks_view[jt].quality(); if (qj <= reject) continue; @@ -288,7 +313,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { // find maxQual auto maxQual = reject; // no duplicate! - for (auto it : thisCell.tracks()) { + for (int i = 0; i < ntr; i++) { + auto it = thisCellTracks[i]; if (tracks_view[it].quality() > maxQual) maxQual = tracks_view[it].quality(); } @@ -297,7 +323,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { continue; // find min score - for (auto it : thisCell.tracks()) { + for (int i = 0; i < ntr; i++) { + auto it = thisCellTracks[i]; if (tracks_view[it].quality() == maxQual && score(it) < mc) { mc = score(it); im = it; @@ -308,7 +335,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { continue; // mark all other duplicates (not yet, keep it loose) - for (auto it : thisCell.tracks()) { + for (int i = 0; i < ntr; i++) { + auto it = thisCellTracks[i]; if (tracks_view[it].quality() > loose && it != im) tracks_view[it].quality() = loose; //no race: simple assignment of the same constant } @@ -319,87 +347,149 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_connect { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - cms::alpakatools::AtomicPairCounter *apc1, - cms::alpakatools::AtomicPairCounter *apc2, // just to zero them - HitsConstView hh, - CACellT *cells, - uint32_t *nCells, - CellNeighborsVector *cellNeighbors, - OuterHitOfCell const *isOuterHitOfCell, - CAParams params) const { - using Cell = CACellT; + ALPAKA_FN_ACC void operator()(Acc2D const &acc, + cms::alpakatools::AtomicPairCounter *apc, // just to zero them + HitsConstView hh, + reco::CALayersSoAConstView ll, + 
caStructures::CAPairSoAView cn, + CACell *cells, + uint32_t const *nCells, + uint32_t *nTrips, + HitToCell const *__restrict__ outerHitHisto, + CellToCell *cellNeighborsHisto, + AlgoParams const ¶ms) const { + using Cell = CACell; + uint32_t maxTriplets = cn.metadata().size(); if (cms::alpakatools::once_per_grid(acc)) { - *apc1 = 0; - *apc2 = 0; + *apc = 0; } // ready for next kernel // loop on outer cells - for (uint32_t idx : cms::alpakatools::uniform_elements_y(acc, *nCells)) { - auto cellIndex = idx; - auto &thisCell = cells[idx]; - auto innerHitId = thisCell.inner_hit_id(); - if (int(innerHitId) < isOuterHitOfCell->offset) + for (uint32_t cellIndex : cms::alpakatools::uniform_elements_y(acc, *nCells)) { + auto &thisCell = cells[cellIndex]; + auto innerHitId = thisCell.inner_hit_id() - hh.offsetBPIX2(); + + if (int(innerHitId) < 0) continue; - uint32_t numberOfPossibleNeighbors = (*isOuterHitOfCell)[innerHitId].size(); - auto vi = (*isOuterHitOfCell)[innerHitId].data(); + auto const *__restrict__ outerHitCells = outerHitHisto->begin(innerHitId); + auto const numberOfPossibleNeighbors = outerHitHisto->size(innerHitId); + +#ifdef CA_DEBUG + printf("numberOfPossibleFromHisto;%d;%d;%d;%d;%d\n", + *nCells, + innerHitId, + cellIndex, + thisCell.innerLayer(), + numberOfPossibleNeighbors); +#endif auto ri = thisCell.inner_r(hh); auto zi = thisCell.inner_z(hh); auto ro = thisCell.outer_r(hh); auto zo = thisCell.outer_z(hh); - auto isBarrel = thisCell.inner_detIndex(hh) < TrackerTraits::last_barrel_detIndex; + auto thetaCut = ll[thisCell.innerLayer()].caThetaCut(); // loop on inner cells for (uint32_t j : cms::alpakatools::independent_group_elements_x(acc, numberOfPossibleNeighbors)) { - auto otherCell = (vi[j]); + auto otherCell = outerHitCells[j]; auto &oc = cells[otherCell]; auto r1 = oc.inner_r(hh); auto z1 = oc.inner_z(hh); - bool aligned = Cell::areAlignedRZ( - r1, - z1, - ri, - zi, - ro, - zo, - params.ptmin_, - isBarrel ? 
params.CAThetaCutBarrel_ : params.CAThetaCutForward_); // 2.f*thetaCut); // FIXME tune cuts - if (aligned && - thisCell.dcaCut(hh, - oc, - oc.inner_detIndex(hh) < TrackerTraits::last_bpix1_detIndex ? params.dcaCutInnerTriplet_ - : params.dcaCutOuterTriplet_, - params.hardCurvCut_)) { // FIXME tune cuts - oc.addOuterNeighbor(acc, cellIndex, *cellNeighbors); + auto dcaCut = ll[oc.innerLayer()].caDCACut(); + bool aligned = Cell::areAlignedRZ(r1, z1, ri, zi, ro, zo, params.ptmin_, thetaCut); + if (aligned && thisCell.dcaCut(hh, oc, dcaCut, params.hardCurvCut_)) { + auto t_ind = alpaka::atomicAdd(acc, nTrips, 1u, alpaka::hierarchy::Blocks{}); +#ifdef CA_DEBUG + printf("Triplet no. %d %.5f %.5f (%d %d) - %d %d -> (%d, %d, %d, %d) \n", + t_ind, + thetaCut, + dcaCut, + thisCell.layerPairId(), + oc.layerPairId(), + otherCell, + cellIndex, + thisCell.inner_hit_id(), + thisCell.outer_hit_id(), + oc.inner_hit_id(), + oc.outer_hit_id()); +#endif + +#ifdef CA_DEBUG + printf("filling cell no. %d %d: %d -> %d\n", t_ind, cellNeighborsHisto->size(), otherCell, cellIndex); +#endif + + if (t_ind >= maxTriplets) { +#ifdef CA_WARNINGS + printf("Warning!!!! 
Too many cell->cell (triplets) associations (limit = %d)!\n", cn.metadata().size()); +#endif + alpaka::atomicSub(acc, nTrips, 1u, alpaka::hierarchy::Blocks{}); + break; + } + + cellNeighborsHisto->count(acc, otherCell); + + cn[t_ind].inner() = otherCell; + cn[t_ind].outer() = cellIndex; thisCell.setStatusBits(Cell::StatusBit::kUsed); oc.setStatusBits(Cell::StatusBit::kUsed); } + } // loop on inner cells } // loop on outer cells } }; + + template + class FillDoubletsHisto { + public: + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + CACell const *__restrict__ cells, + uint32_t *nCells, + uint32_t offsetBPIX2, + HitToCell *outerHitHisto) const { + for (auto cellIndex : cms::alpakatools::uniform_elements(acc, *nCells)) { +#ifdef DOUBLETS_DEBUG + printf("outerHitHisto;%d;%d\n", cellIndex, cells[cellIndex].outer_hit_id()); +#endif + outerHitHisto->fill(acc, cells[cellIndex].outer_hit_id() - offsetBPIX2, cellIndex); + } + } + }; + + class Kernel_fillGenericPair { + public: + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + caStructures::CAPairSoAConstView cn, + uint32_t const *nElements, + GenericContainer *genericHisto) const { + for (uint32_t index : cms::alpakatools::uniform_elements(acc, *nElements)) { + genericHisto->fill(acc, cn[index].inner(), cn[index].outer()); + } + } + }; + template class Kernel_find_ntuplets { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - HitsConstView hh, - TkSoAView tracks_view, - CACellT *__restrict__ cells, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + const ::reco::CAGraphSoAConstView &cc, + TkSoAView tracks_view, + HitContainer *foundNtuplets, + CellToCell const *__restrict__ cellNeighborsHisto, + CellToTrack *cellTracksHisto, + caStructures::CAPairSoAView ct, + CACell *__restrict__ cells, + uint32_t *nCellTracks, + uint32_t const *nTriplets, uint32_t const *nCells, - CellTracksVector *cellTracks, cms::alpakatools::AtomicPairCounter *apc, - CAParams params) const { - // recursive: not obvious to widen 
- - using Cell = CACellT; + AlgoParams const ¶ms) const { + using Cell = CACell; #ifdef GPU_DEBUG if (cms::alpakatools::once_per_grid(acc)) - printf("starting producing ntuplets from %d cells \n", *nCells); + printf("starting producing ntuplets from %d cells and %d triplets \n", *nCells, *nTriplets); #endif for (auto idx : cms::alpakatools::uniform_elements(acc, (*nCells))) { @@ -410,28 +500,37 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { continue; // we require at least three hits - if (thisCell.outerNeighbors().empty()) + if (cellNeighborsHisto->size(idx) == 0) continue; auto pid = thisCell.layerPairId(); - bool doit = params.startingLayerPair(pid); + bool doit = cc[pid].startingPair(); constexpr uint32_t maxDepth = TrackerTraits::maxDepth; +#ifdef CA_DEBUG + printf("LayerPairId %d doit ? %d From cell %d with nNeighbors = %d \n", + pid, + doit, + idx, + cellNeighborsHisto->size(idx)); +#endif if (doit) { typename Cell::TmpTuple stack; + stack.reset(); - bool bpix1Start = params.startAt0(pid); - thisCell.template find_ntuplets(acc, - hh, - cells, - *cellTracks, - tracks_view.hitIndices(), - *apc, - tracks_view.quality(), - stack, - params.minHitsPerNtuplet_, - bpix1Start); + thisCell.template find_ntuplets(acc, + cc, + cells, + *foundNtuplets, + cellNeighborsHisto, + cellTracksHisto, + nCellTracks, + ct, + *apc, + tracks_view.quality(), + stack, + params.minHitsPerNtuplet_); ALPAKA_ASSERT_ACC(stack.empty()); } } @@ -441,14 +540,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_mark_used { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - CACellT *__restrict__ cells, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + CACell *__restrict__ cells, + CellToTrack const *__restrict__ cellTracksHisto, uint32_t const *nCells) const { - using Cell = CACellT; + using Cell = CACell; for (auto idx : cms::alpakatools::uniform_elements(acc, (*nCells))) { auto &thisCell = 
cells[idx]; - if (!thisCell.tracks().empty()) + if (cellTracksHisto->size(idx) > 0) thisCell.setStatusBits(Cell::StatusBit::kInTrack); } } @@ -457,12 +556,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_countMultiplicity { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TkSoAView tracks_view, - TupleMultiplicity *tupleMultiplicity) const { - for (auto it : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { - auto nhits = tracks_view.hitIndices().size(it); + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TkSoAView tracks_view, + HitContainer const *__restrict__ foundNtuplets, + TupleMultiplicity *tupleMultiplicity) const { + for (auto it : cms::alpakatools::uniform_elements(acc, foundNtuplets->nOnes())) { + auto nhits = foundNtuplets->size(it); + // printf("it: %d nhits: %d \n",it,nhits); if (nhits < 3) continue; if (tracks_view[it].quality() == Quality::edup) @@ -479,12 +579,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_fillMultiplicity { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TkSoAView tracks_view, - TupleMultiplicity *tupleMultiplicity) const { - for (auto it : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { - auto nhits = tracks_view.hitIndices().size(it); + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TkSoAView tracks_view, + HitContainer const *__restrict__ foundNtuplets, + TupleMultiplicity *tupleMultiplicity) const { + for (auto it : cms::alpakatools::uniform_elements(acc, foundNtuplets->nOnes())) { + auto nhits = foundNtuplets->size(it); + if (nhits < 3) continue; if (tracks_view[it].quality() == Quality::edup) @@ -501,12 +602,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_classifyTracks { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TkSoAView tracks_view, 
+ ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TkSoAView tracks_view, + HitContainer const *__restrict__ foundNtuplets, QualityCuts cuts) const { - for (auto it : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { - auto nhits = tracks_view.hitIndices().size(it); + for (auto it : cms::alpakatools::uniform_elements(acc, foundNtuplets->nOnes())) { + auto nhits = foundNtuplets->size(it); if (nhits == 0) break; // guard @@ -527,7 +628,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { } if (isNaN) { #ifdef NTUPLE_DEBUG - printf("NaN in fit %d size %d chi2 %f\n", it, tracks_view.hitIndices().size(it), tracks_view[it].chi2()); + printf("NaN in fit %d size %d chi2 %f\n", it, foundNtuplets->size(it), tracks_view[it].chi2()); #endif continue; } @@ -548,10 +649,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_doStatsForTracks { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, TkSoAView tracks_view, Counters *counters) const { - for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { - if (tracks_view.hitIndices().size(idx) == 0) + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TkSoAView tracks_view, + HitContainer const *__restrict__ foundNtuplets, + Counters *counters) const { + for (auto idx : cms::alpakatools::uniform_elements(acc, foundNtuplets->nOnes())) { + if (foundNtuplets->size(idx) == 0) break; //guard if (tracks_view[idx].quality() < Quality::loose) continue; @@ -566,14 +669,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_countHitInTracks { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TkSoAView tracks_view, - HitToTuple *hitToTuple) const { - for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { - if (tracks_view.hitIndices().size(idx) == 0) + ALPAKA_FN_ACC void operator()(Acc1D 
const &acc, + TkSoAView tracks_view, + HitContainer const *__restrict__ foundNtuplets, + HitToTuple *hitToTuple) const { + for (auto idx : cms::alpakatools::uniform_elements(acc, foundNtuplets->nOnes())) { + if (foundNtuplets->size(idx) == 0) break; // guard - for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h) + for (auto h = foundNtuplets->begin(idx); h != foundNtuplets->end(idx); ++h) hitToTuple->count(acc, *h); } } @@ -582,14 +685,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_fillHitInTracks { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TkSoAView tracks_view, - HitToTuple *hitToTuple) const { - for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { - if (tracks_view.hitIndices().size(idx) == 0) + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TkSoAView tracks_view, + HitContainer const *__restrict__ foundNtuplets, + HitToTuple *hitToTuple) const { + for (auto idx : cms::alpakatools::uniform_elements(acc, foundNtuplets->nOnes())) { + if (foundNtuplets->size(idx) == 0) break; // guard - for (auto h = tracks_view.hitIndices().begin(idx); h != tracks_view.hitIndices().end(idx); ++h) + for (auto h = foundNtuplets->begin(idx); h != foundNtuplets->end(idx); ++h) hitToTuple->fill(acc, *h, idx); } } @@ -598,18 +701,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_fillHitDetIndices { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TkSoAView tracks_view, - HitsConstView hh) const { + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TkSoAView tracks_view, + TkHitSoAView track_hits_view, + HitContainer const *__restrict__ foundNtuplets, + HitsConstView hh) const { // copy offsets - for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().nOnes())) { - tracks_view.detIndices().off[idx] = 
tracks_view.hitIndices().off[idx]; + for (auto idx : cms::alpakatools::uniform_elements(acc, foundNtuplets->nOnes() - 1)) { + tracks_view[idx].hitOffsets() = foundNtuplets->off[idx + 1]; // offset for track 0 is always 0 } // fill hit indices - for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.hitIndices().size())) { - ALPAKA_ASSERT_ACC(tracks_view.hitIndices().content[idx] < (uint32_t)hh.metadata().size()); - tracks_view.detIndices().content[idx] = hh[tracks_view.hitIndices().content[idx]].detectorIndex(); + for (auto idx : cms::alpakatools::uniform_elements(acc, foundNtuplets->size())) { + ALPAKA_ASSERT_ACC(foundNtuplets->content[idx] < (uint32_t)hh.metadata().size()); + track_hits_view[idx].id() = foundNtuplets->content[idx]; + track_hits_view[idx].detId() = hh[foundNtuplets->content[idx]].detectorIndex(); +#ifdef CA_DEBUG + printf("Kernel_fillHitDetIndices %d %d %d \n", + idx, + foundNtuplets->content[idx], + track_hits_view.metadata().size()); +#endif } } }; @@ -617,9 +728,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_fillNLayers { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TkSoAView tracks_view, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TkSoAView tracks_view, + TkHitSoAView track_hits_view, + uint32_t const *__restrict__ layerStarts, + uint16_t maxLayers, cms::alpakatools::AtomicPairCounter *apc) const { // clamp the number of tracks to the capacity of the SoA auto ntracks = std::min(apc->get().first, tracks_view.metadata().size() - 1); @@ -627,8 +740,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { if (cms::alpakatools::once_per_grid(acc)) tracks_view.nTracks() = ntracks; for (auto idx : cms::alpakatools::uniform_elements(acc, ntracks)) { - ALPAKA_ASSERT_ACC(TracksUtilities::nHits(tracks_view, idx) >= 3); - tracks_view[idx].nLayers() = TracksUtilities::computeNumberOfLayers(tracks_view, idx); + 
ALPAKA_ASSERT_ACC(reco::nHits(tracks_view, idx) >= 3); + tracks_view[idx].nLayers() = reco::nLayers(tracks_view, track_hits_view, maxLayers, layerStarts, idx); +#ifdef CA_DEBUG + printf("Kernel_fillNLayers %d %d %d - %d %d\n", + idx, + ntracks, + tracks_view[idx].nLayers(), + apc->get().first, + tracks_view.metadata().size() - 1); +#endif } } }; @@ -636,9 +757,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_doStatsForHitInTracks { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - HitToTuple const *__restrict__ hitToTuple, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + HitToTuple const *__restrict__ hitToTuple, Counters *counters) const { auto &c = *counters; for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple->nOnes())) { @@ -654,17 +774,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_countSharedHit { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, int *__restrict__ nshared, - HitContainer const *__restrict__ ptuples, + HitContainer const *__restrict__ ptuples, Quality const *__restrict__ quality, - HitToTuple const *__restrict__ phitToTuple) const { + HitToTuple const *__restrict__ phitToTuple) const { constexpr auto loose = Quality::loose; auto &hitToTuple = *phitToTuple; auto const &foundNtuplets = *ptuples; - for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple->nbins())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) { if (hitToTuple.size(idx) < 2) continue; @@ -684,7 +803,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { for (auto it = hitToTuple.begin(idx); it != hitToTuple.end(idx); ++it) { if (foundNtuplets.size(*it) > 3) continue; - alpaka::atomicAdd(acc, &nshared[*it], 1ull, alpaka::hierarchy::Blocks{}); + alpaka::atomicAdd(acc, &nshared[*it], 1, 
alpaka::hierarchy::Blocks{}); } } // hit loop @@ -693,10 +812,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_markSharedHit { - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, int const *__restrict__ nshared, - HitContainer const *__restrict__ tuples, + HitContainer const *__restrict__ tuples, Quality *__restrict__ quality, bool dupPassThrough) const { // constexpr auto bad = Quality::bad; @@ -706,7 +824,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { // quality to mark rejected auto const reject = dupPassThrough ? loose : dup; - for (auto idx : cms::alpakatools::uniform_elements(acc, tuples->nbins())) { + for (auto idx : cms::alpakatools::uniform_elements(acc, tuples->nOnes())) { if (tuples->size(idx) == 0) break; //guard if (quality[idx] <= reject) @@ -721,12 +839,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_rejectDuplicate { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TkSoAView tracks_view, - uint16_t nmin, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TkSoAView tracks_view, bool dupPassThrough, - HitToTuple const *__restrict__ phitToTuple) const { + HitToTuple const *__restrict__ phitToTuple) const { // quality to mark rejected auto const reject = dupPassThrough ? 
Quality::loose : Quality::dup; @@ -778,13 +894,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_sharedHitCleaner { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - HitsConstView hh, - TkSoAView tracks_view, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + HitsConstView hh, + uint32_t const *__restrict__ layerStarts, + TkSoAView tracks_view, int nmin, bool dupPassThrough, - HitToTuple const *__restrict__ phitToTuple) const { + HitToTuple const *__restrict__ phitToTuple) const { // quality to mark rejected auto const reject = dupPassThrough ? Quality::loose : Quality::dup; // quality of longest track @@ -792,7 +908,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { auto &hitToTuple = *phitToTuple; - uint32_t l1end = hh.hitsLayerStart()[1]; + uint32_t l1end = layerStarts[1]; for (auto idx : cms::alpakatools::uniform_elements(acc, hitToTuple.nOnes())) { if (hitToTuple.size(idx) < 2) @@ -832,12 +948,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_tripletCleaner { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TkSoAView tracks_view, - uint16_t nmin, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TkSoAView tracks_view, bool dupPassThrough, - HitToTuple const *__restrict__ phitToTuple) const { + HitToTuple const *__restrict__ phitToTuple) const { // quality to mark rejected auto const reject = Quality::loose; /// min quality of good @@ -850,7 +964,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { continue; float mc = maxScore; - uint16_t im = tkNotFound; + uint32_t im = tkNotFound; bool onlyTriplets = true; // check if only triplets @@ -892,12 +1006,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_simpleTripletCleaner { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TkSoAView tracks_view, - 
uint16_t nmin, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TkSoAView tracks_view, bool dupPassThrough, - HitToTuple const *__restrict__ phitToTuple) const { + HitToTuple const *__restrict__ phitToTuple) const { // quality to mark rejected auto const reject = Quality::loose; /// min quality of good @@ -910,7 +1022,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { continue; float mc = maxScore; - uint16_t im = tkNotFound; + uint32_t im = tkNotFound; // choose best tip! (should we first find best quality???) for (auto ip = hitToTuple.begin(idx); ip != hitToTuple.end(idx); ++ip) { @@ -938,19 +1050,18 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { template class Kernel_print_found_ntuplets { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - HitsConstView hh, - TkSoAView tracks_view, - HitToTuple const *__restrict__ phitToTuple, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + HitsConstView hh, + TkSoAView tracks_view, + HitContainer const *__restrict__ foundNtuplets, + HitToTuple const *__restrict__ phitToTuple, int32_t firstPrint, int32_t lastPrint, int iev) const { constexpr auto loose = Quality::loose; - for (auto i : - cms::alpakatools::uniform_elements(acc, firstPrint, std::min(lastPrint, tracks_view.hitIndices().nbins()))) { - auto nh = tracks_view.hitIndices().size(i); + for (auto i : cms::alpakatools::uniform_elements(acc, firstPrint, std::min(lastPrint, foundNtuplets->nOnes()))) { + auto nh = foundNtuplets->size(i); if (nh < 3) continue; if (tracks_view[i].quality() < loose) @@ -967,21 +1078,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { reco::tip(tracks_view, i), reco::zip(tracks_view, i), tracks_view[i].chi2(), - hh[*tracks_view.hitIndices().begin(i)].zGlobal(), - hh[*(tracks_view.hitIndices().begin(i) + 1)].zGlobal(), - hh[*(tracks_view.hitIndices().begin(i) + 2)].zGlobal(), - nh > 3 ? 
hh[int(*(tracks_view.hitIndices().begin(i) + 3))].zGlobal() : 0, - nh > 4 ? hh[int(*(tracks_view.hitIndices().begin(i) + 4))].zGlobal() : 0, - nh > 5 ? hh[int(*(tracks_view.hitIndices().begin(i) + 5))].zGlobal() : 0, - nh > 6 ? hh[int(*(tracks_view.hitIndices().begin(i) + nh - 1))].zGlobal() : 0); + hh[*foundNtuplets->begin(i)].zGlobal(), + hh[*(foundNtuplets->begin(i) + 1)].zGlobal(), + hh[*(foundNtuplets->begin(i) + 2)].zGlobal(), + nh > 3 ? hh[int(*(foundNtuplets->begin(i) + 3))].zGlobal() : 0, + nh > 4 ? hh[int(*(foundNtuplets->begin(i) + 4))].zGlobal() : 0, + nh > 5 ? hh[int(*(foundNtuplets->begin(i) + 5))].zGlobal() : 0, + nh > 6 ? hh[int(*(foundNtuplets->begin(i) + nh - 1))].zGlobal() : 0); } } }; class Kernel_printCounters { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, Counters const *counters) const { + ALPAKA_FN_ACC void operator()(Acc1D const &acc, Counters const *counters) const { auto const &c = *counters; printf( "||Counters | nEvents | nHits | nCells | nTuples | nFitTacks | nLooseTracks | nGoodTracks | nUsedHits | " diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h index aff544bb556cb..46997a61b064b 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoublets.h @@ -10,68 +10,30 @@ #include "CAPixelDoubletsAlgos.h" -namespace ALPAKA_ACCELERATOR_NAMESPACE { - using namespace alpaka; - using namespace cms::alpakatools; - namespace caPixelDoublets { - - template - class InitDoublets { - public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const& acc, - OuterHitOfCell* isOuterHitOfCell, - int nHits, - CellNeighborsVector* cellNeighbors, - CellNeighbors* cellNeighborsContainer, - CellTracksVector* cellTracks, - CellTracks* cellTracksContainer) const { - ALPAKA_ASSERT_ACC((*isOuterHitOfCell).container); - - for (auto i : cms::alpakatools::uniform_elements(acc, nHits - 
isOuterHitOfCell->offset)) - (*isOuterHitOfCell).container[i].reset(); - - if (cms::alpakatools::once_per_grid(acc)) { - cellNeighbors->construct(TrackerTraits::maxNumOfActiveDoublets, cellNeighborsContainer); - cellTracks->construct(TrackerTraits::maxNumOfActiveDoublets, cellTracksContainer); - [[maybe_unused]] auto i = cellNeighbors->extend(acc); - ALPAKA_ASSERT_ACC(0 == i); - (*cellNeighbors)[0].reset(); - i = cellTracks->extend(acc); - ALPAKA_ASSERT_ACC(0 == i); - (*cellTracks)[0].reset(); - } - } - }; - - // Not used for the moment, see below. - //constexpr auto getDoubletsFromHistoMaxBlockSize = 64; // for both x and y - //constexpr auto getDoubletsFromHistoMinBlocksPerMP = 16; - - template - class GetDoubletsFromHisto { - public: - template >> - // #ifdef __CUDACC__ - // __launch_bounds__(getDoubletsFromHistoMaxBlockSize, getDoubletsFromHistoMinBlocksPerMP) // TODO: Alapakify - // #endif - ALPAKA_FN_ACC void operator()(TAcc const& acc, - CACellT* cells, - uint32_t* nCells, - CellNeighborsVector* cellNeighbors, - CellTracksVector* cellTracks, - HitsConstView hh, - OuterHitOfCell* isOuterHitOfCell, - uint32_t nActualPairs, - const uint32_t maxNumOfDoublets, - CellCutsT cuts) const { - doubletsFromHisto( - acc, nActualPairs, maxNumOfDoublets, cells, nCells, cellNeighbors, cellTracks, hh, *isOuterHitOfCell, cuts); - } - }; - - } // namespace caPixelDoublets - -} // namespace ALPAKA_ACCELERATOR_NAMESPACE +namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { + + template + class GetDoubletsFromHisto { + public: + // #ifdef __CUDACC__ + // __launch_bounds__(getDoubletsFromHistoMaxBlockSize, getDoubletsFromHistoMinBlocksPerMP) // TODO: Alapakafy + // #endif + ALPAKA_FN_ACC void operator()(Acc2D const& acc, + uint32_t maxNumOfDoublets, + CACell* cells, + uint32_t* nCells, + HitsConstView hh, + ::reco::CAGraphSoAConstView cc, + ::reco::CALayersSoAConstView ll, + uint32_t const* __restrict__ offsets, + PhiBinner const* phiBinner, + HitToCell* outerHitHisto, + 
AlgoParams const& params) const { + doubletsFromHisto( + acc, maxNumOfDoublets, cells, nCells, hh, cc, ll, offsets, phiBinner, outerHitHisto, params); + } + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets #endif // RecoTracker_PixelSeeding_plugins_alpaka_CAPixelDoublets_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h index 7637fc2fca61f..26b38ca1aa9fb 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAPixelDoubletsAlgos.h @@ -16,155 +16,139 @@ #include "HeterogeneousCore/AlpakaInterface/interface/VecArray.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" #include "CACell.h" #include "CAStructures.h" +#include "CAHitNtupletGeneratorKernels.h" -//#define GPU_DEBUG -//#define NTUPLE_DEBUG +// #define GPU_DEBUG +// #define DOUBLETS_DEBUG +// #define CA_WARNINGS namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { using namespace cms::alpakatools; + using namespace ::caStructures; + using namespace ::reco; - template - using CellNeighbors = caStructures::CellNeighborsT; - template - using CellTracks = caStructures::CellTracksT; - template - using CellNeighborsVector = caStructures::CellNeighborsVectorT; - template - using CellTracksVector = caStructures::CellTracksVectorT; - template - using OuterHitOfCell = caStructures::OuterHitOfCellT; - template - using HitsConstView = typename CACellT::HitsConstView; + using HitToCell = GenericContainer; template - struct CellCutsT { - using H = HitsConstView; - using T = TrackerTraits; - - CellCutsT() = default; - - CellCutsT(const bool doClusterCut, - const bool doZ0Cut, - const bool doPtCut, - const bool idealConditions, - const float z0Cut, - const float ptCut, - const int minYsizeB1, - const 
int minYsizeB2, - const std::vector& phiCutsV) - : doClusterCut_(doClusterCut), - doZ0Cut_(doZ0Cut), - doPtCut_(doPtCut), - idealConditions_(idealConditions), - z0Cut_(z0Cut), - ptCut_(ptCut), - minYsizeB1_(minYsizeB1), - minYsizeB2_(minYsizeB2) { - assert(phiCutsV.size() == TrackerTraits::nPairs); - std::copy(phiCutsV.begin(), phiCutsV.end(), &phiCuts[0]); - } - - bool doClusterCut_; - bool doZ0Cut_; - bool doPtCut_; - bool idealConditions_; //this is actually not used by phase2 - - float z0Cut_; //FIXME: check if could be const now - float ptCut_; - - int minYsizeB1_; - int minYsizeB2_; - - int phiCuts[T::nPairs]; - - template - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline)) zSizeCut(const TAcc& acc, - H hh, - int i, - int o) const { - const uint32_t mi = hh[i].detectorIndex(); - - bool innerB1 = mi < T::last_bpix1_detIndex; - bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2; - auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1; - - if (mes < 0) - return false; - - const uint32_t mo = hh[o].detectorIndex(); - auto so = hh[o].clusterSizeY(); - - auto dz = hh[i].zGlobal() - hh[o].zGlobal(); - auto dr = hh[i].rGlobal() - hh[o].rGlobal(); + using PhiBinner = PhiBinnerT; + //Move this ^ definition in CAStructures maybe + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool zSizeCut( + const TAcc& acc, HitsConstView hh, ::reco::CALayersSoAConstView ll, AlgoParams const& params, int i, int o) { + const uint32_t mi = hh[i].detectorIndex(); + const auto first_forward = ll.layerStarts()[4]; + const auto first_bpix2 = ll.layerStarts()[1]; + bool innerB1 = mi < first_bpix2; + bool isOuterLadder = 0 == (mi / 8) % 2; + auto mes = (!innerB1) || isOuterLadder ? 
hh[i].clusterSizeY() : -1; +#ifdef DOUBLETS_DEBUG + printf("i = %d o = %d mi = %d innerB1 = %d isOuterLadder = %d first_forward = %d first_bpix2 = %d\n", + i, + o, + mi, + innerB1, + isOuterLadder, + first_forward, + first_bpix2); +#endif + if (mes < 0) + return false; - auto innerBarrel = mi < T::last_barrel_detIndex; - auto onlyBarrel = mo < T::last_barrel_detIndex; + const uint32_t mo = hh[o].detectorIndex(); + auto so = hh[o].clusterSizeY(); - if (not innerBarrel and not onlyBarrel) - return false; - auto dy = innerB1 ? T::maxDYsize12 : T::maxDYsize; + auto dz = hh[i].zGlobal() - hh[o].zGlobal(); + auto dr = hh[i].rGlobal() - hh[o].rGlobal(); - return onlyBarrel ? so > 0 && std::abs(so - mes) > dy - : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * T::dzdrFact + 0.5f)) > T::maxDYPred; - } + auto innerBarrel = mi < first_forward; + auto onlyBarrel = mo < first_forward; +#ifdef DOUBLETS_DEBUG + printf("i = %d o = %d mo = %d innerB1 = %d isOuterLadder = %d \n", i, o, mo, innerBarrel, onlyBarrel); +#endif + if (not innerBarrel and not onlyBarrel) + return false; + auto dy = innerB1 ? params.maxDYsize12_ : params.maxDYsize_; +#ifdef DOUBLETS_DEBUG + printf("i = %d o = %d dy = %d maxDYsize12_ = %d maxDYsize_ = %d dzdrFact_ = %.2f maxDYPred_ = %d \n", + i, + o, + dy, + params.maxDYsize12_, + params.maxDYsize_, + params.dzdrFact_, + params.maxDYPred_); +#endif + return onlyBarrel + ? 
so > 0 && std::abs(so - mes) > dy + : innerBarrel && std::abs(mes - int(std::abs(dz / dr) * params.dzdrFact_ + 0.5f)) > params.maxDYPred_; + } - template - ALPAKA_FN_ACC ALPAKA_FN_INLINE bool __attribute__((always_inline)) clusterCut(const TAcc& acc, - H hh, - uint32_t i) const { - const uint32_t mi = hh[i].detectorIndex(); - bool innerB1orB2 = mi < T::last_bpix2_detIndex; + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool clusterCut( + const TAcc& acc, HitsConstView hh, ::reco::CALayersSoAConstView ll, AlgoParams const& params, uint32_t i) { + const uint32_t mi = hh[i].detectorIndex(); + const auto first_bpix2 = ll.layerStarts()[1]; + const auto first_bpix3 = ll.layerStarts()[2]; + bool innerB1orB2 = mi < ll.layerStarts()[2]; +#ifdef DOUBLETS_DEBUG + printf( + "i = %d mi = %d innerB1orB2 = %d innerB1 = %d innerB2 = %d minYsizeB1 = %d minYsizeB2 = %d isOuterLadder = %d " + "mes = %d \n", + i, + mi, + innerB1orB2, + mi < first_bpix2, + (mi >= first_bpix2) && (mi < first_bpix3), + params.minYsizeB1_, + params.minYsizeB2_, + (0 == (mi / 8) % 2), + (!(mi < first_bpix2)) || (0 == (mi / 8) % 2) ? hh[i].clusterSizeY() : -1); +#endif + if (!innerB1orB2) + return false; - if (!innerB1orB2) - return false; + bool innerB1 = mi < first_bpix2; - bool innerB1 = mi < T::last_bpix1_detIndex; - bool isOuterLadder = idealConditions_ ? true : 0 == (mi / 8) % 2; - auto mes = (!innerB1) || isOuterLadder ? hh[i].clusterSizeY() : -1; + bool isOuterLadder = 0 == (mi / 8) % 2; + auto mes = (!innerB1) || isOuterLadder ? 
hh[i].clusterSizeY() : -1; - if (innerB1) // B1 - if (mes > 0 && mes < minYsizeB1_) - return true; // only long cluster (5*8) - bool innerB2 = (mi >= T::last_bpix1_detIndex) && (mi < T::last_bpix2_detIndex); //FIXME number - if (innerB2) // B2 and F1 - if (mes > 0 && mes < minYsizeB2_) - return true; + if (innerB1) // B1 + if (mes > 0 && mes < params.minYsizeB1_) + return true; // only long cluster (5*8) + bool innerB2 = (mi >= first_bpix2) && (mi < first_bpix3); + if (innerB2) // B2 and F1 + if (mes > 0 && mes < params.minYsizeB2_) + return true; - return false; - } - }; + return false; + } template - ALPAKA_FN_ACC ALPAKA_FN_INLINE void __attribute__((always_inline)) doubletsFromHisto( - const TAcc& acc, - uint32_t nPairs, - const uint32_t maxNumOfDoublets, - CACellT* cells, - uint32_t* nCells, - CellNeighborsVector* cellNeighbors, - CellTracksVector* cellTracks, - HitsConstView hh, - OuterHitOfCell isOuterHitOfCell, - CellCutsT const& cuts) { // ysize cuts (z in the barrel) times 8 - // these are used if doClusterCut is true - - const bool doClusterCut = cuts.doClusterCut_; - const bool doZ0Cut = cuts.doZ0Cut_; - const bool doPtCut = cuts.doPtCut_; - - const float z0cut = cuts.z0Cut_; // cm - const float hardPtCut = cuts.ptCut_; // GeV + ALPAKA_FN_ACC ALPAKA_FN_INLINE void doubletsFromHisto(const TAcc& acc, + uint32_t maxNumOfDoublets, + CACell* cells, + uint32_t* nCells, + HitsConstView hh, + ::reco::CAGraphSoAConstView cc, + ::reco::CALayersSoAConstView ll, + uint32_t const* __restrict__ offsets, + PhiBinner const* phiBinner, + HitToCell* outerHitHisto, + AlgoParams const& params) { + const bool doClusterCut = params.minYsizeB1_ > 0 or params.minYsizeB2_ > 0; + const bool doZSizeCut = params.maxDYsize12_ > 0 or params.maxDYsize_ > 0 or params.maxDYPred_ > 0; + // cm (1 GeV track has 1 GeV/c / (e * 3.8T) ~ 87 cm radius in a 3.8T field) - const float minRadius = hardPtCut * 87.78f; + const float minRadius = params.cellPtCut_ * 87.78f; const float minRadius2T4 = 
4.f * minRadius * minRadius; - using PhiBinner = typename TrackingRecHitSoA::PhiBinner; - - auto const& __restrict__ phiBinner = hh.phiBinner(); - uint32_t const* __restrict__ offsets = hh.hitsLayerStart().data(); + const uint32_t nPairs = cc.metadata().size(); + using PhiHisto = PhiBinner; ALPAKA_ASSERT_ACC(offsets); auto layerSize = [=](uint8_t li) { return offsets[li + 1] - offsets[li]; }; @@ -172,18 +156,29 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { // nPairsMax to be optimized later (originally was 64). // If it should much be bigger, consider using a block-wide parallel prefix scan, // e.g. see https://nvlabs.github.io/cub/classcub_1_1_warp_scan.html - auto& innerLayerCumulativeSize = alpaka::declareSharedVar(acc); + auto& innerLayerCumulativeSize = alpaka::declareSharedVar(acc); auto& ntot = alpaka::declareSharedVar(acc); - constexpr uint32_t dimIndexY = 0u; - constexpr uint32_t dimIndexX = 1u; - const uint32_t threadIdxLocalY(alpaka::getIdx(acc)[dimIndexY]); - const uint32_t threadIdxLocalX(alpaka::getIdx(acc)[dimIndexX]); +#ifdef DOUBLETS_DEBUG + if (cms::alpakatools::once_per_grid(acc)) + printf( + "maxNumDoublets = %d cc.metadata().size() = %d ll.metadata().size() = %d cellZ0Cut_ = %.2f cellPtCut_ = " + "%.2f doClusterCut = %d doZ0Cut = %d doPtCut = %d doZSizeCut = %d\n", + maxNumOfDoublets, + cc.metadata().size(), + ll.metadata().size(), + params.cellZ0Cut_, + params.cellPtCut_, + doClusterCut, + params.cellZ0Cut_ > 0, + params.cellPtCut_ > 0, + doZSizeCut); +#endif - if (threadIdxLocalY == 0 && threadIdxLocalX == 0) { - innerLayerCumulativeSize[0] = layerSize(TrackerTraits::layerPairs[0]); + if (cms::alpakatools::once_per_block(acc)) { + innerLayerCumulativeSize[0] = layerSize(cc.graph()[0][0]); for (uint32_t i = 1; i < nPairs; ++i) { - innerLayerCumulativeSize[i] = innerLayerCumulativeSize[i - 1] + layerSize(TrackerTraits::layerPairs[2 * i]); + innerLayerCumulativeSize[i] = innerLayerCumulativeSize[i - 1] + 
layerSize(cc.graph()[i][0]); } ntot = innerLayerCumulativeSize[nPairs - 1]; } @@ -203,20 +198,28 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { ALPAKA_ASSERT_ACC(j < innerLayerCumulativeSize[pairLayerId]); ALPAKA_ASSERT_ACC(0 == pairLayerId || j >= innerLayerCumulativeSize[pairLayerId - 1]); - uint8_t inner = TrackerTraits::layerPairs[2 * pairLayerId]; - uint8_t outer = TrackerTraits::layerPairs[2 * pairLayerId + 1]; + uint8_t inner = cc.graph()[pairLayerId][0]; + uint8_t outer = cc.graph()[pairLayerId][1]; ALPAKA_ASSERT_ACC(outer > inner); - auto hoff = PhiBinner::histOff(outer); + auto hoff = PhiHisto::histOff(outer); auto i = (0 == pairLayerId) ? j : j - innerLayerCumulativeSize[pairLayerId - 1]; i += offsets[inner]; ALPAKA_ASSERT_ACC(i >= offsets[inner]); ALPAKA_ASSERT_ACC(i < offsets[inner + 1]); - +#ifdef DOUBLETS_DEBUG + printf("pairLayerId = %d i = %d inner = %d outer = %d offsets[inner] = %d offsets[inner + 1] = %d\n", + pairLayerId, + i, + inner, + outer, + offsets[inner], + offsets[inner + 1]); +#endif // found hit corresponding to our worker thread, now do the job - if (hh[i].detectorIndex() > pixelClustering::maxNumModules) - continue; // invalid + if (hh[i].detectorIndex() > ll.layerStarts()[ll.metadata().size() - 1]) //TODO use cc + continue; // invalid /* maybe clever, not effective when zoCut is on auto bpos = (mi%8)/4; // if barrel is 1 for z>0 @@ -226,10 +229,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { auto mez = hh[i].zGlobal(); - if (mez < TrackerTraits::minz[pairLayerId] || mez > TrackerTraits::maxz[pairLayerId]) + if (mez < cc.minz()[pairLayerId] || mez > cc.maxz()[pairLayerId]) continue; +#ifdef DOUBLETS_DEBUG + if (doClusterCut && outer > pixelTopology::last_barrel_layer) + printf("clustCut: %d %d \n", i, clusterCut(acc, hh, ll, params, i)); +#endif - if (doClusterCut && outer > pixelTopology::last_barrel_layer && cuts.clusterCut(acc, hh, i)) + if (doClusterCut && outer > pixelTopology::last_barrel_layer && 
clusterCut(acc, hh, ll, params, i)) continue; auto mep = hh[i].iphi(); @@ -247,30 +254,34 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { auto zo = hh[j].zGlobal(); auto ro = hh[j].rGlobal(); auto dr = ro - mer; - return dr > TrackerTraits::maxr[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > z0cut * dr; + return dr > cc.maxr()[pairLayerId] || dr < 0 || std::abs((mez * ro - mer * zo)) > params.cellZ0Cut_ * dr; }; - auto iphicut = cuts.phiCuts[pairLayerId]; + auto iphicut = cc.phiCuts()[pairLayerId]; - auto kl = PhiBinner::bin(int16_t(mep - iphicut)); - auto kh = PhiBinner::bin(int16_t(mep + iphicut)); - auto incr = [](auto& k) { return k = (k + 1) % PhiBinner::nbins(); }; + auto kl = PhiHisto::bin(int16_t(mep - iphicut)); + auto kh = PhiHisto::bin(int16_t(mep + iphicut)); + auto incr = [](auto& k) { return k = (k + 1) % PhiHisto::nbins(); }; -#ifdef GPU_DEBUG - int tot = 0; - int nmin = 0; - int tooMany = 0; +#ifdef GPU_DEGBU + printf("pairLayerId %d %d %.2f %.2f %.2f \n", + pairLayerId, + cc.phiCuts()[pairLayerId], + cc.maxr()[pairLayerId], + cc.maxz()[pairLayerId], + cc.minz()[pairLayerId]); #endif auto khh = kh; incr(khh); for (auto kk = kl; kk != khh; incr(kk)) { -#ifdef GPU_DEBUG - if (kk != kl && kk != kh) - nmin += phiBinner.size(kk + hoff); -#endif - auto const* __restrict__ p = phiBinner.begin(kk + hoff); - auto const* __restrict__ e = phiBinner.end(kk + hoff); + //#ifdef GPU_DEBUG + // if (kk != kl && kk != kh) + // nmin += phiBinner->size(kk + hoff); + //#endif + + auto const* __restrict__ p = phiBinner->begin(kk + hoff); + auto const* __restrict__ e = phiBinner->end(kk + hoff); auto const maxpIndex = e - p; // innermost parallel loop, using the block elements along the faster dimension (X or 1 in a 2D grid) @@ -282,10 +293,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { auto mo = hh[oi].detectorIndex(); // invalid - if (mo > pixelClustering::maxNumModules) + if (mo > pixelClustering::maxNumModules) //FIXME use 
cc? continue; - if (doZ0Cut && z0cutoff(oi)) + if (params.cellZ0Cut_ > 0. && z0cutoff(oi)) continue; auto mop = hh[oi].iphi(); @@ -293,42 +304,31 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caPixelDoublets { if (idphi > iphicut) continue; - - if (doClusterCut && cuts.zSizeCut(acc, hh, i, oi)) +#ifdef DOUBLETS_DEBUG + printf("zSizeCut: %d %d %d \n", i, oi, zSizeCut(acc, hh, ll, params, i, oi)); +#endif + if (doZSizeCut && zSizeCut(acc, hh, ll, params, i, oi)) continue; - if (doPtCut && ptcut(oi, idphi)) + if (params.cellPtCut_ > 0. && ptcut(oi, idphi)) continue; - auto ind = alpaka::atomicAdd(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{}); + auto ind = alpaka::atomicAdd(acc, nCells, 1u, alpaka::hierarchy::Blocks{}); if (ind >= maxNumOfDoublets) { - alpaka::atomicSub(acc, nCells, (uint32_t)1, alpaka::hierarchy::Blocks{}); +#ifdef CA_WARNINGS + printf("Warning!!!! Too many cells (limit = %d)!\n", maxNumOfDoublets); +#endif + alpaka::atomicSub(acc, nCells, 1u, alpaka::hierarchy::Blocks{}); break; - } // move to SimpleVector?? - cells[ind].init(*cellNeighbors, *cellTracks, hh, pairLayerId, i, oi); - isOuterHitOfCell[oi].push_back(acc, ind); -#ifdef GPU_DEBUG - if (isOuterHitOfCell[oi].full()) - ++tooMany; - ++tot; + } + + outerHitHisto->count(acc, oi - hh.offsetBPIX2()); + cells[ind].init(hh, pairLayerId, inner, outer, i, oi); +#ifdef DOUBLETS_DEBUG + printf("doublet: %d layerPair: %d inner: %d outer: %d i: %d oi: %d\n", ind, pairLayerId, inner, outer, i, oi); #endif } } -// #endif -#ifdef GPU_DEBUG - if (tooMany > 0 or tot > 0) - printf("OuterHitOfCell for %d in layer %d/%d, %d,%d %d, %d %.3f %.3f %s\n", - i, - inner, - outer, - nmin, - tot, - tooMany, - iphicut, - TrackerTraits::minz[pairLayerId], - TrackerTraits::maxz[pairLayerId], - tooMany > 0 ? "FULL!!" : "not full."); -#endif } // loop in block... 
} diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h index 1790582f213e2..7af104d2fe274 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h @@ -7,12 +7,77 @@ namespace caStructures { + using Quality = ::pixelTrack::Quality; + + //Configuration params common to all topologies, for the algorithms + struct AlgoParams { + // Container sizes + float avgHitsPerTrack_; + float avgCellsPerHit_; + float avgCellsPerCell_; + float avgTracksPerCell_; + + // Algorithm Parameters + uint16_t minHitsPerNtuplet_; + uint16_t minHitsForSharingCut_; + float ptmin_; + float hardCurvCut_; + float cellZ0Cut_; + float cellPtCut_; + + // Pixel Cluster Cut Params + float dzdrFact_; // from dz/dr to "DY" + int16_t minYsizeB1_; + int16_t minYsizeB2_; + int16_t maxDYsize12_; + int16_t maxDYsize_; + int16_t maxDYPred_; + + // Flags + bool useRiemannFit_; + bool fitNas4_; + bool earlyFishbone_; + bool lateFishbone_; + bool doStats_; + bool doSharedHitCut_; + bool dupPassThrough_; + bool useSimpleTripletCleaner_; + }; + + // Hits data formats + using HitsView = ::reco::TrackingRecHitView; + using HitModulesConstView = ::reco::HitModuleSoAConstView; + using HitsConstView = ::reco::TrackingRecHitConstView; + + //Tracks data formats + using TkSoAView = ::reco::TrackSoAView; + using TkHitsSoAView = ::reco::TrackHitSoAView; + + //Indices for hits, tracks and cells + using hindex_type = uint32_t; + using tindex_type = uint32_t; + using cindex_type = uint32_t; + + using GenericContainer = cms::alpakatools::OneToManyAssocRandomAccess; + using GenericContainerStorage = typename GenericContainer::index_type; + using GenericContainerOffsets = typename GenericContainer::Counter; + using GenericContainerView = typename GenericContainer::View; + + using SequentialContainer = cms::alpakatools::OneToManyAssocSequential; + using SequentialContainerStorage = 
typename SequentialContainer::index_type; + using SequentialContainerOffsets = typename SequentialContainer::Counter; + using SequentialContainerView = typename SequentialContainer::View; + + template + using PhiBinnerT = + cms::alpakatools::HistoContainer; + template using CellNeighborsT = cms::alpakatools::VecArray; template - using CellTracksT = cms::alpakatools::VecArray; + using CellTracksT = cms::alpakatools::VecArray; template using CellNeighborsVectorT = cms::alpakatools::SimpleVector>; @@ -24,15 +89,21 @@ namespace caStructures { using OuterHitOfCellContainerT = cms::alpakatools::VecArray; template - using TupleMultiplicityT = cms::alpakatools::OneToManyAssocRandomAccess; + using TupleMultiplicityT = cms::alpakatools:: + OneToManyAssocRandomAccess; + + template + using HitContainerT = + cms::alpakatools::OneToManyAssocSequential; template using HitToTupleT = - cms::alpakatools::OneToManyAssocRandomAccess; // 3.5 should be enough + TrackerTraits::maxNumberOfTuples * + TrackerTraits::avgHitsPerTrack>; // 3.5 should be enough template using TuplesContainerT = cms::alpakatools::OneToManyAssocRandomAccess - void HelixFit::allocate(TupleMultiplicity const *tupleMultiplicity, OutputSoAView &helix_fit_results) { - tuples_ = &helix_fit_results.hitIndices(); + void HelixFit::allocate(TupleMultiplicity const *tupleMultiplicity, + OutputSoAView &helix_fit_results, + Tuples const *__restrict__ foundNtuplets) { + tuples_ = foundNtuplets; tupleMultiplicity_ = tupleMultiplicity; outputSoa_ = helix_fit_results; ALPAKA_ASSERT_ACC(tuples_); ALPAKA_ASSERT_ACC(tupleMultiplicity_); + ALPAKA_ASSERT_ACC(helix_fit_results.pt()); } template diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.h b/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.h index f3e75e83106a7..dfd35c8666086 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/HelixFit.h @@ -10,7 +10,7 @@ #include 
"RecoTracker/PixelTrackFitting/interface/alpaka/FitResult.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" #include "CAStructures.h" @@ -53,34 +53,32 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template class HelixFit { public: - using TrackingRecHitSoAs = TrackingRecHitSoA; + using HitView = ::reco::TrackingRecHitView; + using HitConstView = ::reco::TrackingRecHitConstView; + using OutputSoAView = ::reco::TrackSoAView; + using OutputHitSoAView = ::reco::TrackHitSoAView; - using HitView = TrackingRecHitSoAView; - using HitConstView = TrackingRecHitSoAConstView; - - using Tuples = typename reco::TrackSoA::HitContainer; - using OutputSoAView = reco::TrackSoAView; - - using TupleMultiplicity = caStructures::TupleMultiplicityT; - - using ParamsOnDevice = pixelCPEforDevice::ParamsOnDeviceT; + using Tuples = caStructures::SequentialContainer; + using TupleMultiplicity = caStructures::GenericContainer; explicit HelixFit(float bf, bool fitNas4) : bField_(bf), fitNas4_(fitNas4) {} ~HelixFit() { deallocate(); } void setBField(double bField) { bField_ = bField; } void launchRiemannKernels(const HitConstView &hv, - ParamsOnDevice const *cpeParams, + const ::reco::CAModulesConstView &fr, uint32_t nhits, uint32_t maxNumberOfTuples, Queue &queue); void launchBrokenLineKernels(const HitConstView &hv, - ParamsOnDevice const *cpeParams, + const ::reco::CAModulesConstView &fr, uint32_t nhits, uint32_t maxNumberOfTuples, Queue &queue); - void allocate(TupleMultiplicity const *tupleMultiplicity, OutputSoAView &helix_fit_results); + void allocate(TupleMultiplicity const *tupleMultiplicity, + OutputSoAView &helix_fit_results, + Tuples const *__restrict__ foundNtuplets); void deallocate(); private: diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc 
b/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc index 3e6f6e9c8ed98..e1ecf5825b106 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/RiemannFit.dev.cc @@ -6,18 +6,15 @@ #include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/memory.h" -#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" +#include "RecoTracker/PixelSeeding/interface/CAGeometrySoA.h" #include "RecoTracker/PixelTrackFitting/interface/alpaka/RiemannFit.h" #include "HelixFit.h" #include "CAStructures.h" -template -using Tuples = typename reco::TrackSoA::HitContainer; -template -using OutputSoAView = reco::TrackSoAView; -template -using TupleMultiplicity = caStructures::TupleMultiplicityT; +using OutputSoAView = reco::TrackSoAView; +using TupleMultiplicity = caStructures::GenericContainer; +using Tuples = caStructures::SequentialContainer; namespace ALPAKA_ACCELERATOR_NAMESPACE { using namespace alpaka; @@ -26,13 +23,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template class Kernel_FastFit { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - Tuples const *__restrict__ foundNtuplets, - TupleMultiplicity const *__restrict__ tupleMultiplicity, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + Tuples const *__restrict__ foundNtuplets, + TupleMultiplicity const *__restrict__ tupleMultiplicity, uint32_t nHits, - TrackingRecHitSoAConstView hh, - pixelCPEforDevice::ParamsOnDeviceT const *__restrict__ cpeParams, + ::reco::TrackingRecHitConstView hh, + ::reco::CAModulesConstView cm, double *__restrict__ phits, float *__restrict__ phits_ge, double *__restrict__ pfast_fit, @@ -74,8 +70,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { for (unsigned int i = 0; i < hitsInFit; ++i) { auto hit = hitId[i]; float ge[6]; - 
cpeParams->detParams(hh[hit].detectorIndex()).frame.toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge); - + cm.detFrame(hh.detectorIndex(hit)).toGlobal(hh[hit].xerrLocal(), 0, hh[hit].yerrLocal(), ge); hits.col(i) << hh[hit].xGlobal(), hh[hit].yGlobal(), hh[hit].zGlobal(); hits_ge.col(i) << ge[0], ge[1], ge[2], ge[3], ge[4], ge[5]; } @@ -95,9 +90,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template class Kernel_CircleFit { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TupleMultiplicity const *__restrict__ tupleMultiplicity, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TupleMultiplicity const *__restrict__ tupleMultiplicity, uint32_t nHits, double bField, double *__restrict__ phits, @@ -141,12 +135,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template class Kernel_LineFit { public: - template >> - ALPAKA_FN_ACC void operator()(TAcc const &acc, - TupleMultiplicity const *__restrict__ tupleMultiplicity, + ALPAKA_FN_ACC void operator()(Acc1D const &acc, + TupleMultiplicity const *__restrict__ tupleMultiplicity, uint32_t nHits, double bField, - OutputSoAView results_view, + OutputSoAView results_view, double *__restrict__ phits, float *__restrict__ phits_ge, double *__restrict__ pfast_fit_input, @@ -175,13 +168,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { riemannFit::fromCircleToPerigee(acc, circle_fit[local_idx]); - TracksUtilities::copyFromCircle(results_view, - circle_fit[local_idx].par, - circle_fit[local_idx].cov, - line_fit.par, - line_fit.cov, - 1.f / float(bField), - tkid); + reco::copyFromCircle(results_view, + circle_fit[local_idx].par, + circle_fit[local_idx].cov, + line_fit.par, + line_fit.cov, + 1.f / float(bField), + tkid); results_view[tkid].pt() = bField / std::abs(circle_fit[local_idx].par(2)); results_view[tkid].eta() = asinhf(line_fit.par(0)); results_view[tkid].chi2() = (circle_fit[local_idx].chi2 + line_fit.chi2) / (2 * N - 5); @@ -209,8 +202,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { }; template - 
void HelixFit::launchRiemannKernels(const TrackingRecHitSoAConstView &hv, - pixelCPEforDevice::ParamsOnDeviceT const *cpeParams, + void HelixFit::launchRiemannKernels(const ::reco::TrackingRecHitConstView &hv, + const ::reco::CAModulesConstView &cm, uint32_t nhits, uint32_t maxNumberOfTuples, Queue &queue) { @@ -242,7 +235,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { tupleMultiplicity_, 3, hv, - cpeParams, + cm, hitsDevice.data(), hits_geDevice.data(), fast_fit_resultsDevice.data(), @@ -281,7 +274,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { tupleMultiplicity_, 4, hv, - cpeParams, + cm, hitsDevice.data(), hits_geDevice.data(), fast_fit_resultsDevice.data(), @@ -321,7 +314,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { tupleMultiplicity_, 5, hv, - cpeParams, + cm, hitsDevice.data(), hits_geDevice.data(), fast_fit_resultsDevice.data(), @@ -360,7 +353,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { tupleMultiplicity_, 5, hv, - cpeParams, + cm, hitsDevice.data(), hits_geDevice.data(), fast_fit_resultsDevice.data(), diff --git a/RecoTracker/PixelSeeding/src/ES_CAGeometry.cc b/RecoTracker/PixelSeeding/src/ES_CAGeometry.cc new file mode 100644 index 0000000000000..71a97604ea576 --- /dev/null +++ b/RecoTracker/PixelSeeding/src/ES_CAGeometry.cc @@ -0,0 +1,4 @@ +#include "RecoTracker/PixelSeeding/interface/CAGeometryHost.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(reco::CAGeometryHost); diff --git a/RecoTracker/PixelSeeding/test/alpaka/CAsizes_t.cpp b/RecoTracker/PixelSeeding/test/alpaka/CAsizes_t.cpp index 9e164d1eb09e1..7b570f3e29ca0 100644 --- a/RecoTracker/PixelSeeding/test/alpaka/CAsizes_t.cpp +++ b/RecoTracker/PixelSeeding/test/alpaka/CAsizes_t.cpp @@ -17,7 +17,7 @@ int main() { using namespace caStructures; //for Phase-I - print>(); + print>(); print>(); print>(); print>(); @@ -27,7 +27,7 @@ int main() { print>(); //for Phase-II - print>(); + print>(); print>(); print>(); print>(); diff --git 
a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc index 6ccb7789fc098..2bd9204fb977c 100644 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc +++ b/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc @@ -16,7 +16,7 @@ template class PixelTrackDumpAlpakaT : public edm::global::EDAnalyzer<> { public: - using TkSoAHost = TracksHost; + using TkSoAHost = reco::TracksHost; using VertexSoAHost = ZVertexHost; explicit PixelTrackDumpAlpakaT(const edm::ParameterSet& iConfig); diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc index 5769c8c53976c..acb780850b638 100644 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc +++ b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc @@ -42,14 +42,13 @@ * objects from the output of SoA CA. */ -//#define GPU_DEBUG +// #define GPU_DEBUG -template class PixelTrackProducerFromSoAAlpaka : public edm::global::EDProducer<> { - using TrackSoAHost = TracksHost; - using TracksHelpers = TracksUtilities; + using TrackSoAHost = reco::TracksHost; using HMSstorage = std::vector; using IndToEdm = std::vector; + using TrackHitSoA = reco::TrackHitSoA; public: explicit PixelTrackProducerFromSoAAlpaka(const edm::ParameterSet &iConfig); @@ -73,8 +72,7 @@ class PixelTrackProducerFromSoAAlpaka : public edm::global::EDProducer<> { pixelTrack::Quality const minQuality_; }; -template -PixelTrackProducerFromSoAAlpaka::PixelTrackProducerFromSoAAlpaka(const edm::ParameterSet &iConfig) +PixelTrackProducerFromSoAAlpaka::PixelTrackProducerFromSoAAlpaka(const edm::ParameterSet &iConfig) : tBeamSpot_(consumes(iConfig.getParameter("beamSpot"))), tokenTrack_(consumes(iConfig.getParameter("trackSrc"))), cpuHits_(consumes(iConfig.getParameter("pixelRecHitLegacySrc"))), @@ -100,8 +98,7 @@ 
PixelTrackProducerFromSoAAlpaka::PixelTrackProducerFromSoAAlpaka( produces(); } -template -void PixelTrackProducerFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions &descriptions) { +void PixelTrackProducerFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions &descriptions) { edm::ParameterSetDescription desc; desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); desc.add("trackSrc", edm::InputTag("pixelTracksAlpaka")); @@ -111,10 +108,9 @@ void PixelTrackProducerFromSoAAlpaka::fillDescriptions(edm::Confi descriptions.addWithDefaultLabel(desc); } -template -void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID streamID, - edm::Event &iEvent, - const edm::EventSetup &iSetup) const { +void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID streamID, + edm::Event &iEvent, + const edm::EventSetup &iSetup) const { // enum class Quality : uint8_t { bad = 0, edup, dup, loose, strict, tight, highPurity }; reco::TrackBase::TrackQuality recoQuality[] = {reco::TrackBase::undefQuality, reco::TrackBase::undefQuality, @@ -141,17 +137,17 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID strea const auto &bsh = iEvent.get(tBeamSpot_); GlobalPoint bs(bsh.x0(), bsh.y0(), bsh.z0()); - auto const &rechits = iEvent.get(cpuHits_); + auto const &pixelRecHitsDSV = iEvent.get(cpuHits_); std::vector hitmap; - auto const &rcs = rechits.data(); - auto const nhits = rcs.size(); - - hitmap.resize(nhits, nullptr); + auto const &pixelRecHits = pixelRecHitsDSV.data(); + auto const nPixelHits = pixelRecHits.size(); auto const &hitsModuleStart = iEvent.get(hmsToken_); - for (auto const &hit : rcs) { - auto const &thit = static_cast(hit); + hitmap.resize(nPixelHits, nullptr); + + for (auto const &pixelHit : pixelRecHits) { + auto const &thit = static_cast(pixelHit); auto const detI = thit.det()->index(); auto const &clus = thit.firstClusterRef(); assert(clus.isPixel()); @@ -160,15 +156,17 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID 
strea hitmap.resize(idx + 256, nullptr); // only in case of hit overflow in one module assert(nullptr == hitmap[idx]); - hitmap[idx] = &hit; + hitmap[idx] = &pixelHit; } std::vector hits; - hits.reserve(5); + hits.reserve(5); //TODO move to a configurable parameter? auto const &tsoa = iEvent.get(tokenTrack_); auto const *quality = tsoa.view().quality(); - auto const &hitIndices = tsoa.view().hitIndices(); + auto const *hitOffs = tsoa.view().hitOffsets(); + auto const *hitIdxs = tsoa.template view().id(); + // auto const &hitIndices = tsoa.view().hitIndices(); auto nTracks = tsoa.view().nTracks(); tracks.reserve(nTracks); @@ -189,7 +187,7 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID strea //store the index of the SoA: indToEdm[index_SoAtrack] -> index_edmTrack (if it exists) indToEdm.resize(sortIdxs.size(), -1); for (const auto &it : sortIdxs) { - auto nHits = TracksHelpers::nHits(tsoa.view(), it); + auto nHits = reco::nHits(tsoa.view(), it); assert(nHits >= 3); auto q = quality[it]; @@ -201,9 +199,18 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID strea ++nt; hits.resize(nHits); - auto b = hitIndices.begin(it); - for (int iHit = 0; iHit < nHits; ++iHit) - hits[iHit] = hitmap[*(b + iHit)]; + auto start = (it == 0) ? 0 : hitOffs[it - 1]; + auto end = hitOffs[it]; + + for (auto iHit = start; iHit < end; ++iHit) + hits[iHit - start] = hitmap[hitIdxs[iHit]]; + +#ifdef CA_DEBUG + std::cout << "track soa : " << it << " with hits: "; + for (auto iHit = start; iHit < end; ++iHit) + std::cout << hitIdxs[iHit] << " - "; + std::cout << std::endl; +#endif // mind: this values are respect the beamspot! 
float chi2 = tsoa.view()[it].chi2(); @@ -211,7 +218,7 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID strea riemannFit::Vector5d ipar, opar; riemannFit::Matrix5d icov, ocov; - TracksHelpers::template copyToDense(tsoa.view(), ipar, icov, it); + reco::copyToDense(tsoa.view(), ipar, icov, it); riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); @@ -254,6 +261,7 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID strea // filter??? tracks.emplace_back(track.release(), hits); } + #ifdef GPU_DEBUG std::cout << "processed " << nt << " good tuples " << tracks.size() << " out of " << indToEdm.size() << std::endl; #endif @@ -263,11 +271,14 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID strea iEvent.put(std::move(indToEdmP)); } -using PixelTrackProducerFromSoAAlpakaPhase1 = PixelTrackProducerFromSoAAlpaka; -using PixelTrackProducerFromSoAAlpakaPhase2 = PixelTrackProducerFromSoAAlpaka; -using PixelTrackProducerFromSoAAlpakaHIonPhase1 = PixelTrackProducerFromSoAAlpaka; - #include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(PixelTrackProducerFromSoAAlpaka); + +// (also) for HLT migration, could be removed once done +using PixelTrackProducerFromSoAAlpakaPhase1 = PixelTrackProducerFromSoAAlpaka; +using PixelTrackProducerFromSoAAlpakaPhase2 = PixelTrackProducerFromSoAAlpaka; +using PixelTrackProducerFromSoAAlpakaHIonPhase1 = PixelTrackProducerFromSoAAlpaka; + DEFINE_FWK_MODULE(PixelTrackProducerFromSoAAlpakaPhase1); DEFINE_FWK_MODULE(PixelTrackProducerFromSoAAlpakaPhase2); DEFINE_FWK_MODULE(PixelTrackProducerFromSoAAlpakaHIonPhase1); diff --git a/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py b/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py index 79b257ab64fa3..63004687dc0dd 100644 --- a/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py +++ b/RecoTracker/PixelTrackFitting/python/PixelTracks_cff.py @@ -103,9 
+103,42 @@ from RecoTracker.PixelSeeding.caHitNtupletAlpakaPhase2_cfi import caHitNtupletAlpakaPhase2 as _pixelTracksAlpakaPhase2 from RecoTracker.PixelSeeding.caHitNtupletAlpakaHIonPhase1_cfi import caHitNtupletAlpakaHIonPhase1 as _pixelTracksAlpakaHIonPhase1 -pixelTracksAlpaka = _pixelTracksAlpakaPhase1.clone() +pixelTracksAlpaka = _pixelTracksAlpakaPhase1.clone( + avgHitsPerTrack = 4.6, + avgCellsPerHit = 13, + avgCellsPerCell = 0.0268, + avgTracksPerCell = 0.0123, + maxNumberOfDoublets = str(512*1024), # could be lowered to 315k, keeping the same for a fair comparison with master + maxNumberOfTuples = str(32 * 1024), # this could be much lower (2.1k, these are quads) +) phase2_tracker.toReplaceWith(pixelTracksAlpaka,_pixelTracksAlpakaPhase2.clone()) -(pp_on_AA & ~phase2_tracker).toReplaceWith(pixelTracksAlpaka, _pixelTracksAlpakaHIonPhase1.clone()) +phase2_tracker.toModify(pixelTracksAlpaka, + maxNumberOfDoublets = str(5*512*1024), + maxNumberOfTuples = str(256 * 1024), + avgHitsPerTrack = 7.0, + avgCellsPerHit = 6, + avgCellsPerCell = 0.151, + avgTracksPerCell = 0.040, + cellPtCut = 0.85, + cellZ0Cut = 7.5, + minYsizeB1 = 25, + minYsizeB2 = 15, + maxDYsize12 = 12, + maxDYsize = 10, + maxDYPred = 20, +) + + +(pp_on_AA & ~phase2_tracker).toModify(pixelTracksAlpaka, + maxNumberOfDoublets = str(6 * 512 *1024), # this could be 2.3M + maxNumberOfTuples = str(256 * 1024), # this could be 4.7 + avgHitsPerTrack = 5.0, + avgCellsPerHit = 40, + avgCellsPerCell = 0.07, # with maxNumberOfDoublets ~= 3.14M; 0.02 for HLT HI on 2024 HI Data + avgTracksPerCell = 0.03, # with maxNumberOfDoublets ~= 3.14M; 0.005 for HLT HI on 2024 HI Data + cellPtCut = 0.5, # setup currently used @ HLT (was 0.0) + cellZ0Cut = 8.0, # setup currently used @ HLT (was 10.0) +) # pixel tracks SoA producer on the cpu, for validation pixelTracksAlpakaSerial = makeSerialClone(pixelTracksAlpaka, @@ -113,19 +146,9 @@ ) # legacy pixel tracks from SoA -from
RecoTracker.PixelTrackFitting.pixelTrackProducerFromSoAAlpakaPhase1_cfi import pixelTrackProducerFromSoAAlpakaPhase1 as _pixelTrackProducerFromSoAAlpakaPhase1 -from RecoTracker.PixelTrackFitting.pixelTrackProducerFromSoAAlpakaPhase2_cfi import pixelTrackProducerFromSoAAlpakaPhase2 as _pixelTrackProducerFromSoAAlpakaPhase2 -from RecoTracker.PixelTrackFitting.pixelTrackProducerFromSoAAlpakaHIonPhase1_cfi import pixelTrackProducerFromSoAAlpakaHIonPhase1 as _pixelTrackProducerFromSoAAlpakaHIonPhase1 - -(alpaka & ~phase2_tracker).toReplaceWith(pixelTracks, _pixelTrackProducerFromSoAAlpakaPhase1.clone( - pixelRecHitLegacySrc = "siPixelRecHitsPreSplitting", -)) - -(alpaka & phase2_tracker).toReplaceWith(pixelTracks, _pixelTrackProducerFromSoAAlpakaPhase2.clone( - pixelRecHitLegacySrc = "siPixelRecHitsPreSplitting", -)) +from RecoTracker.PixelTrackFitting.pixelTrackProducerFromSoAAlpaka_cfi import pixelTrackProducerFromSoAAlpaka as _pixelTrackProducerFromSoAAlpaka -(alpaka & ~phase2_tracker & pp_on_AA).toReplaceWith(pixelTracks, _pixelTrackProducerFromSoAAlpakaHIonPhase1.clone( +(alpaka).toReplaceWith(pixelTracks, _pixelTrackProducerFromSoAAlpaka.clone( pixelRecHitLegacySrc = "siPixelRecHitsPreSplitting", )) diff --git a/RecoTracker/Record/src/alpaka/ES_CAGeometrySoACollection.cc b/RecoTracker/Record/src/alpaka/ES_CAGeometrySoACollection.cc new file mode 100644 index 0000000000000..c5a38facda122 --- /dev/null +++ b/RecoTracker/Record/src/alpaka/ES_CAGeometrySoACollection.cc @@ -0,0 +1,4 @@ +#include "RecoTracker/PixelSeeding/interface/alpaka/CAGeometrySoACollection.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(reco::CAGeometrySoACollection); diff --git a/RecoVertex/PixelVertexFinding/plugins/alpaka/PixelVertexProducerAlpaka.cc b/RecoVertex/PixelVertexFinding/plugins/alpaka/PixelVertexProducerAlpaka.cc index 3190c53cc8d01..28f9c5327e544 100644 --- 
a/RecoVertex/PixelVertexFinding/plugins/alpaka/PixelVertexProducerAlpaka.cc +++ b/RecoVertex/PixelVertexFinding/plugins/alpaka/PixelVertexProducerAlpaka.cc @@ -28,7 +28,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template class PixelVertexProducerAlpaka : public global::EDProducer<> { - using TkSoADevice = TracksSoACollection; + using TkSoADevice = reco::TracksSoACollection; using Algo = vertexFinder::Producer; public: diff --git a/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc b/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc index 2350f73a76cbb..a9fc2a51808fb 100644 --- a/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc +++ b/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc @@ -29,17 +29,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class LoadTracks { public: ALPAKA_FN_ACC void operator()(Acc1D const& acc, - reco::TrackSoAConstView tracks_view, + ::reco::TrackSoAConstView tracks_view, VtxSoAView data, TrkSoAView trkdata, WsSoAView ws, float ptMin, float ptMax) const { auto const* quality = tracks_view.quality(); - using helper = TracksUtilities; for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.nTracks())) { - [[maybe_unused]] auto nHits = helper::nHits(tracks_view, idx); + [[maybe_unused]] auto nHits = reco::nHits(tracks_view, idx); ALPAKA_ASSERT_ACC(nHits >= 3); // initialize the track data @@ -126,11 +125,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { #endif template - ZVertexSoACollection Producer::makeAsync(Queue& queue, - reco::TrackSoAConstView const& tracks_view, - int maxVertices, - float ptMin, - float ptMax) const { + ZVertexSoACollection Producer::makeAsync( + Queue& queue, ::reco::TrackSoAConstView const& tracks_view, int maxVertices, float ptMin, float ptMax) const { #ifdef PIXVERTEX_DEBUG_PRODUCE std::cout << "producing Vertices on GPU" << std::endl; #endif // PIXVERTEX_DEBUG_PRODUCE diff --git a/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.h 
b/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.h index 82745b1de3a30..7d4eedd16db1c 100644 --- a/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.h +++ b/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.h @@ -32,7 +32,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { template class Producer { - using TkSoAConstView = reco::TrackSoAConstView; + using TkSoAConstView = ::reco::TrackSoAConstView; public: Producer(bool oneKernel,