diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHits.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHits.cc index d6fa4b9c87032..75c2808b05790 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHits.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareRecHits.cc @@ -111,8 +111,8 @@ void SiPixelCompareRecHits::analyzeSeparate(U tokenRef, V tokenTar, const edm::E const auto& rhsoaRef = *rhsoaHandleRef; const auto& rhsoaTar = *rhsoaHandleTar; - auto const& soa2dRef = rhsoaRef.const_view(); - auto const& soa2dTar = rhsoaTar.const_view(); + auto const& soa2dRef = rhsoaRef.const_view().trackingHits(); + auto const& soa2dTar = rhsoaTar.const_view().trackingHits(); uint32_t nHitsRef = soa2dRef.metadata().size(); uint32_t nHitsTar = soa2dTar.metadata().size(); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTracks.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTracks.cc index 5f5480421a9ad..09511789077e7 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTracks.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareTracks.cc @@ -163,11 +163,14 @@ void SiPixelCompareTracks::analyzeSeparate(U tokenRef, V tokenTar, const edm::Ev auto const& tsoaRef = *tsoaHandleRef; auto const& tsoaTar = *tsoaHandleTar; - auto maxTracksRef = tsoaRef.view().metadata().size(); //this should be same for both? - auto maxTracksTar = tsoaTar.view().metadata().size(); //this should be same for both? + auto refTracks = tsoaRef.view().tracks(); + auto tarTracks = tsoaTar.view().tracks(); - auto const qualityRef = tsoaRef.view().quality(); - auto const qualityTar = tsoaTar.view().quality(); + auto maxTracksRef = refTracks.metadata().size(); //this should be same for both? + auto maxTracksTar = tarTracks.metadata().size(); //this should be same for both? 
+ + auto const qualityRef = refTracks.quality(); + auto const qualityTar = tarTracks.quality(); int32_t nTracksRef = 0; int32_t nTracksTar = 0; @@ -178,9 +181,9 @@ void SiPixelCompareTracks::analyzeSeparate(U tokenRef, V tokenTar, const edm::Ev //Loop over Tar tracks and store the indices of the loose tracks. Whats happens if useQualityCut_ is false? std::vector looseTrkidxTar; for (int32_t jt = 0; jt < maxTracksTar; ++jt) { - if (reco::nHits(tsoaTar.view(), jt) == 0) + if (reco::nHits(tarTracks, jt) == 0) break; // this is a guard - if (!(tsoaTar.view()[jt].pt() > 0.)) + if (!(tarTracks[jt].pt() > 0.)) continue; nTracksTar++; if (useQualityCut_ && qualityTar[jt] < minQuality_) @@ -191,17 +194,17 @@ void SiPixelCompareTracks::analyzeSeparate(U tokenRef, V tokenTar, const edm::Ev //Now loop over Ref tracks//nested loop for loose gPU tracks for (int32_t it = 0; it < maxTracksRef; ++it) { - int nHitsRef = reco::nHits(tsoaRef.view(), it); + int nHitsRef = reco::nHits(refTracks, it); if (nHitsRef == 0) break; // this is a guard - float ptRef = tsoaRef.view()[it].pt(); - float etaRef = tsoaRef.view()[it].eta(); - float phiRef = reco::phi(tsoaRef.view(), it); - float zipRef = reco::zip(tsoaRef.view(), it); - float tipRef = reco::tip(tsoaRef.view(), it); - auto qRef = reco::charge(tsoaRef.view(), it); + float ptRef = refTracks[it].pt(); + float etaRef = refTracks[it].eta(); + float phiRef = reco::phi(refTracks, it); + float zipRef = reco::zip(refTracks, it); + float tipRef = reco::tip(refTracks, it); + auto qRef = reco::charge(refTracks, it); if (!(ptRef > 0.)) continue; @@ -215,8 +218,8 @@ void SiPixelCompareTracks::analyzeSeparate(U tokenRef, V tokenTar, const edm::Ev float mindr2 = dr2cut_; for (auto gid : looseTrkidxTar) { - float etaTar = tsoaTar.view()[gid].eta(); - float phiTar = reco::phi(tsoaTar.view(), gid); + float etaTar = tarTracks[gid].eta(); + float phiTar = reco::phi(tarTracks, gid); float dr2 = reco::deltaR2(etaRef, phiRef, etaTar, phiTar); if (dr2 > 
dr2cut_) continue; // this is arbitrary @@ -232,25 +235,24 @@ void SiPixelCompareTracks::analyzeSeparate(U tokenRef, V tokenTar, const edm::Ev continue; nLooseAndAboveTracksRef_matchedTar++; - hchi2_->Fill(tsoaRef.view()[it].chi2(), tsoaTar.view()[closestTkidx].chi2()); - hCharge_->Fill(qRef, reco::charge(tsoaTar.view(), closestTkidx)); - hnHits_->Fill(reco::nHits(tsoaRef.view(), it), reco::nHits(tsoaTar.view(), closestTkidx)); - hnLayers_->Fill(tsoaRef.view()[it].nLayers(), tsoaTar.view()[closestTkidx].nLayers()); - hpt_->Fill(ptRef, tsoaTar.view()[closestTkidx].pt()); - hCurvature_->Fill(qRef / ptRef, reco::charge(tsoaTar.view(), closestTkidx) / tsoaTar.view()[closestTkidx].pt()); - hptLogLog_->Fill(ptRef, tsoaTar.view()[closestTkidx].pt()); - heta_->Fill(etaRef, tsoaTar.view()[closestTkidx].eta()); - hphi_->Fill(phiRef, reco::phi(tsoaTar.view(), closestTkidx)); - hz_->Fill(zipRef, reco::zip(tsoaTar.view(), closestTkidx)); - htip_->Fill(tipRef, reco::tip(tsoaTar.view(), closestTkidx)); - hptdiffMatched_->Fill(ptRef - tsoaTar.view()[closestTkidx].pt()); - hCurvdiffMatched_->Fill(qRef / ptRef - - (reco::charge(tsoaTar.view(), closestTkidx) / tsoaTar.view()[closestTkidx].pt())); - hetadiffMatched_->Fill(etaRef - tsoaTar.view()[closestTkidx].eta()); - hphidiffMatched_->Fill(reco::deltaPhi(phiRef, reco::phi(tsoaTar.view(), closestTkidx))); - hzdiffMatched_->Fill(zipRef - reco::zip(tsoaTar.view(), closestTkidx)); - htipdiffMatched_->Fill(tipRef - reco::tip(tsoaTar.view(), closestTkidx)); - hpt_eta_tkAllRefMatched_->Fill(etaRef, tsoaRef.view()[it].pt()); //matched to gpu + hchi2_->Fill(refTracks[it].chi2(), tarTracks[closestTkidx].chi2()); + hCharge_->Fill(qRef, reco::charge(tarTracks, closestTkidx)); + hnHits_->Fill(reco::nHits(refTracks, it), reco::nHits(tarTracks, closestTkidx)); + hnLayers_->Fill(refTracks[it].nLayers(), tarTracks[closestTkidx].nLayers()); + hpt_->Fill(ptRef, tarTracks[closestTkidx].pt()); + hCurvature_->Fill(qRef / ptRef, reco::charge(tarTracks, 
closestTkidx) / tarTracks[closestTkidx].pt()); + hptLogLog_->Fill(ptRef, tarTracks[closestTkidx].pt()); + heta_->Fill(etaRef, tarTracks[closestTkidx].eta()); + hphi_->Fill(phiRef, reco::phi(tarTracks, closestTkidx)); + hz_->Fill(zipRef, reco::zip(tarTracks, closestTkidx)); + htip_->Fill(tipRef, reco::tip(tarTracks, closestTkidx)); + hptdiffMatched_->Fill(ptRef - tarTracks[closestTkidx].pt()); + hCurvdiffMatched_->Fill(qRef / ptRef - (reco::charge(tarTracks, closestTkidx) / tarTracks[closestTkidx].pt())); + hetadiffMatched_->Fill(etaRef - tarTracks[closestTkidx].eta()); + hphidiffMatched_->Fill(reco::deltaPhi(phiRef, reco::phi(tarTracks, closestTkidx))); + hzdiffMatched_->Fill(zipRef - reco::zip(tarTracks, closestTkidx)); + htipdiffMatched_->Fill(tipRef - reco::tip(tarTracks, closestTkidx)); + hpt_eta_tkAllRefMatched_->Fill(etaRef, refTracks[it].pt()); //matched to gpu hphi_z_tkAllRefMatched_->Fill(etaRef, zipRef); } diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertices.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertices.cc index 792c7878087ae..de05d96e8f2ed 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertices.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelCompareVertices.cc @@ -39,8 +39,8 @@ class SiPixelCompareVertices : public DQMEDAnalyzer { private: // these two are both on Host but originally they have been produced on Host or on Device - const edm::EDGetTokenT tokenSoAVertexReferenceSoA_; - const edm::EDGetTokenT tokenSoAVertexTargetSoA_; + const edm::EDGetTokenT tokenSoAVertexReferenceSoA_; + const edm::EDGetTokenT tokenSoAVertexTargetSoA_; const edm::EDGetTokenT tokenBeamSpot_; const std::string topFolderName_; const float dzCut_; @@ -63,8 +63,9 @@ class SiPixelCompareVertices : public DQMEDAnalyzer { SiPixelCompareVertices::SiPixelCompareVertices(const edm::ParameterSet& iConfig) : tokenSoAVertexReferenceSoA_( - consumes(iConfig.getParameter("pixelVertexReferenceSoA"))), - 
tokenSoAVertexTargetSoA_(consumes(iConfig.getParameter("pixelVertexTargetSoA"))), + consumes(iConfig.getParameter("pixelVertexReferenceSoA"))), + tokenSoAVertexTargetSoA_( + consumes(iConfig.getParameter("pixelVertexTargetSoA"))), tokenBeamSpot_(consumes(iConfig.getParameter("beamSpotSrc"))), topFolderName_(iConfig.getParameter("topFolderName")), dzCut_(iConfig.getParameter("dzCut")) {} @@ -87,9 +88,9 @@ void SiPixelCompareVertices::analyzeSeparate(U tokenRef, V tokenTar, const edm:: } auto const& vsoaRef = *vsoaHandleRef; - int nVerticesRef = vsoaRef.view().nvFinal(); + int nVerticesRef = vsoaRef.view().zvertex().nvFinal(); auto const& vsoaTar = *vsoaHandleTar; - int nVerticesTar = vsoaTar.view().nvFinal(); + int nVerticesTar = vsoaTar.view().zvertex().nvFinal(); auto bsHandle = iEvent.getHandle(tokenBeamSpot_); float x0 = 0., y0 = 0., z0 = 0., dxdz = 0., dydz = 0.; @@ -105,22 +106,22 @@ void SiPixelCompareVertices::analyzeSeparate(U tokenRef, V tokenTar, const edm:: } for (int ivc = 0; ivc < nVerticesRef; ivc++) { - auto sic = vsoaRef.view()[ivc].sortInd(); - auto zc = vsoaRef.view()[sic].zv(); + auto sic = vsoaRef.view().zvertex()[ivc].sortInd(); + auto zc = vsoaRef.view().zvertex()[sic].zv(); auto xc = x0 + dxdz * zc; auto yc = y0 + dydz * zc; zc += z0; - auto ndofRef = vsoaRef.template view()[sic].ndof(); - auto chi2Ref = vsoaRef.view()[sic].chi2(); + auto ndofRef = vsoaRef.view().zvertexTracks()[sic].ndof(); + auto chi2Ref = vsoaRef.view().zvertex()[sic].chi2(); const int32_t notFound = -1; int32_t closestVtxidx = notFound; float mindz = dzCut_; for (int ivg = 0; ivg < nVerticesTar; ivg++) { - auto sig = vsoaTar.view()[ivg].sortInd(); - auto zgc = vsoaTar.view()[sig].zv() + z0; + auto sig = vsoaTar.view().zvertex()[ivg].sortInd(); + auto zgc = vsoaTar.view().zvertex()[sig].zv() + z0; auto zDist = std::abs(zc - zgc); //insert some matching condition if (zDist > dzCut_) @@ -133,12 +134,12 @@ void SiPixelCompareVertices::analyzeSeparate(U tokenRef, V tokenTar, 
const edm:: if (closestVtxidx == notFound) continue; - auto zg = vsoaTar.view()[closestVtxidx].zv(); + auto zg = vsoaTar.view().zvertex()[closestVtxidx].zv(); auto xg = x0 + dxdz * zg; auto yg = y0 + dydz * zg; zg += z0; - auto ndofTar = vsoaTar.template view()[closestVtxidx].ndof(); - auto chi2Tar = vsoaTar.view()[closestVtxidx].chi2(); + auto ndofTar = vsoaTar.view().zvertexTracks()[closestVtxidx].ndof(); + auto chi2Tar = vsoaTar.view().zvertex()[closestVtxidx].chi2(); hx_->Fill(xc - x0, xg - x0); hy_->Fill(yc - y0, yg - y0); @@ -148,7 +149,7 @@ void SiPixelCompareVertices::analyzeSeparate(U tokenRef, V tokenTar, const edm:: hzdiff_->Fill(zc - zg); hchi2_->Fill(chi2Ref, chi2Tar); hchi2oNdof_->Fill(chi2Ref / ndofRef, chi2Tar / ndofTar); - hptv2_->Fill(vsoaRef.view()[sic].ptv2(), vsoaTar.view()[closestVtxidx].ptv2()); + hptv2_->Fill(vsoaRef.view().zvertex()[sic].ptv2(), vsoaTar.view().zvertex()[closestVtxidx].ptv2()); hntrks_->Fill(ndofRef + 1, ndofTar + 1); } hnVertex_->Fill(nVerticesRef, nVerticesTar); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoAAlpaka.cc index df84da66fbd1b..62676ab14e092 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoAAlpaka.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorRecHitsSoAAlpaka.cc @@ -87,7 +87,7 @@ void SiPixelMonitorRecHitsSoAAlpaka::analyze(const edm::Event& iEvent, const edm return; } auto const& rhsoa = *rhsoaHandle; - auto const& soa2d = rhsoa.const_view(); + auto const& soa2d = rhsoa.const_view().trackingHits(); uint32_t nHits_ = soa2d.metadata().size(); hnHits->Fill(nHits_); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoAAlpaka.cc index 877ecb483691e..36bb7e5dbcde7 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoAAlpaka.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorTrackSoAAlpaka.cc @@ 
-83,17 +83,17 @@ void SiPixelMonitorTrackSoAAlpaka::analyze(const edm::Event& iEvent, const edm:: } auto const& tsoa = *tsoaHandle.product(); - auto maxTracks = tsoa.view().metadata().size(); - auto const quality = tsoa.view().quality(); + auto maxTracks = tsoa.view().tracks().metadata().size(); + auto const quality = tsoa.view().tracks().quality(); int32_t nTracks = 0; int32_t nLooseAndAboveTracks = 0; for (int32_t it = 0; it < maxTracks; ++it) { - auto nHits = reco::nHits(tsoa.const_view(), it); - auto nLayers = tsoa.view()[it].nLayers(); + auto nHits = reco::nHits(tsoa.const_view().tracks(), it); + auto nLayers = tsoa.view().tracks()[it].nLayers(); if (nHits == 0) break; // this is a guard - float pt = tsoa.view()[it].pt(); + float pt = tsoa.view().tracks()[it].pt(); if (!(pt > 0.)) continue; @@ -106,13 +106,13 @@ void SiPixelMonitorTrackSoAAlpaka::analyze(const edm::Event& iEvent, const edm:: continue; // fill parameters only for quality >= loose - - float chi2 = tsoa.view()[it].chi2(); - float phi = tsoa.view()[it].state()(0); //TODO: put these numbers in enum - float zip = tsoa.view()[it].state()(4); - float eta = tsoa.view()[it].eta(); - float tip = tsoa.view()[it].state()(1); - auto charge = reco::charge(tsoa.view(), it); + auto track = tsoa.view().tracks()[it]; + float chi2 = track.chi2(); + float phi = track.state()(0); //TODO: put these numbers in enum + float zip = track.state()(4); + float eta = track.eta(); + float tip = track.state()(1); + auto charge = reco::charge(tsoa.view().tracks(), it); hchi2->Fill(chi2); hChi2VsPhi->Fill(phi, chi2); diff --git a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoAAlpaka.cc b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoAAlpaka.cc index 7b488553626b8..a22dafb8b87cf 100644 --- a/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoAAlpaka.cc +++ b/DQM/SiPixelHeterogeneous/plugins/SiPixelMonitorVertexSoAAlpaka.cc @@ -34,7 +34,7 @@ class SiPixelMonitorVertexSoAAlpaka : public DQMEDAnalyzer { static 
void fillDescriptions(edm::ConfigurationDescriptions& descriptions); private: - const edm::EDGetTokenT tokenSoAVertex_; + const edm::EDGetTokenT tokenSoAVertex_; const edm::EDGetTokenT tokenBeamSpot_; std::string topFolderName_; MonitorElement* hnVertex; @@ -52,7 +52,7 @@ class SiPixelMonitorVertexSoAAlpaka : public DQMEDAnalyzer { // SiPixelMonitorVertexSoAAlpaka::SiPixelMonitorVertexSoAAlpaka(const edm::ParameterSet& iConfig) - : tokenSoAVertex_(consumes(iConfig.getParameter("pixelVertexSrc"))), + : tokenSoAVertex_(consumes(iConfig.getParameter("pixelVertexSrc"))), tokenBeamSpot_(consumes(iConfig.getParameter("beamSpotSrc"))), topFolderName_(iConfig.getParameter("topFolderName")) {} @@ -67,8 +67,8 @@ void SiPixelMonitorVertexSoAAlpaka::analyze(const edm::Event& iEvent, const edm: } auto const& vsoa = *vsoaHandle; - auto vtx_view = vsoa.view(); - auto trk_view = vsoa.view(); + auto vtx_view = vsoa.view().zvertex(); + auto trk_view = vsoa.view().zvertexTracks(); int nVertices = vtx_view.nvFinal(); auto bsHandle = iEvent.getHandle(tokenBeamSpot_); float x0 = 0., y0 = 0., z0 = 0., dxdz = 0., dydz = 0.; diff --git a/DataFormats/HGCalDigi/src/classes_def.xml b/DataFormats/HGCalDigi/src/classes_def.xml index c72e17ae60676..a9df2fa71e6f6 100644 --- a/DataFormats/HGCalDigi/src/classes_def.xml +++ b/DataFormats/HGCalDigi/src/classes_def.xml @@ -50,9 +50,6 @@ - - - @@ -74,6 +71,5 @@ - diff --git a/DataFormats/Portable/README.md b/DataFormats/Portable/README.md index 56922621e380d..dda335e6fb068 100644 --- a/DataFormats/Portable/README.md +++ b/DataFormats/Portable/README.md @@ -147,12 +147,13 @@ Modules that implement portable interfaces (_e.g._ producers) should use the gen ## Multi layout collections -Some use cases require multiple sets of columns of different sizes. This is can be achieved in a single -`PortableCollection` using `PortableCollection2`, `PortableCollection3` and so on up to -`PortableCollection5<...>`. 
The numbered, fixed size wrappers are needed in order to be added to the ROOT dictionary. -Behind the scenes recursive `PortableHostMultiCollection` and -`ALPAKA_ACCELERATOR_NAMESPACE::PortableDeviceMultiCollection` (note the reversed parameter order) provide -the actual class definitions. +Multiple SoA layouts can be concatenated using the `GENERATE_SOA_BLOCKS` macro (see `DataFormats/SoATemplate` for details). +This macro generates a new composite layout that contains other layouts as members +and manages a single contiguous memory buffer large enough to hold all of them. + +A `PortableCollection` can then be templated on this `SoABlocks` layout. +In this case, the `view` and `const_view` methods return composite views that themselves contain views of each sublayout. + ## ROOT dictionary declaration helper scripts @@ -161,17 +162,16 @@ as instructed in `/src/classes_dev.xml` and `/src/alpaka/classes `/src/alpaka/classes_rocm_def.xml`. Two scripts generate the code to be added to the xml files. Both scripts expect the collections to be aliased as in: ``` -using TestDeviceMultiCollection3 = PortableCollection3; +using TestDeviceCollection3 = PortableCollection; ``` -and assume the `TestDeviceMultiCollection3` is used in the `SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES()` macro. -For the host xml, SoA layouts have to be listed and duplicates should be removed manually is multiple -collections share a same layout. The scripts are called as follows: +In the example here the `TestSoABlocks3` template is an SoABlocks layout composed of three layouts. +For the host xml, all SoA layouts have to be listed. 
The scripts are called as follows: ``` -./DataFormats/Portable/scripts/portableHostCollectionHints portabletest::TestHostMultiCollection3 \ - portabletest::TestSoALayout portabletest::TestSoALayout2 portabletest::TestSoALayout3 +./DataFormats/Portable/scripts/portableHostCollectionHints portabletest::TestHostCollection3 \ + portabletest::TestSoALayout portabletest::TestSoALayout2 portabletest::TestSoALayout3 portabletest::SoABlocks3 ./DataFormats/Portable/scripts/portableDeviceCollectionHints portabletest::TestHostMultiCollection3 ``` -The layouts should not be added as parameters for the device collection. Those script can be use equally with the -single layout collections or multi layout collections. +The layouts should not be added as parameters for the device collection. These scripts can be used equally with the +single layout collections or multi layout collections. \ No newline at end of file diff --git a/DataFormats/Portable/interface/PortableCollection.h b/DataFormats/Portable/interface/PortableCollection.h index 182d2726c2cbb..6b4b11b34beda 100644 --- a/DataFormats/Portable/interface/PortableCollection.h +++ b/DataFormats/Portable/interface/PortableCollection.h @@ -25,49 +25,29 @@ namespace traits { using CollectionType = PortableHostCollection; }; - // trait for a generic multi-SoA-based product - template - struct PortableMultiCollectionTrait { - using CollectionType = PortableDeviceMultiCollection; - }; - - // specialise for host device - template - struct PortableMultiCollectionTrait { - using CollectionType = PortableHostMultiCollection; - }; - } // namespace traits // type alias for a generic SoA-based product template >> using PortableCollection = typename traits::PortableCollectionTrait::CollectionType; -// type alias for a generic SoA-based product -template -using PortableMultiCollection = typename traits::PortableMultiCollectionTrait::CollectionType; - // define how to copy PortableCollection between host and device namespace cms::alpakatools { 
template requires alpaka::isDevice struct CopyToHost> { template - requires alpaka::isQueue + requires alpaka::isQueue && (!portablecollection::hasBlocksNumber) static auto copyAsync(TQueue& queue, PortableDeviceCollection const& srcData) { PortableHostCollection dstData(srcData->metadata().size(), queue); alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); return dstData; } - }; - template - requires alpaka::isDevice - struct CopyToHost> { template - requires alpaka::isQueue - static auto copyAsync(TQueue& queue, PortableDeviceMultiCollection const& srcData) { - PortableHostMultiCollection dstData(srcData.sizes(), queue); + requires alpaka::isQueue && portablecollection::hasBlocksNumber + static auto copyAsync(TQueue& queue, PortableDeviceCollection const& srcData) { + PortableHostCollection dstData(queue, srcData->metadata().size()); alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); return dstData; } @@ -76,24 +56,24 @@ namespace cms::alpakatools { template struct CopyToDevice> { template + requires(!portablecollection::hasBlocksNumber) static auto copyAsync(TQueue& queue, PortableHostCollection const& srcData) { using TDevice = typename alpaka::trait::DevType::type; PortableDeviceCollection dstData(srcData->metadata().size(), queue); alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); return dstData; } - }; - template - struct CopyToDevice> { template - static auto copyAsync(TQueue& queue, PortableHostMultiCollection const& srcData) { + requires portablecollection::hasBlocksNumber + static auto copyAsync(TQueue& queue, PortableHostCollection const& srcData) { using TDevice = typename alpaka::trait::DevType::type; - PortableDeviceMultiCollection dstData(srcData.sizes(), queue); + PortableDeviceCollection dstData(queue, srcData->metadata().size()); alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); return dstData; } }; + } // namespace cms::alpakatools #endif // DataFormats_Portable_interface_PortableCollection_h diff --git 
a/DataFormats/Portable/interface/PortableCollectionCommon.h b/DataFormats/Portable/interface/PortableCollectionCommon.h index 65b7c11baeda3..632e948a57e3e 100644 --- a/DataFormats/Portable/interface/PortableCollectionCommon.h +++ b/DataFormats/Portable/interface/PortableCollectionCommon.h @@ -7,97 +7,6 @@ namespace portablecollection { - // Note: if there are other uses for this, it could be moved to a central place - template - constexpr void constexpr_for(F&& f) { - if constexpr (Start < End) { - f(std::integral_constant()); - constexpr_for(std::forward(f)); - } - } - - template - struct CollectionLeaf { - CollectionLeaf() = default; - CollectionLeaf(std::byte* buffer, int32_t elements) : layout_(buffer, elements), view_(layout_) {} - template - CollectionLeaf(std::byte* buffer, std::array const& sizes) - : layout_(buffer, sizes[Idx]), view_(layout_) { - static_assert(N >= Idx); - } - using Layout = T; - using View = typename Layout::View; - using ConstView = typename Layout::ConstView; - Layout layout_; // - View view_; //! - // Make sure types are not void. - static_assert(not std::is_same::value); - }; - - template - struct CollectionImpl : public CollectionLeaf, public CollectionImpl { - CollectionImpl() = default; - CollectionImpl(std::byte* buffer, int32_t elements) : CollectionLeaf(buffer, elements) {} - - template - CollectionImpl(std::byte* buffer, std::array const& sizes) - : CollectionLeaf(buffer, sizes), - CollectionImpl(CollectionLeaf::layout_.metadata().nextByte(), sizes) {} - }; - - template - struct CollectionImpl : public CollectionLeaf { - CollectionImpl() = default; - CollectionImpl(std::byte* buffer, int32_t elements) : CollectionLeaf(buffer, elements) {} - - template - CollectionImpl(std::byte* buffer, std::array const& sizes) : CollectionLeaf(buffer, sizes) { - static_assert(N == Idx + 1); - } - }; - - template - struct Collections : public CollectionImpl<0, Args...> {}; - - // return the type at the Idx position in Args... 
- template - using TypeResolver = typename std::tuple_element>::type; - - // count how many times the type T occurs in Args... - template - inline constexpr std::size_t typeCount = ((std::is_same::value ? 1 : 0) + ... + 0); - - // count the non-void elements of Args... - template - inline constexpr std::size_t membersCount = sizeof...(Args); - - // if the type T occurs in Tuple, TupleTypeIndex has a static member value with the corresponding index; - // otherwise there is no such data member. - template - struct TupleTypeIndex {}; - - template - struct TupleTypeIndex> { - static_assert(typeCount == 0, "the requested type appears more than once among the arguments"); - static constexpr std::size_t value = 0; - }; - - template - struct TupleTypeIndex> { - static_assert(not std::is_same_v); - static_assert(typeCount == 1, "the requested type does not appear among the arguments"); - static constexpr std::size_t value = 1 + TupleTypeIndex>::value; - }; - - // if the type T occurs in Args..., TypeIndex has a static member value with the corresponding index; - // otherwise there is no such data member. - template - using TypeIndex = TupleTypeIndex>; - - // return the index where the type T occurs in Args... 
- template - inline constexpr std::size_t typeIndex = TypeIndex::value; - // concept to check if a Layout has a static member blocksNumber template concept hasBlocksNumber = requires { L::blocksNumber; }; diff --git a/DataFormats/Portable/interface/PortableDeviceCollection.h b/DataFormats/Portable/interface/PortableDeviceCollection.h index 0ccbb648c748d..6693fd15dc691 100644 --- a/DataFormats/Portable/interface/PortableDeviceCollection.h +++ b/DataFormats/Portable/interface/PortableDeviceCollection.h @@ -135,6 +135,9 @@ class PortableDeviceCollection { _deepCopy<0>(desc_, desc, queue); } + // Either int32_t for normal layouts or std::array for SoABlocks layouts + auto size() const { return layout_.metadata().size(); } + private: // Helper function implementing the recursive deep copy template @@ -155,223 +158,4 @@ class PortableDeviceCollection { View view_; //! }; -// generic SoA-based product in device memory -template -class PortableDeviceMultiCollection { - //static_assert(alpaka::isDevice); - static_assert(not std::is_same_v, - "Use PortableHostCollection instead of PortableDeviceCollection"); - - template - static constexpr std::size_t count_t_ = portablecollection::typeCount; - - template - static constexpr std::size_t index_t_ = portablecollection::typeIndex; - - static constexpr std::size_t members_ = sizeof...(Args) + 1; - -public: - using Buffer = cms::alpakatools::device_buffer; - using ConstBuffer = cms::alpakatools::const_device_buffer; - using Implementation = portablecollection::CollectionImpl<0, T0, Args...>; - - using SizesArray = std::array; - - template - using Layout = portablecollection::TypeResolver; - - //template - //using View = typename Layout::View; - // Workaround for flaky expansion of tempaltes by nvcc (expanding with "Args" instead of "Args... 
- template - using View = typename std::tuple_element>::type::View; - - //template - //using ConstView = typename Layout::ConstView; - // Workaround for flaky expansion of tempaltes by nvcc (expanding with "Args" instead of "Args..." - template - using ConstView = typename std::tuple_element>::type::ConstView; - -private: - template - using Leaf = portablecollection::CollectionLeaf>; - - template - Leaf& get() { - return static_cast&>(impl_); - } - - template - Leaf const& get() const { - return static_cast const&>(impl_); - } - - template - Leaf>& get() { - return static_cast>&>(impl_); - } - - template - Leaf> const& get() const { - return static_cast> const&>(impl_); - } - -public: - PortableDeviceMultiCollection() = delete; - - explicit PortableDeviceMultiCollection(edm::Uninitialized) noexcept {} - - PortableDeviceMultiCollection(int32_t elements, TDev const& device) - : buffer_{cms::alpakatools::make_device_buffer(device, Layout<>::computeDataSize(elements))}, - impl_{buffer_->data(), elements} { - // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 - assert(reinterpret_cast(buffer_->data()) % Layout<>::alignment == 0); - static_assert(members_ == 1); - } - - template >> - PortableDeviceMultiCollection(int32_t elements, TQueue const& queue) - : buffer_{cms::alpakatools::make_device_buffer(queue, Layout<>::computeDataSize(elements))}, - impl_{buffer_->data(), elements} { - // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 - assert(reinterpret_cast(buffer_->data()) % Layout<>::alignment == 0); - static_assert(members_ == 1); - } - - static int32_t computeDataSize(const SizesArray& sizes) { - int32_t ret = 0; - portablecollection::constexpr_for<0, members_>( - [&sizes, &ret](auto i) { ret += Layout::computeDataSize(sizes[i]); }); - return ret; - } - - PortableDeviceMultiCollection(const SizesArray& sizes, TDev const& device) - // allocate device memory - : 
buffer_{cms::alpakatools::make_device_buffer(device, computeDataSize(sizes))}, - impl_{buffer_->data(), sizes} { - portablecollection::constexpr_for<0, members_>( - [&](auto i) { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); }); - constexpr auto alignment = Layout<0>::alignment; - portablecollection::constexpr_for<1, members_>( - [&alignment](auto i) { static_assert(alignment == Layout::alignment); }); - } - - template >> - PortableDeviceMultiCollection(const SizesArray& sizes, TQueue const& queue) - // allocate device memory asynchronously on the given work queue - : buffer_{cms::alpakatools::make_device_buffer(queue, computeDataSize(sizes))}, - impl_{buffer_->data(), sizes} { - portablecollection::constexpr_for<0, members_>( - [&](auto i) { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); }); - constexpr auto alignment = Layout<0>::alignment; - portablecollection::constexpr_for<1, members_>( - [&alignment](auto i) { static_assert(alignment == Layout::alignment); }); - } - - // non-copyable - PortableDeviceMultiCollection(PortableDeviceMultiCollection const&) = delete; - PortableDeviceMultiCollection& operator=(PortableDeviceMultiCollection const&) = delete; - - // movable - PortableDeviceMultiCollection(PortableDeviceMultiCollection&&) = default; - PortableDeviceMultiCollection& operator=(PortableDeviceMultiCollection&&) = default; - - // default destructor - ~PortableDeviceMultiCollection() = default; - - // access the View by index - template Idx)>> - View& view() { - return get().view_; - } - - template Idx)>> - ConstView const& view() const { - return get().view_; - } - - template Idx)>> - ConstView const& const_view() const { - return get().view_; - } - - template Idx)>> - View& operator*() { - return get().view_; - } - - template Idx)>> - ConstView const& operator*() const { - return get().view_; - } - - template Idx)>> - View* operator->() { - return &get().view_; - } - - template Idx)>> - ConstView const* 
operator->() const { - return &get().view_; - } - - // access the View by type - template - typename T::View& view() { - return get().view_; - } - - template - typename T::ConstView const& view() const { - return get().view_; - } - - template - typename T::ConstView const& const_view() const { - return get().view_; - } - - template - typename T::View& operator*() { - return get().view_; - } - - template - typename T::ConstView const& operator*() const { - return get().view_; - } - - template - typename T::View* operator->() { - return &get().view_; - } - - template - typename T::ConstView const* operator->() const { - return &get().view_; - } - - // access the Buffer - Buffer buffer() { return *buffer_; } - ConstBuffer buffer() const { return *buffer_; } - ConstBuffer const_buffer() const { return *buffer_; } - - // erases the data in the Buffer by writing zeros (bytes containing '\0') to it - template >> - void zeroInitialise(TQueue&& queue) { - alpaka::memset(std::forward(queue), *buffer_, 0x00); - } - - // extract the sizes array - SizesArray sizes() const { - SizesArray ret; - portablecollection::constexpr_for<0, members_>([&](auto i) { ret[i] = get().layout_.metadata().size(); }); - return ret; - } - -private: - std::optional buffer_; //! 
- Implementation impl_; // (serialized: this is where the layouts live) -}; - #endif // DataFormats_Portable_interface_PortableDeviceCollection_h diff --git a/DataFormats/Portable/interface/PortableHostCollection.h b/DataFormats/Portable/interface/PortableHostCollection.h index 869598e880c59..6f90d1a87dba7 100644 --- a/DataFormats/Portable/interface/PortableHostCollection.h +++ b/DataFormats/Portable/interface/PortableHostCollection.h @@ -143,8 +143,16 @@ class PortableHostCollection { static void ROOTReadStreamer(PortableHostCollection* newObj, Layout& layout) { // destroy the default-constructed collection newObj->~PortableHostCollection(); + // construct in-place a new collection, with the known size, using the global "host" object returned by cms::alpakatools::host() - new (newObj) PortableHostCollection(layout.metadata().size(), cms::alpakatools::host()); + if constexpr (portablecollection::hasBlocksNumber) { + // Version with blocks: (host, size) + new (newObj) PortableHostCollection(cms::alpakatools::host(), layout.metadata().size()); + } else { + // Version without blocks: (size, host) + new (newObj) PortableHostCollection(layout.metadata().size(), cms::alpakatools::host()); + } + // copy the data from the on-file layout to the new collection newObj->layout_.ROOTReadStreamer(layout); // free the memory allocated by ROOT @@ -165,6 +173,9 @@ class PortableHostCollection { _deepCopy<0>(desc_, desc, queue); } + // Either int32_t for normal layouts or std::array for SoABlocks layouts + auto size() const { return layout_.metadata().size(); } + private: // Helper function implementing the recursive deep copy template @@ -185,267 +196,31 @@ class PortableHostCollection { View view_; //! 
}; -// generic SoA-based product in host memory -template -class PortableHostMultiCollection { - template - static constexpr std::size_t count_t_ = portablecollection::typeCount; - - template - static constexpr std::size_t index_t_ = portablecollection::typeIndex; - - static constexpr std::size_t members_ = portablecollection::membersCount; - -public: - using Buffer = cms::alpakatools::host_buffer; - using ConstBuffer = cms::alpakatools::const_host_buffer; - using Implementation = portablecollection::CollectionImpl<0, T0, Args...>; - - using SizesArray = std::array; - - template Idx)>> - using Layout = portablecollection::TypeResolver; - template Idx)>> - using View = typename Layout::View; - template Idx)>> - using ConstView = typename Layout::ConstView; - -private: - template - using Leaf = portablecollection::CollectionLeaf>; - - template - Leaf& get() { - return static_cast&>(impl_); - } - - template - Leaf const& get() const { - return static_cast const&>(impl_); - } - - template - portablecollection::CollectionLeaf, T>& get() { - return static_cast, T>&>(impl_); - } - - template - const portablecollection::CollectionLeaf, T>& get() const { - return static_cast, T>&>(impl_); - } - - static int32_t computeDataSize(const std::array& sizes) { - int32_t ret = 0; - portablecollection::constexpr_for<0, members_>( - [&sizes, &ret](auto i) { ret += Layout::computeDataSize(sizes[i]); }); - return ret; - } - -public: - PortableHostMultiCollection() = delete; - - explicit PortableHostMultiCollection(edm::Uninitialized) noexcept {}; - - PortableHostMultiCollection(int32_t elements, alpaka_common::DevHost const& host) - // allocate pageable host memory - : buffer_{cms::alpakatools::make_host_buffer(Layout<>::computeDataSize(elements))}, - impl_{buffer_->data(), elements} { - // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 - assert(reinterpret_cast(buffer_->data()) % Layout<>::alignment == 0); - static_assert(members_ == 
1); - } - - template >> - PortableHostMultiCollection(int32_t elements, TQueue const& queue) - // allocate pinned host memory associated to the given work queue, accessible by the queue's device - : buffer_{cms::alpakatools::make_host_buffer(queue, Layout<>::computeDataSize(elements))}, - impl_{buffer_->data(), elements} { - // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 - assert(reinterpret_cast(buffer_->data()) % Layout<>::alignment == 0); - static_assert(members_ == 1); - } - - PortableHostMultiCollection(const std::array& sizes, alpaka_common::DevHost const& host) - // allocate pinned host memory associated to the given work queue, accessible by the queue's device - : buffer_{cms::alpakatools::make_host_buffer(computeDataSize(sizes))}, - impl_{buffer_->data(), sizes} { - // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 - portablecollection::constexpr_for<0, members_>( - [&](auto i) { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); }); - constexpr auto alignment = Layout<0>::alignment; - portablecollection::constexpr_for<1, members_>( - [&alignment](auto i) { static_assert(alignment == Layout::alignment); }); - } - - template >> - PortableHostMultiCollection(const std::array& sizes, TQueue const& queue) - // allocate pinned host memory associated to the given work queue, accessible by the queue's device - : buffer_{cms::alpakatools::make_host_buffer(queue, computeDataSize(sizes))}, - impl_{buffer_->data(), sizes} { - // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 - portablecollection::constexpr_for<0, members_>( - [&](auto i) { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); }); - constexpr auto alignment = Layout<0>::alignment; - portablecollection::constexpr_for<1, members_>( - [&alignment](auto i) { static_assert(alignment == Layout::alignment); }); - } - - // 
non-copyable - PortableHostMultiCollection(PortableHostMultiCollection const&) = delete; - PortableHostMultiCollection& operator=(PortableHostMultiCollection const&) = delete; - - // movable - PortableHostMultiCollection(PortableHostMultiCollection&&) = default; - PortableHostMultiCollection& operator=(PortableHostMultiCollection&&) = default; - - // default destructor - ~PortableHostMultiCollection() = default; - - // access the View by index - template Idx)>> - View& view() { - return get().view_; - } - - template Idx)>> - ConstView const& view() const { - return get().view_; - } - - template Idx)>> - ConstView const& const_view() const { - return get().view_; - } - - template Idx)>> - View& operator*() { - return get().view_; - } - - template Idx)>> - ConstView const& operator*() const { - return get().view_; - } - - template Idx)>> - View* operator->() { - return &get().view_; - } - - template Idx)>> - ConstView const* operator->() const { - return &get().view_; - } - - // access the View by type - template - typename T::View& view() { - return get().view_; - } - - template - typename T::ConstView const& view() const { - return get().view_; - } - - template - typename T::ConstView const& const_view() const { - return get().view_; - } - - template - typename T::View& operator*() { - return get().view_; - } - - template - typename T::ConstView const& operator*() const { - return get().view_; - } - - template - typename T::View* operator->() { - return &get().view_; - } - - template - typename T::ConstView const* operator->() const { - return &get().view_; - } - - // access the Buffer - Buffer buffer() { return *buffer_; } - ConstBuffer buffer() const { return *buffer_; } - ConstBuffer const_buffer() const { return *buffer_; } - - // erases the data in the Buffer by writing zeros (bytes containing '\0') to it - void zeroInitialise() { - std::memset(std::data(*buffer_), 0x00, alpaka::getExtentProduct(*buffer_) * sizeof(std::byte)); - } - - template >> - void 
zeroInitialise(TQueue&& queue) { - alpaka::memset(std::forward(queue), *buffer_, 0x00); - } - - // extract the sizes array - SizesArray sizes() const { - SizesArray ret; - portablecollection::constexpr_for<0, members_>([&](auto i) { ret[i] = get().layout_.metadata().size(); }); - return ret; - } - - // part of the ROOT read streamer - static void ROOTReadStreamer(PortableHostMultiCollection* newObj, Implementation& onfileImpl) { - newObj->~PortableHostMultiCollection(); - // use the global "host" object returned by cms::alpakatools::host() - std::array sizes; - portablecollection::constexpr_for<0, members_>([&sizes, &onfileImpl](auto i) { - sizes[i] = static_cast const&>(onfileImpl).layout_.metadata().size(); - }); - new (newObj) PortableHostMultiCollection(sizes, cms::alpakatools::host()); - portablecollection::constexpr_for<0, members_>([&newObj, &onfileImpl](auto i) { - static_cast&>(newObj->impl_).layout_.ROOTReadStreamer(static_cast const&>(onfileImpl).layout_); - static_cast&>(onfileImpl).layout_.ROOTStreamerCleaner(); - }); - } - -private: - std::optional buffer_; //! - Implementation impl_; // (serialized: this is where the layouts live) -}; - -// Singleton case does not need to be aliased. A special template covers it. - -// This aliasing is needed to work with ROOT serialization. Bare templates make dictionary compilation fail. 
-template -using PortableHostCollection2 = ::PortableHostMultiCollection; - -template -using PortableHostCollection3 = ::PortableHostMultiCollection; - -template -using PortableHostCollection4 = ::PortableHostMultiCollection; - -template -using PortableHostCollection5 = ::PortableHostMultiCollection; - namespace ngt { // Specialize the MemoryCopyTraits for PortableHostColletion template struct MemoryCopyTraits> { using value_type = PortableHostCollection; - using Properties = int32_t; + // For basic Layouts -> T::size_type, for Layouts by blocks -> std::array + using Properties = decltype(std::declval()->metadata().size()); // The properties needed to initialize a new PrortableHostCollection are just its size. static Properties properties(value_type const& object) { return object->metadata().size(); } - static void initialize(value_type& object, Properties const& size) { - // Replace the default-constructed empty object with one where the buffer has been allocated in pageable system memory. + // Replace the default-constructed empty object with one where the buffer has been allocated in pageable system memory. + static void initialize(value_type& object, Properties const& size) + requires(!portablecollection::hasBlocksNumber) + { object = value_type(size, cms::alpakatools::host()); } + static void initialize(value_type& object, Properties const& size) + requires portablecollection::hasBlocksNumber + { + object = value_type(cms::alpakatools::host(), size); + } + static std::vector> regions(value_type& object) { // The whole PortableHostCollection is stored in a single contiguous memory region. 
std::byte* address = reinterpret_cast(object.buffer().data()); @@ -460,35 +235,6 @@ namespace ngt { return {{address, size}}; } }; - - // Specialize the MemoryCopyTraits for PortableHostMultiCollection - template - struct MemoryCopyTraits> { - using value_type = PortableHostMultiCollection; - using Properties = typename PortableHostMultiCollection::SizesArray; - - // The properties needed to initialize a new PrortableHostMultiCollection are the sizes of all underlying PortableHostCollections. - static Properties properties(PortableHostMultiCollection const& object) { return object.sizes(); } - - static void initialize(PortableHostMultiCollection& object, Properties const& sizes) { - // Replace the default-constructed empty object with one where the buffer has been allocated in pageable system memory. - object = PortableHostMultiCollection(sizes, cms::alpakatools::host()); - } - - static std::vector> regions(PortableHostMultiCollection& object) { - // The whole PortableHostMultiCollection is stored in a single contiguous memory region. - std::byte* address = reinterpret_cast(object.buffer().data()); - size_t size = alpaka::getExtentProduct(object.buffer()); - return {{address, size}}; - } - - static std::vector> regions(PortableHostMultiCollection const& object) { - // The whole PortableHostMultiCollection is stored in a single contiguous memory region. 
- const std::byte* address = reinterpret_cast(object.buffer().data()); - size_t size = alpaka::getExtentProduct(object.buffer()); - return {{address, size}}; - } - }; } // namespace ngt #endif // DataFormats_Portable_interface_PortableHostCollection_h diff --git a/DataFormats/Portable/interface/PortableHostCollectionReadRules.h b/DataFormats/Portable/interface/PortableHostCollectionReadRules.h index bc6a6e730e238..e207665b757e2 100644 --- a/DataFormats/Portable/interface/PortableHostCollectionReadRules.h +++ b/DataFormats/Portable/interface/PortableHostCollectionReadRules.h @@ -37,35 +37,6 @@ static void readPortableHostCollection_v1(char *target, TVirtualObject *from_buf Collection::ROOTReadStreamer(newObj, onfile.layout_); } -// read function for PortableHostCollection, called for every event -template -static void readPortableHostMultiCollection_v1(char *target, TVirtualObject *from_buffer) { - // extract the actual types - using Collection = T; - using Implementation = typename Collection::Implementation; - - // valid only for PortableHostCollection - //static_assert(std::is_same_v>); - - // proxy for the object being read from file - struct OnFile { - Implementation &impl_; - }; - - // address in memory of the buffer containing the object being read from file - char *address = static_cast(from_buffer->GetObject()); - // offset of the "layout_" data member - static ptrdiff_t impl_offset = from_buffer->GetClass()->GetDataMemberOffset("impl_"); - // reference to the Layout object being read from file - OnFile onfile = {*(Implementation *)(address + impl_offset)}; - - // pointer to the Collection object being constructed in memory - Collection *newObj = (Collection *)target; - - // move the data from the on-file layout to the newly constructed object - Collection::ROOTReadStreamer(newObj, onfile.impl_); -} - // put set_PortableHostCollection_read_rules in the ROOT namespace to let it forward declare GenerateInitInstance namespace ROOT { @@ -96,42 +67,10 @@ 
namespace ROOT { return true; } - - // set the read rules for PortableHostMultiCollection; - // this is called only once, when the dictionary is loaded. - template - static bool set_PortableHostMultiCollection_read_rules(std::string const &type) { - // forward declaration - TGenericClassInfo *GenerateInitInstance(T const *); - - // build the read rules - std::vector readrules(1); - ROOT::Internal::TSchemaHelper &rule = readrules[0]; - rule.fTarget = "buffer_,impl_"; - rule.fSourceClass = type; - rule.fSource = type + "::Implementation impl_;"; - rule.fCode = type + "::ROOTReadStreamer(newObj, onfile.impl_)"; - rule.fVersion = "[1-]"; - rule.fChecksum = ""; - rule.fInclude = ""; - rule.fEmbed = false; - rule.fFunctionPtr = reinterpret_cast(::readPortableHostMultiCollection_v1); - rule.fAttributes = ""; - - // set the read rules - TGenericClassInfo *instance = GenerateInitInstance((T const *)nullptr); - instance->SetReadRules(readrules); - - return true; - } } // namespace ROOT #define SET_PORTABLEHOSTCOLLECTION_READ_RULES(COLLECTION) \ static bool EDM_CONCATENATE(set_PortableHostCollection_read_rules_done_at_, __LINE__) [[maybe_unused]] = \ ROOT::set_PortableHostCollection_read_rules(EDM_STRINGIZE(COLLECTION)) -#define SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(COLLECTION) \ - static bool EDM_CONCATENATE(set_PortableHostMultiCollection_read_rules_done_at_, __LINE__) [[maybe_unused]] = \ - ROOT::set_PortableHostMultiCollection_read_rules(EDM_STRINGIZE(COLLECTION)) - #endif // DataFormats_Portable_interface_PortableHostCollectionReadRules_h diff --git a/DataFormats/Portable/interface/alpaka/PortableCollection.h b/DataFormats/Portable/interface/alpaka/PortableCollection.h index 8a081ea53e651..1f9fa22e49cd8 100644 --- a/DataFormats/Portable/interface/alpaka/PortableCollection.h +++ b/DataFormats/Portable/interface/alpaka/PortableCollection.h @@ -16,21 +16,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { template using PortableCollection = ::PortableCollection; - // 
Singleton case does not need to be aliased. A special template covers it. - // - // This aliasing is needed to work with ROOT serialization. Bare templates make dictionary compilation fail. - template - using PortableCollection2 = ::PortableMultiCollection; - - template - using PortableCollection3 = ::PortableMultiCollection; - - template - using PortableCollection4 = ::PortableMultiCollection; - - template - using PortableCollection5 = ::PortableMultiCollection; - } // namespace ALPAKA_ACCELERATOR_NAMESPACE #endif // DataFormats_Portable_interface_alpaka_PortableCollection_h diff --git a/DataFormats/Portable/scripts/portableDeviceCollectionHints b/DataFormats/Portable/scripts/portableDeviceCollectionHints index 40c3943db3144..109c1d43230ed 100755 --- a/DataFormats/Portable/scripts/portableDeviceCollectionHints +++ b/DataFormats/Portable/scripts/portableDeviceCollectionHints @@ -8,6 +8,7 @@ if len(sys.argv) != 2: collectionName = sys.argv[1] +# TODO: do we need to change something here becuase of the removal of PortableMultiCollection? 
print("In /src/alpaka/classes_cuda_def.xml (with necessary includes in /src/aplaka/classes_cuda.h):\n") print("") print(" ") print(" "% collectionName) -print(" "% collectionName) -print(" "% collectionName) +print(" "% collectionName) +print(" "% collectionName) +print() +for l in layouts: + print(" "% l) print() -if len(layouts) > 1: - print(" ") - for i in range(0, len(layouts)): - print(" \"/>") - print("\n ") - for i in range(0, len(layouts)): - print(" \"/>" % (i, layouts[i])) - print("") print(" \" splitLevel=\"0\"/>"% collectionName) print("") diff --git a/DataFormats/Portable/test/test_catch2_portableMultiCollectionOnHost.cc b/DataFormats/Portable/test/test_catch2_portableMultiCollectionOnHost.cc deleted file mode 100644 index 88d1cd8e4dfe6..0000000000000 --- a/DataFormats/Portable/test/test_catch2_portableMultiCollectionOnHost.cc +++ /dev/null @@ -1,28 +0,0 @@ -#include - -#include "DataFormats/Portable/interface/PortableCollection.h" -#include "DataFormats/Portable/interface/PortableHostCollection.h" -#include "DataFormats/SoATemplate/interface/SoACommon.h" -#include "DataFormats/SoATemplate/interface/SoALayout.h" - -namespace { - GENERATE_SOA_LAYOUT(TestLayout1, SOA_COLUMN(double, x), SOA_COLUMN(int32_t, id)) - GENERATE_SOA_LAYOUT(TestLayout2, SOA_COLUMN(float, y), SOA_COLUMN(int32_t, z)) - - using TestSoA1 = TestLayout1<>; - using TestSoA2 = TestLayout2<>; - - constexpr auto s_tag = "[PortableMultiCollection]"; -} // namespace - -// This test is currently mostly about the code compiling -TEST_CASE("Use of PortableMultiCollection on host code", s_tag) { - std::array const sizes{{10, 5}}; - - PortableMultiCollection coll(sizes, cms::alpakatools::host()); - - SECTION("Tests") { REQUIRE(coll.sizes() == sizes); } - - static_assert(std::is_same_v, - PortableHostMultiCollection>); -} diff --git a/DataFormats/PortableTestObjects/interface/TestHostCollection.h b/DataFormats/PortableTestObjects/interface/TestHostCollection.h index 65150341ad1d2..d8aa4136e1432 
100644 --- a/DataFormats/PortableTestObjects/interface/TestHostCollection.h +++ b/DataFormats/PortableTestObjects/interface/TestHostCollection.h @@ -9,9 +9,9 @@ namespace portabletest { // SoA with x, y, z, id fields in host memory using TestHostCollection = PortableHostCollection; - using TestHostMultiCollection2 = PortableHostCollection2; + using TestHostCollection2 = PortableHostCollection; - using TestHostMultiCollection3 = PortableHostCollection3; + using TestHostCollection3 = PortableHostCollection; } // namespace portabletest diff --git a/DataFormats/PortableTestObjects/interface/TestSoA.h b/DataFormats/PortableTestObjects/interface/TestSoA.h index 3e9ba9fa8f888..3e86b6b506fd8 100644 --- a/DataFormats/PortableTestObjects/interface/TestSoA.h +++ b/DataFormats/PortableTestObjects/interface/TestSoA.h @@ -9,6 +9,7 @@ #include "DataFormats/Common/interface/StdArray.h" #include "DataFormats/SoATemplate/interface/SoACommon.h" #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" namespace portabletest { @@ -61,6 +62,16 @@ namespace portabletest { using TestSoA3 = TestSoALayout3<>; + GENERATE_SOA_BLOCKS(SoABlocks2, SOA_BLOCK(first, TestSoALayout), SOA_BLOCK(second, TestSoALayout2)) + + using TestSoABlocks2 = SoABlocks2<>; + + GENERATE_SOA_BLOCKS(SoABlocks3, + SOA_BLOCK(first, TestSoALayout), + SOA_BLOCK(second, TestSoALayout2), + SOA_BLOCK(third, TestSoALayout3)) + + using TestSoABlocks3 = SoABlocks3<>; } // namespace portabletest #endif // DataFormats_PortableTestObjects_interface_TestSoA_h diff --git a/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h b/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h index 1facc29e19fd3..b6c6e5208e879 100644 --- a/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h +++ b/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h @@ -20,9 +20,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // SoA 
with x, y, z, id fields in device global memory using TestDeviceCollection = PortableCollection; - using TestDeviceMultiCollection2 = PortableCollection2; + using TestDeviceCollection2 = PortableCollection; - using TestDeviceMultiCollection3 = PortableCollection3; + using TestDeviceCollection3 = PortableCollection; } // namespace portabletest diff --git a/DataFormats/PortableTestObjects/plugins/TrivialSerialisation.cc b/DataFormats/PortableTestObjects/plugins/TrivialSerialisation.cc index 874b21eb8e1a7..97687fda47178 100644 --- a/DataFormats/PortableTestObjects/plugins/TrivialSerialisation.cc +++ b/DataFormats/PortableTestObjects/plugins/TrivialSerialisation.cc @@ -15,6 +15,6 @@ DEFINE_TRIVIAL_SERIALISER_PLUGIN(portabletest::MultiHeadNetHostCollection); DEFINE_TRIVIAL_SERIALISER_PLUGIN(portabletest::ParticleHostCollection); DEFINE_TRIVIAL_SERIALISER_PLUGIN(portabletest::SimpleNetHostCollection); DEFINE_TRIVIAL_SERIALISER_PLUGIN(portabletest::TestHostCollection); -DEFINE_TRIVIAL_SERIALISER_PLUGIN(portabletest::TestHostMultiCollection2); -DEFINE_TRIVIAL_SERIALISER_PLUGIN(portabletest::TestHostMultiCollection3); +DEFINE_TRIVIAL_SERIALISER_PLUGIN(portabletest::TestHostCollection2); +DEFINE_TRIVIAL_SERIALISER_PLUGIN(portabletest::TestHostCollection3); DEFINE_TRIVIAL_SERIALISER_PLUGIN(portabletest::TestHostObject); diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml index da34d8cc38aff..813e27847ee2b 100644 --- a/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml +++ b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml @@ -3,17 +3,17 @@ + + + + + + + + - - - - - - - - diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml b/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml index 00a54d403c225..9f66cfbbe5415 100644 --- a/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml +++ 
b/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml @@ -3,18 +3,18 @@ + + + + + + + + - - - - - - - - diff --git a/DataFormats/PortableTestObjects/src/classes.cc b/DataFormats/PortableTestObjects/src/classes.cc index 00b618a5b502d..122e158d2485c 100644 --- a/DataFormats/PortableTestObjects/src/classes.cc +++ b/DataFormats/PortableTestObjects/src/classes.cc @@ -11,8 +11,8 @@ #include "DataFormats/PortableTestObjects/interface/MaskHostCollection.h" SET_PORTABLEHOSTCOLLECTION_READ_RULES(portabletest::TestHostCollection); -SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(portabletest::TestHostMultiCollection2); -SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(portabletest::TestHostMultiCollection3); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(portabletest::TestHostCollection2); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(portabletest::TestHostCollection3); SET_PORTABLEHOSTOBJECT_READ_RULES(portabletest::TestHostObject); SET_PORTABLEHOSTCOLLECTION_READ_RULES(portabletest::ParticleHostCollection); diff --git a/DataFormats/PortableTestObjects/src/classes_def.xml b/DataFormats/PortableTestObjects/src/classes_def.xml index e452b6191885d..8d6168cc12600 100644 --- a/DataFormats/PortableTestObjects/src/classes_def.xml +++ b/DataFormats/PortableTestObjects/src/classes_def.xml @@ -4,6 +4,16 @@ + + + + + + + + + + @@ -18,40 +28,8 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + diff --git a/DataFormats/PortableTestObjects/test/TestSoA.cc b/DataFormats/PortableTestObjects/test/TestSoA.cc index 2d3880d4c9bf6..26bc4d14ad90f 100644 --- a/DataFormats/PortableTestObjects/test/TestSoA.cc +++ b/DataFormats/PortableTestObjects/test/TestSoA.cc @@ -10,6 +10,8 @@ int main() { constexpr const int size = 42; + constexpr const int size2 = 21; + constexpr const int size3 = 69; portabletest::TestHostCollection collection(size, cms::alpakatools::host()); const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}}; @@ -23,5 +25,42 @@ int main() { 
collection.view()[i] = {0.568, 0.823, 0., i, flags, matrix * i}; } + // Same test but for portabletest::TestHostCollection2 --> collection with 2 SoABlocks + + portabletest::TestHostCollection2 collection2(cms::alpakatools::host(), size, size2); + collection2.zeroInitialise(); + + collection2.view().first().r() = 1.; + collection2.view().second().r2() = 2.; + + for (int i = 0; i < size; ++i) { + collection2.view().first()[i] = {0.568, 0.823, 0., i, flags, matrix * i}; + } + + for (int i = 0; i < size2; ++i) { + collection2.view().second()[i] = {1.568, 1.823, 1., i + 1000, matrix * (i + 1000)}; + } + + // Same test but for portabletest::TestHostCollection3 --> collection with 3 SoABlocks + + portabletest::TestHostCollection3 collection3(cms::alpakatools::host(), size, size2, size3); + collection3.zeroInitialise(); + + collection3.view().first().r() = 1.; + collection3.view().second().r2() = 2.; + collection3.view().third().r3() = 3.; + + for (int i = 0; i < size; ++i) { + collection3.view().first()[i] = {0.568, 0.823, 0., i, flags, matrix * i}; + } + + for (int i = 0; i < size2; ++i) { + collection3.view().second()[i] = {1.568, 1.823, 1., i + 1000, matrix * (i + 1000)}; + } + + for (int i = 0; i < size3; ++i) { + collection3.view().third()[i] = {2.568, 2.823, 2., i + 2000, matrix * (i + 2000)}; + } + return 0; } diff --git a/DataFormats/SoATemplate/interface/SoABlocks.h b/DataFormats/SoATemplate/interface/SoABlocks.h index 236f504014069..2553cc0b6dd90 100644 --- a/DataFormats/SoATemplate/interface/SoABlocks.h +++ b/DataFormats/SoATemplate/interface/SoABlocks.h @@ -5,8 +5,8 @@ * SoA Blocks: collection of SoA layouts (blocks) that can be accessed in a structured way. */ -#include "SoALayout.h" #include "SoACommon.h" +#include "SoALayout.h" /* * Declare accessors for the View of each block @@ -231,7 +231,7 @@ * Call ROOTReadstreamer for each block. 
*/ #define _STREAMER_READ_SOA_BLOCK_DATA_MEMBER_IMPL(VALUE_TYPE, NAME, LAYOUT_NAME) \ - BOOST_PP_CAT(NAME, _).ROOTReadStreamer(onfile); + BOOST_PP_CAT(NAME, _).ROOTReadStreamer(onfile.BOOST_PP_CAT(NAME, _)); #define _STREAMER_READ_SOA_BLOCK_DATA_MEMBER(R, DATA, NAME) \ BOOST_PP_IF(BOOST_PP_GREATER(BOOST_PP_TUPLE_ELEM(0, NAME), _VALUE_TYPE_BLOCK), \ @@ -497,7 +497,8 @@ _ITERATE_ON_ALL_COMMA(_DECLARE_MEMBER_TRIVIAL_CONSTRUCTION_BLOCKS, ~, __VA_ARGS__) {} \ \ /* Constructor relying on user provided storage and array of sizes */ \ - SOA_HOST_ONLY CLASS(std::byte* mem, std::array elements) : sizes_(elements) { \ + SOA_HOST_ONLY CLASS(std::byte* mem, std::array elements) \ + : sizes_(elements) { \ byte_size_type offset = 0; \ size_type index = 0; \ _ITERATE_ON_ALL(_DECLARE_MEMBER_CONSTRUCTION_BLOCKS, ~, __VA_ARGS__) \ diff --git a/DataFormats/TrackSoA/interface/TracksDevice.h b/DataFormats/TrackSoA/interface/TracksDevice.h index 38c61aec7cfe0..8eda36ab266f9 100644 --- a/DataFormats/TrackSoA/interface/TracksDevice.h +++ b/DataFormats/TrackSoA/interface/TracksDevice.h @@ -12,7 +12,7 @@ namespace reco { template - using TracksDevice = PortableDeviceMultiCollection; + using TracksDevice = PortableDeviceCollection; } #endif // DataFormats_Track_TracksDevice_H diff --git a/DataFormats/TrackSoA/interface/TracksHost.h b/DataFormats/TrackSoA/interface/TracksHost.h index e462764bdc7bd..5de4415cc405a 100644 --- a/DataFormats/TrackSoA/interface/TracksHost.h +++ b/DataFormats/TrackSoA/interface/TracksHost.h @@ -5,7 +5,7 @@ #include "DataFormats/TrackSoA/interface/TracksSoA.h" namespace reco { - using TracksHost = PortableHostMultiCollection; + using TracksHost = PortableHostCollection; } #endif // DataFormats_TrackSoA_interface_TracksHost_H diff --git a/DataFormats/TrackSoA/interface/TracksSoA.h b/DataFormats/TrackSoA/interface/TracksSoA.h index 78f363778c36a..b0656a437abe0 100644 --- a/DataFormats/TrackSoA/interface/TracksSoA.h +++ b/DataFormats/TrackSoA/interface/TracksSoA.h @@ 
-8,6 +8,7 @@ #include "HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" #include "DataFormats/TrackSoA/interface/TrackDefinitions.h" namespace reco { @@ -32,6 +33,8 @@ namespace reco { GENERATE_SOA_LAYOUT(TrackHitsLayout, SOA_COLUMN(uint32_t, id), SOA_COLUMN(uint32_t, detId)) + GENERATE_SOA_BLOCKS(TrackBlocksLayout, SOA_BLOCK(tracks, TrackLayout), SOA_BLOCK(trackHits, TrackHitsLayout)) + using TrackSoA = TrackLayout<>; using TrackSoAView = TrackSoA::View; using TrackSoAConstView = TrackSoA::ConstView; @@ -40,16 +43,22 @@ namespace reco { using TrackHitSoAView = TrackHitSoA::View; using TrackHitSoAConstView = TrackHitSoA::ConstView; + // SoABlocks Layout that combines tracks and associated trackHits + using TrackBlocks = TrackBlocksLayout<>; + using TrackBlocksView = TrackBlocks::View; + using TrackBlocksConstView = TrackBlocks::ConstView; + // All these below were constexpr. Now I get this: // note: non-literal type 'reco::TrackLayout<128, false>::ConstViewTemplateFreeParams<128, false, true, true>::const_element' // cannot be used in a constant expression // TODO: move to use the layer gaps defined in CAGeometry - ALPAKA_FN_HOST_ACC inline int nLayers(const TrackSoAConstView &tracks, - const TrackHitSoAConstView &hits, + ALPAKA_FN_HOST_ACC inline int nLayers(const TrackBlocksConstView &tracksBlocks, uint16_t maxLayers, uint32_t const *__restrict__ layerStarts, int32_t i) { + const TrackSoAConstView &tracks = tracksBlocks.tracks(); + const TrackHitSoAConstView &hits = tracksBlocks.trackHits(); auto start = (i == 0) ? 
0 : tracks[i - 1].hitOffsets(); auto end = tracks[i].hitOffsets(); auto hitId = hits[start].id(); diff --git a/DataFormats/TrackSoA/src/classes.cc b/DataFormats/TrackSoA/src/classes.cc index 64d7353c3acae..bffa503a8ce58 100644 --- a/DataFormats/TrackSoA/src/classes.cc +++ b/DataFormats/TrackSoA/src/classes.cc @@ -1,4 +1,4 @@ #include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h" #include "DataFormats/TrackSoA/interface/TracksHost.h" -SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(reco::TracksHost); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(reco::TracksHost); diff --git a/DataFormats/TrackSoA/src/classes_def.xml b/DataFormats/TrackSoA/src/classes_def.xml index 0756f11d83910..725d2e50c5679 100644 --- a/DataFormats/TrackSoA/src/classes_def.xml +++ b/DataFormats/TrackSoA/src/classes_def.xml @@ -1,21 +1,12 @@ - + + + + + + - - - - - + - - - - - - - - - - diff --git a/DataFormats/TrackSoA/test/TestReadHostTrackSoA.cc b/DataFormats/TrackSoA/test/TestReadHostTrackSoA.cc index e98dd6af3dd84..d345a7a4adeb8 100644 --- a/DataFormats/TrackSoA/test/TestReadHostTrackSoA.cc +++ b/DataFormats/TrackSoA/test/TestReadHostTrackSoA.cc @@ -32,13 +32,13 @@ namespace edmtest { void TestReadHostTrackSoA::analyze(edm::StreamID, edm::Event const& iEvent, edm::EventSetup const&) const { auto const& tracks = iEvent.get(getToken_); - auto tracksView = tracks.view(); + auto tracksBlocksView = tracks.view(); - assert(tracksView.metadata().size() == int(trackSize_)); - assert(tracksView.nTracks() == int(trackSize_)); + assert(tracksBlocksView.tracks().metadata().size() == int(trackSize_)); + assert(tracksBlocksView.tracks().nTracks() == int(trackSize_)); - for (int i = 0; i < tracksView.metadata().size(); ++i) { - if (tracksView[i].eta() != float(i)) { + for (int i = 0; i < tracksBlocksView.tracks().metadata().size(); ++i) { + if (tracksBlocksView.tracks()[i].eta() != float(i)) { throw cms::Exception("TestReadHostTrackSoA Failure") << "TestReadHostTrackSoA::analyze, entry. 
i = " << i; } } diff --git a/DataFormats/TrackSoA/test/TestWriteHostTrackSoA.cc b/DataFormats/TrackSoA/test/TestWriteHostTrackSoA.cc index fde8acbfa554a..ea435b6d2a9bc 100644 --- a/DataFormats/TrackSoA/test/TestWriteHostTrackSoA.cc +++ b/DataFormats/TrackSoA/test/TestWriteHostTrackSoA.cc @@ -31,12 +31,13 @@ namespace edmtest { : trackSize_(iPSet.getParameter("trackSize")), putToken_(produces()) {} void TestWriteHostTrackSoA::produce(edm::StreamID, edm::Event& iEvent, edm::EventSetup const&) const { - ::reco::TracksHost tracks({{int(trackSize_), int(4 * trackSize_)}}, cms::alpakatools::host()); - auto tracksView = tracks.view(); + ::reco::TracksHost tracks( + cms::alpakatools::host(), static_cast(trackSize_), static_cast(4 * trackSize_)); + auto tracksBlocksView = tracks.view(); for (unsigned int i = 0; i < trackSize_; ++i) { - tracksView[i].eta() = float(i); + tracksBlocksView.tracks()[i].eta() = float(i); } - tracksView.nTracks() = trackSize_; + tracksBlocksView.tracks().nTracks() = trackSize_; iEvent.emplace(putToken_, std::move(tracks)); } diff --git a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc index 09b3bebeadcfd..946cf2b5b49cb 100644 --- a/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc +++ b/DataFormats/TrackSoA/test/alpaka/TrackSoAHeterogeneous_test.cc @@ -53,14 +53,15 @@ int main() { constexpr auto nTracks = 1000; constexpr auto nHits = nTracks * 5; - TracksSoACollection tracks_d({{nTracks, nHits}}, queue); - testTrackSoA::runKernels(tracks_d.view(), queue); + TracksSoACollection tracks_d(queue, nTracks, nHits); + auto tracksView = tracks_d.view().tracks(); + testTrackSoA::runKernels(tracksView, queue); // Instantate tracks on host. This is where the data will be // copied to from device. - ::reco::TracksHost tracks_h({{nTracks, nHits}}, queue); + ::reco::TracksHost tracks_h(queue, nTracks, nHits); - std::cout << "no. 
of tracks = " << tracks_h.view().metadata().size() << std::endl; + std::cout << "no. of tracks = " << tracks_h.view().tracks().metadata().size() << std::endl; alpaka::memcpy(queue, tracks_h.buffer(), tracks_d.const_buffer()); alpaka::wait(queue); @@ -78,9 +79,10 @@ int main() { << "hitIndices off" << std::endl; for (int i = 0; i < 10; ++i) { - std::cout << tracks_h.view()[i].pt() << "\t" << tracks_h.view()[i].eta() << "\t" << tracks_h.view()[i].chi2() - << "\t" << (int)tracks_h.view()[i].quality() << "\t" << (int)tracks_h.view()[i].nLayers() << "\t" - << tracks_h.view()[i].hitOffsets() << std::endl; + std::cout << tracks_h.view().tracks()[i].pt() << "\t" << tracks_h.view().tracks()[i].eta() << "\t" + << tracks_h.view().tracks()[i].chi2() << "\t" << (int)tracks_h.view().tracks()[i].quality() << "\t" + << (int)tracks_h.view().tracks()[i].nLayers() << "\t" << tracks_h.view().tracks()[i].hitOffsets() + << std::endl; } } } diff --git a/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.cc b/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.cc index 7c3bc1ababfe4..c4c98cf9688a4 100644 --- a/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.cc +++ b/DataFormats/TrackSoA/test/alpaka/TrajectoryStateSoA_t.cc @@ -39,9 +39,10 @@ int main() { // Inner scope to deallocate memory before destroying the stream. { - TracksSoACollection tracks_d({{1000, 5000}}, queue); + TracksSoACollection tracks_d(queue, 1000, 5000); - test::testTrackSoA(queue, tracks_d.view()); + auto tracksView = tracks_d.view().tracks(); + test::testTrackSoA(queue, tracksView); // Wait for the tests to complete. 
alpaka::wait(queue); diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h index 96455bf938a87..a1173d37ef66e 100644 --- a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h @@ -18,7 +18,7 @@ namespace reco { template - using HitPortableCollectionDevice = PortableDeviceMultiCollection; + using HitPortableCollectionDevice = PortableDeviceCollection; template class TrackingRecHitDevice : public HitPortableCollectionDevice { @@ -30,7 +30,7 @@ namespace reco { // Constructor which specifies only the SoA size, to be used when copying the results from host to device template explicit TrackingRecHitDevice(TQueue queue, uint32_t nHits, uint32_t nModules) - : HitPortableCollectionDevice({{int(nHits), int(nModules + 1)}}, queue) {} + : HitPortableCollectionDevice(queue, static_cast(nHits), static_cast(nModules + 1)) {} // N.B. why this + 1? 
Because the HitModulesLayout is holding the // moduleStart vector that is a cumulative sum of all the hits @@ -42,10 +42,11 @@ namespace reco { // Constructor from clusters template explicit TrackingRecHitDevice(TQueue queue, SiPixelClustersDevice const &clusters) - : HitPortableCollectionDevice({{int(clusters.nClusters()), clusters.view().metadata().size()}}, queue), + : HitPortableCollectionDevice( + queue, static_cast(clusters.nClusters()), clusters.view().metadata().size()), offsetBPIX2_{clusters.offsetBPIX2()} { - auto hitsView = this->template view(); - auto modsView = this->template view(); + auto hitsView = this->view().trackingHits(); + auto modsView = this->view().hitModules(); auto nModules = clusters.view().metadata().size(); @@ -59,8 +60,8 @@ namespace reco { alpaka::memcpy(queue, off_d, off_h); } - uint32_t nHits() const { return this->template view().metadata().size(); } - uint32_t nModules() const { return this->template view().metadata().size() - 1; } + uint32_t nHits() const { return static_cast(this->view().trackingHits().metadata().size()); } + uint32_t nModules() const { return static_cast(this->view().hitModules().metadata().size() - 1); } int32_t offsetBPIX2() const { return offsetBPIX2_; } @@ -68,7 +69,7 @@ namespace reco { template void updateFromDevice(TQueue queue) { auto off_h = cms::alpakatools::make_host_view(offsetBPIX2_); - auto off_d = cms::alpakatools::make_device_view(queue, this->template view().offsetBPIX2()); + auto off_d = cms::alpakatools::make_device_view(queue, this->view().trackingHits().offsetBPIX2()); alpaka::memcpy(queue, off_h, off_d); } diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h index cd9998a975a55..6f1c3d20fb0db 100644 --- a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h @@ -18,26 +18,27 @@ namespace reco { - using HitPortableCollectionHost 
= PortableHostMultiCollection; + using HitPortableCollectionHost = PortableHostCollection; class TrackingRecHitHost : public HitPortableCollectionHost { public: - TrackingRecHitHost(edm::Uninitialized) - : PortableHostMultiCollection{edm::kUninitialized} {} + TrackingRecHitHost(edm::Uninitialized) : PortableHostCollection{edm::kUninitialized} {} // Constructor which specifies only the SoA size, to be used when copying the results from the device to the host // FIXME add an explicit overload for the host case template explicit TrackingRecHitHost(TQueue queue, uint32_t nHits, uint32_t nModules) - : HitPortableCollectionHost({{int(nHits), int(nModules + 1)}}, queue) {} + : HitPortableCollectionHost(queue, static_cast(nHits), static_cast(nModules + 1)) {} // Why this +1? See TrackingRecHitDevice.h constructor for an explanation // Constructor from clusters template explicit TrackingRecHitHost(TQueue queue, SiPixelClustersHost const& clusters) - : HitPortableCollectionHost({{int(clusters.nClusters()), clusters.view().metadata().size()}}, queue) { - auto hitsView = this->template view(); - auto modsView = this->template view(); + : HitPortableCollectionHost(queue, + static_cast(clusters.nClusters()), + static_cast(clusters.view().metadata().size())) { + auto hitsView = view().trackingHits(); + auto modsView = view().hitModules(); auto nModules = clusters.view().metadata().size(); @@ -49,10 +50,10 @@ namespace reco { hitsView.offsetBPIX2() = clusters.offsetBPIX2(); } - uint32_t nHits() const { return this->template view().metadata().size(); } - uint32_t nModules() const { return this->template view().metadata().size() - 1; } + uint32_t nHits() const { return this->view().trackingHits().metadata().size(); } + uint32_t nModules() const { return this->view().hitModules().metadata().size() - 1; } - int32_t offsetBPIX2() const { return this->template view().offsetBPIX2(); } + int32_t offsetBPIX2() const { return this->view().trackingHits().offsetBPIX2(); } // do nothing for a 
host collection template diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h index cb1574f8a31b3..e3cffcd9f089b 100644 --- a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h @@ -4,6 +4,7 @@ #include #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" #include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" @@ -28,6 +29,10 @@ namespace reco { GENERATE_SOA_LAYOUT(HitModulesLayout, SOA_COLUMN(uint32_t, moduleStart)); + GENERATE_SOA_BLOCKS(TrackingBlocksLayout, + SOA_BLOCK(trackingHits, TrackingHitsLayout), + SOA_BLOCK(hitModules, HitModulesLayout)) + // N.B. this layout is not really included by default in the hits SoA // This holds the needed parameters to activate (via ONLY_TRIPLETS_IN_HOLE) the // calculations to check if a triplet points to the disk hole @@ -52,6 +57,10 @@ namespace reco { using HitModuleSoAView = HitModuleSoA::View; using HitModuleSoAConstView = HitModuleSoA::ConstView; + using TrackingBlocksSoA = TrackingBlocksLayout<>; + using TrackingBlocksSoAView = TrackingBlocksSoA::View; + using TrackingBlocksSoAConstView = TrackingBlocksSoA::ConstView; + using AverageGeometrySoA = AverageGeometryLayout<>; using AverageGeometryView = AverageGeometrySoA::View; using AverageGeometryConstView = AverageGeometrySoA::ConstView; diff --git a/DataFormats/TrackingRecHitSoA/src/classes.cc b/DataFormats/TrackingRecHitSoA/src/classes.cc index aeb59999bc6cc..06ad17217cbaf 100644 --- a/DataFormats/TrackingRecHitSoA/src/classes.cc +++ b/DataFormats/TrackingRecHitSoA/src/classes.cc @@ -1,4 +1,4 @@ #include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h" #include 
"DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" -SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(reco::HitPortableCollectionHost); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(reco::HitPortableCollectionHost); diff --git a/DataFormats/TrackingRecHitSoA/src/classes_def.xml b/DataFormats/TrackingRecHitSoA/src/classes_def.xml index 3384a6e226543..b05698402834d 100644 --- a/DataFormats/TrackingRecHitSoA/src/classes_def.xml +++ b/DataFormats/TrackingRecHitSoA/src/classes_def.xml @@ -1,25 +1,15 @@ + + + - - - - + - - - - - - - - - - diff --git a/DataFormats/TrackingRecHitSoA/test/TestReadHostHitSoA.cc b/DataFormats/TrackingRecHitSoA/test/TestReadHostHitSoA.cc index a7c60c28c3464..50a620df6aeb3 100644 --- a/DataFormats/TrackingRecHitSoA/test/TestReadHostHitSoA.cc +++ b/DataFormats/TrackingRecHitSoA/test/TestReadHostHitSoA.cc @@ -38,7 +38,7 @@ namespace edmtest { void TestReadHostHitSoA::analyze(edm::StreamID, edm::Event const& iEvent, edm::EventSetup const&) const { auto const& hits = iEvent.get(getToken_); - auto hitsView = hits.view(); + auto hitsView = hits.view().trackingHits(); std::cout << "hitsView.metadata().size() = " << hitsView.metadata().size() << std::endl; assert(hitsView.metadata().size() == int(hitSize_)); diff --git a/DataFormats/TrackingRecHitSoA/test/TestWriteHostHitSoA.cc b/DataFormats/TrackingRecHitSoA/test/TestWriteHostHitSoA.cc index fef547acc2a70..34627e9c3ad23 100644 --- a/DataFormats/TrackingRecHitSoA/test/TestWriteHostHitSoA.cc +++ b/DataFormats/TrackingRecHitSoA/test/TestWriteHostHitSoA.cc @@ -37,7 +37,7 @@ namespace edmtest { void TestWriteHostHitSoA::produce(edm::StreamID, edm::Event& iEvent, edm::EventSetup const&) const { HitsOnHost hits(cms::alpakatools::host(), hitSize_, 100); - auto hitsView = hits.view(); + auto hitsView = hits.view().trackingHits(); for (unsigned int i = 0; i < hitSize_; ++i) { hitsView[i].xGlobal() = float(i); } diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc 
b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc index dc3508979e4dc..2c70c326ba5b0 100644 --- a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc +++ b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc @@ -59,17 +59,18 @@ int main() { TrackingRecHitsSoACollection tkhit(queue, clusters); // exercise the copy of a full column (on device) - auto hitXD = cms::alpakatools::make_device_view(queue, tkhit.view().xLocal().data(), nHits); + auto hitXD = cms::alpakatools::make_device_view(queue, tkhit.view().trackingHits().xLocal().data(), nHits); alpaka::memcpy(queue, hitXD, hitsX); // exercise the memset of a colum (on device) - auto hitYD = cms::alpakatools::make_device_view(queue, tkhit.view().yGlobal().data(), nHits); + auto hitYD = + cms::alpakatools::make_device_view(queue, tkhit.view().trackingHits().yGlobal().data(), nHits); constexpr float constYG = -14.0458; std::vector constYV(nHits, constYG); auto constYGV_v = cms::alpakatools::make_host_view(constYV.data(), nHits); alpaka::memcpy(queue, hitYD, constYGV_v); - testTrackingRecHitSoA::runKernels(tkhit.view(), tkhit.view<::reco::HitModuleSoA>(), queue); + testTrackingRecHitSoA::runKernels(tkhit.view(), queue); tkhit.updateFromDevice(queue); #if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED or defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED @@ -78,7 +79,7 @@ int main() { ::reco::TrackingRecHitHost const& host_collection = tkhit; #else ::reco::TrackingRecHitHost host_collection = - cms::alpakatools::CopyToHost<::reco::TrackingRecHitDevice>::copyAsync(queue, tkhit); + cms::alpakatools::CopyToHost< ::reco::TrackingRecHitDevice >::copyAsync(queue, tkhit); alpaka::wait(queue); #endif @@ -87,7 +88,8 @@ int main() { ::reco::TrackingRecHitHost host_collection_2(cms::alpakatools::host(), nHits, nModules); // exercise the memset of a colum (on host) - auto hitLYH = cms::alpakatools::make_host_view(host_collection_2.view().yLocal().data(), nHits); + auto hitLYH = + 
cms::alpakatools::make_host_view(host_collection_2.view().trackingHits().yLocal().data(), nHits); constexpr float constYL = -27.0855; std::vector constYLV(nHits, constYL); auto constYL_v = cms::alpakatools::make_host_view(constYLV.data(), nHits); @@ -96,9 +98,9 @@ int main() { // wait for the copy above to complete alpaka::wait(queue_host); - assert(host_collection.view().xLocal()[12] == 24.); - assert(host_collection.view().yGlobal()[int(nHits / 2)] == constYG); - assert(host_collection_2.view().yLocal()[nHits - 1] == constYL); + assert(host_collection.view().trackingHits().xLocal()[12] == 24.); + assert(host_collection.view().trackingHits().yGlobal()[int(nHits / 2)] == constYG); + assert(host_collection_2.view().trackingHits().yLocal()[nHits - 1] == constYL); assert(tkhit.nHits() == nHits); assert(tkhit.offsetBPIX2() == 22); // set in the kernel diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc index 6a37fa05954c0..13ab194e801d9 100644 --- a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc +++ b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc @@ -19,46 +19,42 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { namespace testTrackingRecHitSoA { struct TestFillKernel { - ALPAKA_FN_ACC void operator()(Acc1D const& acc, - ::reco::TrackingRecHitView soa, - ::reco::HitModuleSoAView mods) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, ::reco::TrackingBlocksSoAView soa) const { const uint32_t i(alpaka::getIdx(acc)[0u]); const uint32_t j(alpaka::getIdx(acc)[0u]); if (cms::alpakatools::once_per_grid(acc)) { - soa.offsetBPIX2() = 22; - soa[10].xLocal() = 1.11; + soa.trackingHits().offsetBPIX2() = 22; + soa.trackingHits()[10].xLocal() = 1.11; } - soa[i].iphi() = i % 10; - mods[j].moduleStart() = j; + soa.trackingHits()[i].iphi() = i % 10; + soa.hitModules()[j].moduleStart() = j; } }; struct ShowKernel { - ALPAKA_FN_ACC void operator()(Acc1D const& acc, - 
::reco::TrackingRecHitConstView soa, - ::reco::HitModuleSoAView mods) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, ::reco::TrackingBlocksSoAConstView soa) const { if (cms::alpakatools::once_per_grid(acc)) { - printf("offsetBPIX = %d\n", soa.offsetBPIX2()); - printf("nHits = %d\n", soa.metadata().size()); - printf("hitsModuleStart[28] = %d\n", mods[28].moduleStart()); + printf("offsetBPIX = %d\n", soa.trackingHits().offsetBPIX2()); + printf("nHits = %d\n", soa.trackingHits().metadata().size()); + printf("hitsModuleStart[28] = %d\n", soa.hitModules()[28].moduleStart()); } // can be increased to soa.nHits() for debugging - for (uint32_t i : cms::alpakatools::uniform_elements(acc, soa.metadata().size())) { - printf("iPhi %d -> %d\n", i, soa[i].iphi()); - printf("x %d -> %.2f \n", i, soa[i].xLocal()); + for (uint32_t i : cms::alpakatools::uniform_elements(acc, soa.trackingHits().metadata().size())) { + printf("iPhi %d -> %d\n", i, soa.trackingHits()[i].iphi()); + printf("x %d -> %.2f \n", i, soa.trackingHits()[i].xLocal()); } } }; - void runKernels(::reco::TrackingRecHitView& view, ::reco::HitModuleSoAView& mods, Queue& queue) { + void runKernels(::reco::TrackingBlocksSoAView& view, Queue& queue) { uint32_t items = 64; - uint32_t groups = divide_up_by(view.metadata().size(), items); + uint32_t groups = divide_up_by(view.trackingHits().metadata().size(), items); auto workDiv = make_workdiv(groups, items); - alpaka::exec(queue, workDiv, TestFillKernel{}, view, mods); - alpaka::exec(queue, workDiv, ShowKernel{}, view, mods); + alpaka::exec(queue, workDiv, TestFillKernel{}, view); + alpaka::exec(queue, workDiv, ShowKernel{}, view); } } // namespace testTrackingRecHitSoA diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.h b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.h index 646c036043b34..c44e0e0ec6974 100644 --- a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.h +++ b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.h @@ -6,7 
+6,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::testTrackingRecHitSoA { - void runKernels(::reco::TrackingRecHitView& hits, ::reco::HitModuleSoAView& mods, Queue& queue); + void runKernels(::reco::TrackingBlocksSoAView& trackingBlocks, Queue& queue); } // namespace ALPAKA_ACCELERATOR_NAMESPACE::testTrackingRecHitSoA diff --git a/DataFormats/VertexSoA/README.md b/DataFormats/VertexSoA/README.md index 36d20478a2508..58a7e3412b778 100644 --- a/DataFormats/VertexSoA/README.md +++ b/DataFormats/VertexSoA/README.md @@ -4,13 +4,14 @@ information about reconstructed pixel vertices in Structure of Array (SoA) format. -The host collection is an instantiation of `PortableHostMultiCollection`, while -the device collection is an instantiation of `PortableDeviceMultiCollection`. -Both collections use two SoA layouts (`ZVertexLayout` and `ZVertexTracksLayout`) -with different number of elements, defined at run-time. -The layouts are defined by the `GENERATE_SOA_LAYOUT` macro in -`DataFormats/VertexSoA/interface/ZVertexSoA.h`. +The `ZVertexBlocks` layout is composed of the layouts `ZVertexLayout` and +`ZVertexTracksLayout` using the `GENERATE_SOA_BLOCKS` macro. +The `PortableHostCollection` and +`PortableDeviceCollection` can be created using a +different number of elements for each sub-layout defined at run-time. +All layouts are defined by the `GENERATE_SOA_LAYOUT` and `GENERATE_SOA_BLOCKS` +macros in `DataFormats/VertexSoA/interface/ZVertexSoA.h`. 
## `ZVertexHost` diff --git a/DataFormats/VertexSoA/interface/ZVertexDevice.h b/DataFormats/VertexSoA/interface/ZVertexDevice.h index 3a8b95e7ca816..fd7d9321be9dd 100644 --- a/DataFormats/VertexSoA/interface/ZVertexDevice.h +++ b/DataFormats/VertexSoA/interface/ZVertexDevice.h @@ -9,7 +9,9 @@ #include "DataFormats/VertexSoA/interface/ZVertexHost.h" #include "DataFormats/Portable/interface/PortableDeviceCollection.h" -template -using ZVertexDevice = PortableDeviceMultiCollection; +namespace reco { + template + using ZVertexDevice = PortableDeviceCollection; +} // namespace reco #endif // DataFormats_VertexSoA_interface_ZVertexDevice_h diff --git a/DataFormats/VertexSoA/interface/ZVertexHost.h b/DataFormats/VertexSoA/interface/ZVertexHost.h index a805495ce5fe0..4135f4330a184 100644 --- a/DataFormats/VertexSoA/interface/ZVertexHost.h +++ b/DataFormats/VertexSoA/interface/ZVertexHost.h @@ -9,6 +9,8 @@ #include "DataFormats/VertexSoA/interface/ZVertexSoA.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" -using ZVertexHost = PortableHostCollection2; +namespace reco { + using ZVertexHost = PortableHostCollection; +} // namespace reco #endif // DataFormats_VertexSoA_ZVertexHost_H diff --git a/DataFormats/VertexSoA/interface/ZVertexSoA.h b/DataFormats/VertexSoA/interface/ZVertexSoA.h index f84eeb1e4e812..886f82d6152be 100644 --- a/DataFormats/VertexSoA/interface/ZVertexSoA.h +++ b/DataFormats/VertexSoA/interface/ZVertexSoA.h @@ -6,6 +6,7 @@ #include #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" namespace reco { @@ -23,6 +24,10 @@ namespace reco { SOA_COLUMN(int32_t, ndof)) // vertices number of dof // FIXME: reused as workspace for the number of nearest neighbours + GENERATE_SOA_BLOCKS(ZVertexBlocksLayout, + SOA_BLOCK(zvertex, ZVertexLayout), + SOA_BLOCK(zvertexTracks, ZVertexTracksLayout)) + // Common types for both Host and Device code using ZVertexSoA = ZVertexLayout<>; using 
ZVertexSoAView = ZVertexSoA::View; @@ -33,6 +38,11 @@ namespace reco { using ZVertexTracksSoAView = ZVertexTracksSoA::View; using ZVertexTracksSoAConstView = ZVertexTracksSoA::ConstView; + // SoABlocks Layout that combines zvertex and associated tracks + using ZVertexBlocks = ZVertexBlocksLayout<>; + using ZVertexBlocksView = ZVertexBlocks::View; + using ZVertexBlocksConstView = ZVertexBlocks::ConstView; + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void init(ZVertexSoAView &vertices) { vertices.nvFinal() = 0; } } // namespace reco diff --git a/DataFormats/VertexSoA/interface/alpaka/ZVertexSoACollection.h b/DataFormats/VertexSoA/interface/alpaka/ZVertexSoACollection.h index 212256558fc01..53a99fde23728 100644 --- a/DataFormats/VertexSoA/interface/alpaka/ZVertexSoACollection.h +++ b/DataFormats/VertexSoA/interface/alpaka/ZVertexSoACollection.h @@ -12,13 +12,15 @@ #include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" -namespace ALPAKA_ACCELERATOR_NAMESPACE { +namespace ALPAKA_ACCELERATOR_NAMESPACE::reco { + using ::reco::ZVertexDevice; + using ::reco::ZVertexHost; using ZVertexSoACollection = std::conditional_t, ZVertexHost, ZVertexDevice>; -} // namespace ALPAKA_ACCELERATOR_NAMESPACE +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::reco -ASSERT_DEVICE_MATCHES_HOST_COLLECTION(ZVertexSoACollection, ZVertexHost); +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(reco::ZVertexSoACollection, reco::ZVertexHost); #endif // DataFormats_VertexSoA_interface_ZVertexSoACollection_h diff --git a/DataFormats/VertexSoA/plugins/TrivialSerialisation.cc b/DataFormats/VertexSoA/plugins/TrivialSerialisation.cc index 92fee377f783d..1c230dfbfe5ec 100644 --- a/DataFormats/VertexSoA/plugins/TrivialSerialisation.cc +++ b/DataFormats/VertexSoA/plugins/TrivialSerialisation.cc @@ -1,4 +1,4 @@ #include "DataFormats/VertexSoA/interface/ZVertexHost.h" #include "HeterogeneousCore/TrivialSerialisation/interface/SerialiserFactory.h" 
-DEFINE_TRIVIAL_SERIALISER_PLUGIN(ZVertexHost); +DEFINE_TRIVIAL_SERIALISER_PLUGIN(reco::ZVertexHost); diff --git a/DataFormats/VertexSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/VertexSoA/src/alpaka/classes_cuda_def.xml index 606937a5bd3e5..fbbc3ea063df6 100644 --- a/DataFormats/VertexSoA/src/alpaka/classes_cuda_def.xml +++ b/DataFormats/VertexSoA/src/alpaka/classes_cuda_def.xml @@ -1,6 +1,6 @@ - - - + + + diff --git a/DataFormats/VertexSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/VertexSoA/src/alpaka/classes_rocm_def.xml index 94deb6fff7d61..b2adcdfe29d06 100644 --- a/DataFormats/VertexSoA/src/alpaka/classes_rocm_def.xml +++ b/DataFormats/VertexSoA/src/alpaka/classes_rocm_def.xml @@ -1,6 +1,6 @@ - - - + + + diff --git a/DataFormats/VertexSoA/src/classes.cc b/DataFormats/VertexSoA/src/classes.cc index d333705981ff2..1767c3fe68550 100644 --- a/DataFormats/VertexSoA/src/classes.cc +++ b/DataFormats/VertexSoA/src/classes.cc @@ -1,4 +1,4 @@ #include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h" #include "DataFormats/VertexSoA/interface/ZVertexHost.h" -SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(ZVertexHost); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(reco::ZVertexHost); diff --git a/DataFormats/VertexSoA/src/classes_def.xml b/DataFormats/VertexSoA/src/classes_def.xml index 1f924cb466f4a..d9311a160cc51 100644 --- a/DataFormats/VertexSoA/src/classes_def.xml +++ b/DataFormats/VertexSoA/src/classes_def.xml @@ -1,20 +1,12 @@ - - + + + + + + - - - - - - - - - - - - - + diff --git a/DataFormats/VertexSoA/test/TestReadHostVertexSoA.cc b/DataFormats/VertexSoA/test/TestReadHostVertexSoA.cc index 2be25a049c6c3..a56af68483bbc 100644 --- a/DataFormats/VertexSoA/test/TestReadHostVertexSoA.cc +++ b/DataFormats/VertexSoA/test/TestReadHostVertexSoA.cc @@ -22,7 +22,7 @@ namespace edmtest { static void fillDescriptions(edm::ConfigurationDescriptions&); private: - edm::EDGetTokenT getToken_; + edm::EDGetTokenT getToken_; }; 
TestReadHostVertexSoA::TestReadHostVertexSoA(edm::ParameterSet const& iPSet) @@ -32,10 +32,10 @@ namespace edmtest { auto const& ZVertexs = iEvent.get(getToken_); auto ZVertexsView = ZVertexs.view(); - std::cout << "ZVertexsView.metadata().size() " << ZVertexsView.metadata().size() << std::endl; - std::cout << "ZVertexsView.chi2() " << ZVertexsView[10].chi2() << std::endl; - for (int i = 0; i < ZVertexsView.metadata().size(); ++i) { - if (ZVertexsView[i].chi2() != float(i)) { + std::cout << "ZVertexsView.zvertex().metadata().size() " << ZVertexsView.zvertex().metadata().size() << std::endl; + std::cout << "ZVertexsView.zvertex()[10].chi2() " << ZVertexsView.zvertex()[10].chi2() << std::endl; + for (int i = 0; i < ZVertexsView.zvertex().metadata().size(); ++i) { + if (ZVertexsView.zvertex()[i].chi2() != float(i)) { throw cms::Exception("TestReadHostVertexSoA Failure") << "TestReadHostVertexSoA::analyze, entry. i = " << i; } } diff --git a/DataFormats/VertexSoA/test/TestWriteHostVertexSoA.cc b/DataFormats/VertexSoA/test/TestWriteHostVertexSoA.cc index 85847f5ee4a78..c266ab1f2914b 100644 --- a/DataFormats/VertexSoA/test/TestWriteHostVertexSoA.cc +++ b/DataFormats/VertexSoA/test/TestWriteHostVertexSoA.cc @@ -24,17 +24,17 @@ namespace edmtest { private: unsigned int vertexSize_; - edm::EDPutTokenT putToken_; + edm::EDPutTokenT putToken_; }; TestWriteHostVertexSoA::TestWriteHostVertexSoA(edm::ParameterSet const& iPSet) : vertexSize_(iPSet.getParameter("vertexSize")), putToken_(produces()) {} void TestWriteHostVertexSoA::produce(edm::StreamID, edm::Event& iEvent, edm::EventSetup const&) const { - ZVertexHost ZVertexs({{int(vertexSize_), int(4 * vertexSize_)}}, cms::alpakatools::host()); + reco::ZVertexHost ZVertexs(cms::alpakatools::host(), int(vertexSize_), int(4 * vertexSize_)); auto ZVertexsView = ZVertexs.view(); for (unsigned int i = 0; i < vertexSize_; ++i) { - ZVertexsView[i].chi2() = float(i); + ZVertexsView.zvertex()[i].chi2() = float(i); } 
iEvent.emplace(putToken_, std::move(ZVertexs)); } diff --git a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.cc b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.cc index bd358d7c45c53..d49005946a732 100644 --- a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.cc +++ b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.cc @@ -29,6 +29,7 @@ #include "ZVertexSoA_test.h" using namespace ALPAKA_ACCELERATOR_NAMESPACE; +using namespace ALPAKA_ACCELERATOR_NAMESPACE::reco; // Run 3 values, used for testing constexpr uint32_t maxTracks = 32 * 1024; @@ -51,8 +52,8 @@ int main() { { // Instantiate vertices on device. PortableCollection allocates // SoA on device automatically. - ZVertexSoACollection zvertex_d({{maxTracks, maxVertices}}, queue); - testZVertexSoAT::runKernels(zvertex_d.view(), zvertex_d.view(), queue); + ZVertexSoACollection zvertex_d(queue, maxTracks, maxVertices); + testZVertexSoAT::runKernels(zvertex_d.view(), queue); // If the device is actually the host, use the collection as-is. // Otherwise, copy the data from the device to the host. 
@@ -62,7 +63,7 @@ int main() { ZVertexHost zvertex_h = cms::alpakatools::CopyToHost::copyAsync(queue, zvertex_d); #endif alpaka::wait(queue); - std::cout << zvertex_h.view().metadata().size() << std::endl; + std::cout << zvertex_h.view().zvertex().metadata().size() << std::endl; // Print results std::cout << "idv\t" @@ -74,8 +75,8 @@ int main() { << "sortInd\t" << "nvFinal\n"; - auto vtx_v = zvertex_h.view(); - auto trk_v = zvertex_h.view(); + auto vtx_v = zvertex_h.view().zvertex(); + auto trk_v = zvertex_h.view().zvertexTracks(); for (int i = 0; i < 10; ++i) { auto vi = vtx_v[i]; auto ti = trk_v[i]; diff --git a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc index 5b0d496570317..75c00a44d1bd1 100644 --- a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc +++ b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.dev.cc @@ -10,9 +10,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::testZVertexSoAT { class TestFillKernel { public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, - reco::ZVertexSoAView zvertex_view, - reco::ZVertexTracksSoAView ztracks_view) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, ::reco::ZVertexBlocksView view) const { + ::reco::ZVertexSoAView zvertex_view = view.zvertex(); + ::reco::ZVertexTracksSoAView ztracks_view = view.zvertexTracks(); + if (cms::alpakatools::once_per_grid(acc)) { zvertex_view.nvFinal() = 420; } @@ -33,9 +34,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::testZVertexSoAT { class TestVerifyKernel { public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, - reco::ZVertexSoAView zvertex_view, - reco::ZVertexTracksSoAView ztracks_view) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, ::reco::ZVertexBlocksView view) const { + ::reco::ZVertexSoAView zvertex_view = view.zvertex(); + ::reco::ZVertexTracksSoAView ztracks_view = view.zvertexTracks(); + if (cms::alpakatools::once_per_grid(acc)) { ALPAKA_ASSERT_ACC(zvertex_view.nvFinal() == 
420); } @@ -54,12 +56,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::testZVertexSoAT { } }; - void runKernels(reco::ZVertexSoAView zvertex_view, reco::ZVertexTracksSoAView ztracks_view, Queue& queue) { + void runKernels(::reco::ZVertexBlocksView view, Queue& queue) { uint32_t items = 64; - uint32_t groups = cms::alpakatools::divide_up_by(zvertex_view.metadata().size(), items); + uint32_t groups = cms::alpakatools::divide_up_by(view.zvertex().metadata().size(), items); auto workDiv = cms::alpakatools::make_workdiv(groups, items); - alpaka::exec(queue, workDiv, TestFillKernel{}, zvertex_view, ztracks_view); - alpaka::exec(queue, workDiv, TestVerifyKernel{}, zvertex_view, ztracks_view); + alpaka::exec(queue, workDiv, TestFillKernel{}, view); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, view); } } // namespace ALPAKA_ACCELERATOR_NAMESPACE::testZVertexSoAT diff --git a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.h b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.h index fa54fffa8ce38..a77a8ae8c7d5c 100644 --- a/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.h +++ b/DataFormats/VertexSoA/test/alpaka/ZVertexSoA_test.h @@ -6,7 +6,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::testZVertexSoAT { - void runKernels(reco::ZVertexSoAView zvertex_view, reco::ZVertexTracksSoAView ztracks_view, Queue& queue); + void runKernels(::reco::ZVertexBlocksView view, Queue& queue); } // namespace ALPAKA_ACCELERATOR_NAMESPACE::testZVertexSoAT diff --git a/HeterogeneousCore/AlpakaTest/interface/AlpakaESTestData.h b/HeterogeneousCore/AlpakaTest/interface/AlpakaESTestData.h index 5446efb8c83c1..362d61b481423 100644 --- a/HeterogeneousCore/AlpakaTest/interface/AlpakaESTestData.h +++ b/HeterogeneousCore/AlpakaTest/interface/AlpakaESTestData.h @@ -14,7 +14,7 @@ namespace cms::alpakatest { using AlpakaESTestDataCHost = PortableHostCollection; using AlpakaESTestDataDHost = PortableHostCollection; - using AlpakaESTestDataACMultiHost = PortableHostMultiCollection; + using 
AlpakaESTestDataBlocksACHost = PortableHostCollection; // Template-over-device model template diff --git a/HeterogeneousCore/AlpakaTest/interface/AlpakaESTestSoA.h b/HeterogeneousCore/AlpakaTest/interface/AlpakaESTestSoA.h index e46c80d26d7cf..12cd54bab5247 100644 --- a/HeterogeneousCore/AlpakaTest/interface/AlpakaESTestSoA.h +++ b/HeterogeneousCore/AlpakaTest/interface/AlpakaESTestSoA.h @@ -3,6 +3,7 @@ #include "DataFormats/SoATemplate/interface/SoACommon.h" #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" namespace cms::alpakatest { // PortableCollection-based model @@ -12,11 +13,16 @@ namespace cms::alpakatest { GENERATE_SOA_LAYOUT(AlpakaESTestSoALayoutE, SOA_COLUMN(float, val), SOA_COLUMN(int, ind)) GENERATE_SOA_LAYOUT(AlpakaESTestSoALayoutEData, SOA_COLUMN(float, val2)) + GENERATE_SOA_BLOCKS(AlpakaESTestSoABlocksLayoutAC, + SOA_BLOCK(testSoAA, AlpakaESTestSoALayoutA), + SOA_BLOCK(testSoAC, AlpakaESTestSoALayoutC)) + using AlpakaESTestSoAA = AlpakaESTestSoALayoutA<>; using AlpakaESTestSoAC = AlpakaESTestSoALayoutC<>; using AlpakaESTestSoAD = AlpakaESTestSoALayoutD<>; using AlpakaESTestSoAE = AlpakaESTestSoALayoutE<>; using AlpakaESTestSoAEData = AlpakaESTestSoALayoutEData<>; + using AlpakaESTestSoABlocksAC = AlpakaESTestSoABlocksLayoutAC<>; } // namespace cms::alpakatest #endif diff --git a/HeterogeneousCore/AlpakaTest/interface/alpaka/AlpakaESTestData.h b/HeterogeneousCore/AlpakaTest/interface/alpaka/AlpakaESTestData.h index a7e31f46cdf63..d68413b4bd502 100644 --- a/HeterogeneousCore/AlpakaTest/interface/alpaka/AlpakaESTestData.h +++ b/HeterogeneousCore/AlpakaTest/interface/alpaka/AlpakaESTestData.h @@ -21,9 +21,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { using AlpakaESTestDataEHost = cms::alpakatest::AlpakaESTestDataEHost; using AlpakaESTestDataEDevice = cms::alpakatest::AlpakaESTestDataE; - using AlpakaESTestDataACMultiHost = cms::alpakatest::AlpakaESTestDataACMultiHost; - using 
AlpakaESTestDataACMultiDevice = - PortableMultiCollection; + using AlpakaESTestDataBlocksACHost = cms::alpakatest::AlpakaESTestDataBlocksACHost; + using AlpakaESTestDataACBlocksDevice = PortableCollection; } // namespace ALPAKA_ACCELERATOR_NAMESPACE @@ -32,6 +31,6 @@ ASSERT_DEVICE_MATCHES_HOST_COLLECTION(AlpakaESTestDataADevice, cms::alpakatest:: ASSERT_DEVICE_MATCHES_HOST_COLLECTION(AlpakaESTestDataCDevice, cms::alpakatest::AlpakaESTestDataCHost); ASSERT_DEVICE_MATCHES_HOST_COLLECTION(AlpakaESTestDataDDevice, cms::alpakatest::AlpakaESTestDataDHost); ASSERT_DEVICE_MATCHES_HOST_COLLECTION(AlpakaESTestDataEDevice, cms::alpakatest::AlpakaESTestDataEHost); -ASSERT_DEVICE_MATCHES_HOST_COLLECTION(AlpakaESTestDataACMultiDevice, ::cms::alpakatest::AlpakaESTestDataACMultiHost); +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(AlpakaESTestDataACBlocksDevice, ::cms::alpakatest::AlpakaESTestDataBlocksACHost); #endif // HeterogeneousCore_AlpakaTest_interface_alpaka_AlpakaESTestData_h diff --git a/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc b/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc index 2a5437bf30ce6..a3fbfe0a6308b 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc @@ -131,9 +131,8 @@ class TestAlpakaAnalyzer : public edm::global::EDAnalyzer<> { TestAlpakaAnalyzer(edm::ParameterSet const& config) : source_{config.getParameter("source")}, token_{consumes(source_)}, - //tokenMulti_{consumes(source_)}, - tokenMulti2_{consumes(source_)}, - tokenMulti3_{consumes(source_)}, + tokenBlocks2_{consumes(source_)}, + tokenBlocks3_{consumes(source_)}, expectSize_{config.getParameter("expectSize")}, expectXvalues_{config.getParameter>("expectXvalues")} { if (std::string const& eb = config.getParameter("expectBackend"); not eb.empty()) { @@ -213,21 +212,13 @@ class TestAlpakaAnalyzer : public edm::global::EDAnalyzer<> { } } - // portabletest::TestHostMultiCollection const& 
productMulti = event.get(tokenMulti_); - // auto const& viewMulti0 = productMulti.const_view<0>(); - // auto& mviewMulti0 = productMulti.view<0>(); - // auto const& cmviewMulti0 = productMulti.view<0>(); - // auto const& viewMulti1 = productMulti.const_view<1>(); - // auto& mviewMulti1 = productMulti.view<1>(); - // auto const& cmviewMulti1 = productMulti.view<1>(); - - portabletest::TestHostMultiCollection2 const& productMulti2 = event.get(tokenMulti2_); - auto const& viewMulti2_0 = productMulti2.const_view<0>(); - auto& mviewMulti2_0 = productMulti2.view<0>(); - auto const& cmviewMulti2_0 = productMulti2.view<0>(); - auto const& viewMulti2_1 = productMulti2.const_view<1>(); - auto& mviewMulti2_1 = productMulti2.view<1>(); - auto const& cmviewMulti2_1 = productMulti2.view<1>(); + portabletest::TestHostCollection2 const& productMulti2 = event.get(tokenBlocks2_); + auto const& viewMulti2_0 = productMulti2.const_view().first(); + auto& mviewMulti2_0 = productMulti2.view().first(); + auto const& cmviewMulti2_0 = productMulti2.view().first(); + auto const& viewMulti2_1 = productMulti2.const_view().second(); + auto& mviewMulti2_1 = productMulti2.view().second(); + auto const& cmviewMulti2_1 = productMulti2.view().second(); checkViewAddresses(viewMulti2_0); checkViewAddresses(mviewMulti2_0); @@ -265,16 +256,16 @@ class TestAlpakaAnalyzer : public edm::global::EDAnalyzer<> { assert(vi.m2() == matrix * i); } - portabletest::TestHostMultiCollection3 const& productMulti3 = event.get(tokenMulti3_); - auto const& viewMulti3_0 = productMulti3.const_view<0>(); - auto& mviewMulti3_0 = productMulti3.view<0>(); - auto const& cmviewMulti3_0 = productMulti3.view<0>(); - auto const& viewMulti3_1 = productMulti3.const_view<1>(); - auto& mviewMulti3_1 = productMulti3.view<1>(); - auto const& cmviewMulti3_1 = productMulti3.view<1>(); - auto const& viewMulti3_2 = productMulti3.const_view<2>(); - auto& mviewMulti3_2 = productMulti3.view<2>(); - auto const& cmviewMulti3_2 = 
productMulti3.view<2>(); + portabletest::TestHostCollection3 const& productMulti3 = event.get(tokenBlocks3_); + auto const& viewMulti3_0 = productMulti3.const_view().first(); + auto& mviewMulti3_0 = productMulti3.view().first(); + auto const& cmviewMulti3_0 = productMulti3.view().first(); + auto const& viewMulti3_1 = productMulti3.const_view().second(); + auto& mviewMulti3_1 = productMulti3.view().second(); + auto const& cmviewMulti3_1 = productMulti3.view().second(); + auto const& viewMulti3_2 = productMulti3.const_view().third(); + auto& mviewMulti3_2 = productMulti3.view().third(); + auto const& cmviewMulti3_2 = productMulti3.view().third(); checkViewAddresses(viewMulti3_0); checkViewAddresses(mviewMulti3_0); @@ -348,8 +339,8 @@ class TestAlpakaAnalyzer : public edm::global::EDAnalyzer<> { edm::EDGetTokenT backendToken_; std::optional expectBackend_; //const edm::EDGetTokenT tokenMulti_; - const edm::EDGetTokenT tokenMulti2_; - const edm::EDGetTokenT tokenMulti3_; + const edm::EDGetTokenT tokenBlocks2_; + const edm::EDGetTokenT tokenBlocks3_; const int expectSize_; const std::vector expectXvalues_; }; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc index d92473e485c98..9873182fd0432 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc @@ -17,9 +17,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class TestAlgoKernel { public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, - portabletest::TestDeviceCollection::View view, - double xvalue) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, portabletest::TestSoA::View view, double xvalue) const { const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}}; const portabletest::Array flags = {{6, 4, 2, 0}}; @@ -37,9 +35,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class TestAlgoMultiKernel2 { public: - 
ALPAKA_FN_ACC void operator()(Acc1D const& acc, - portabletest::TestDeviceMultiCollection2::View<1> view, - double xvalue) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, portabletest::TestSoA2::View view, double xvalue) const { const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}}; // set this only once in the whole kernel grid @@ -56,9 +52,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class TestAlgoMultiKernel3 { public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, - portabletest::TestDeviceMultiCollection3::View<2> view, - double xvalue) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, portabletest::TestSoA3::View view, double xvalue) const { const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}}; // set this only once in the whole kernel grid @@ -88,13 +82,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue); } - void TestAlgo::fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue) const { + void TestAlgo::fillMulti2(Queue& queue, portabletest::TestDeviceCollection2& collection, double xvalue) const { // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) uint32_t items = 64; // use as many groups as needed to cover the whole problem - uint32_t groups = divide_up_by(collection->metadata().size(), items); - uint32_t groups2 = divide_up_by(collection.view<1>().metadata().size(), items); + uint32_t groups = divide_up_by(collection.view().first().metadata().size(), items); + uint32_t groups2 = divide_up_by(collection.view().second().metadata().size(), items); // map items to // - threads with a single element per thread on a GPU backend @@ -102,8 +96,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto workDiv = make_workdiv(groups, items); auto workDiv2 = make_workdiv(groups2, items); - alpaka::exec(queue, workDiv, 
TestAlgoKernel{}, collection.view(), xvalue); - alpaka::exec(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view(), xvalue); + auto view1 = collection.view().first(); + auto view2 = collection.view().second(); + + alpaka::exec(queue, workDiv, TestAlgoKernel{}, view1, xvalue); + alpaka::exec(queue, workDiv2, TestAlgoMultiKernel2{}, view2, xvalue); } class TestAlgoStructKernel { @@ -132,14 +129,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDiv, TestAlgoStructKernel{}, object.data(), x, y, z, id); } - void TestAlgo::fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue) const { + void TestAlgo::fillMulti3(Queue& queue, portabletest::TestDeviceCollection3& collection, double xvalue) const { // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) uint32_t items = 64; // use as many groups as needed to cover the whole problem - uint32_t groups = divide_up_by(collection.view().metadata().size(), items); - uint32_t groups2 = divide_up_by(collection.view().metadata().size(), items); - uint32_t groups3 = divide_up_by(collection.view().metadata().size(), items); + uint32_t groups = divide_up_by(collection.view().first().metadata().size(), items); + uint32_t groups2 = divide_up_by(collection.view().second().metadata().size(), items); + uint32_t groups3 = divide_up_by(collection.view().third().metadata().size(), items); // map items to // - threads with a single element per thread on a GPU backend @@ -148,9 +145,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto workDiv2 = make_workdiv(groups2, items); auto workDiv3 = make_workdiv(groups3, items); - alpaka::exec(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue); - alpaka::exec(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view(), xvalue); - alpaka::exec(queue, workDiv3, TestAlgoMultiKernel3{}, collection.view(), xvalue); + auto view1 = collection.view().first(); + auto view2 = collection.view().second(); + 
auto view3 = collection.view().third(); + + alpaka::exec(queue, workDiv, TestAlgoKernel{}, view1, xvalue); + alpaka::exec(queue, workDiv2, TestAlgoMultiKernel2{}, view2, xvalue); + alpaka::exec(queue, workDiv3, TestAlgoMultiKernel3{}, view3, xvalue); } class TestAlgoKernelUpdate { @@ -331,13 +332,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { portabletest::TestDeviceCollection TestAlgo::update(Queue& queue, portabletest::TestDeviceCollection const& input, AlpakaESTestDataEDevice const& esData) const { - portabletest::TestDeviceCollection collection{input->metadata().size(), queue}; + portabletest::TestDeviceCollection collection{input.size(), queue}; // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) uint32_t items = 64; // use as many groups as needed to cover the whole problem - uint32_t groups = divide_up_by(collection->metadata().size(), items); + uint32_t groups = divide_up_by(collection.size(), items); // map items to // - threads with a single element per thread on a GPU backend @@ -349,16 +350,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { return collection; } - portabletest::TestDeviceMultiCollection2 TestAlgo::updateMulti2(Queue& queue, - portabletest::TestDeviceMultiCollection2 const& input, - AlpakaESTestDataEDevice const& esData) const { - portabletest::TestDeviceMultiCollection2 collection{input.sizes(), queue}; + portabletest::TestDeviceCollection2 TestAlgo::updateMulti2(Queue& queue, + portabletest::TestDeviceCollection2 const& input, + AlpakaESTestDataEDevice const& esData) const { + portabletest::TestDeviceCollection2 collection{queue, input.size()}; // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) uint32_t items = 64; // use as many groups as needed to cover the whole problem - auto sizes = collection.sizes(); + auto sizes = collection.size(); uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items); // map items to @@ -366,28 +367,26 @@ 
namespace ALPAKA_ACCELERATOR_NAMESPACE { // - elements within a single thread on a CPU backend auto workDiv = make_workdiv(groups, items); - alpaka::exec(queue, - workDiv, - TestAlgoKernelUpdateMulti2{}, - input.view(), - input.view(), - esData.view(), - collection.view(), - collection.view()); + auto inputView1 = input.view().first(); + auto inputView2 = input.view().second(); + auto outputView1 = collection.view().first(); + auto outputView2 = collection.view().second(); + alpaka::exec( + queue, workDiv, TestAlgoKernelUpdateMulti2{}, inputView1, inputView2, esData.view(), outputView1, outputView2); return collection; } - portabletest::TestDeviceMultiCollection3 TestAlgo::updateMulti3(Queue& queue, - portabletest::TestDeviceMultiCollection3 const& input, - AlpakaESTestDataEDevice const& esData) const { - portabletest::TestDeviceMultiCollection3 collection{input.sizes(), queue}; + portabletest::TestDeviceCollection3 TestAlgo::updateMulti3(Queue& queue, + portabletest::TestDeviceCollection3 const& input, + AlpakaESTestDataEDevice const& esData) const { + portabletest::TestDeviceCollection3 collection{queue, input.size()}; // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) uint32_t items = 64; // use as many groups as needed to cover the whole problem - auto sizes = collection.sizes(); + auto sizes = collection.size(); uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items); // map items to @@ -395,16 +394,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // - elements within a single thread on a CPU backend auto workDiv = make_workdiv(groups, items); + auto inputView1 = input.view().first(); + auto inputView2 = input.view().second(); + auto inputView3 = input.view().third(); + auto outputView1 = collection.view().first(); + auto outputView2 = collection.view().second(); + auto outputView3 = collection.view().third(); + alpaka::exec(queue, workDiv, TestAlgoKernelUpdateMulti3{}, - input.view(), - 
input.view(), - input.view(), + inputView1, + inputView2, + inputView3, esData.view(), - collection.view(), - collection.view(), - collection.view()); + outputView1, + outputView2, + outputView3); return collection; } @@ -412,13 +418,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { portabletest::TestDeviceCollection TestAlgo::update(Queue& queue, portabletest::TestDeviceCollection const& input, UpdateInfo const* d_updateInfo) const { - portabletest::TestDeviceCollection collection{input->metadata().size(), queue}; + portabletest::TestDeviceCollection collection{input.size(), queue}; // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) uint32_t items = 64; // use as many groups as needed to cover the whole problem - uint32_t groups = divide_up_by(collection->metadata().size(), items); + uint32_t groups = divide_up_by(collection.size(), items); // map items to // - threads with a single element per thread on a GPU backend @@ -430,16 +436,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { return collection; } - portabletest::TestDeviceMultiCollection2 TestAlgo::updateMulti2(Queue& queue, - portabletest::TestDeviceMultiCollection2 const& input, - UpdateInfo const* d_updateInfo) const { - portabletest::TestDeviceMultiCollection2 collection{input.sizes(), queue}; + portabletest::TestDeviceCollection2 TestAlgo::updateMulti2(Queue& queue, + portabletest::TestDeviceCollection2 const& input, + UpdateInfo const* d_updateInfo) const { + portabletest::TestDeviceCollection2 collection{queue, input.size()}; // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) uint32_t items = 64; // use as many groups as needed to cover the whole problem - auto sizes = collection.sizes(); + auto sizes = collection.size(); uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items); // map items to @@ -447,28 +453,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // - elements within a single thread on a CPU 
backend auto workDiv = make_workdiv(groups, items); - alpaka::exec(queue, - workDiv, - TestAlgoKernelUpdateMulti2{}, - input.view(), - input.view(), - d_updateInfo, - collection.view(), - collection.view()); + auto inputView1 = input.view().first(); + auto inputView2 = input.view().second(); + auto outputView1 = collection.view().first(); + auto outputView2 = collection.view().second(); + alpaka::exec( + queue, workDiv, TestAlgoKernelUpdateMulti2{}, inputView1, inputView2, d_updateInfo, outputView1, outputView2); return collection; } - portabletest::TestDeviceMultiCollection3 TestAlgo::updateMulti3(Queue& queue, - portabletest::TestDeviceMultiCollection3 const& input, - UpdateInfo const* d_updateInfo) const { - portabletest::TestDeviceMultiCollection3 collection{input.sizes(), queue}; + portabletest::TestDeviceCollection3 TestAlgo::updateMulti3(Queue& queue, + portabletest::TestDeviceCollection3 const& input, + UpdateInfo const* d_updateInfo) const { + portabletest::TestDeviceCollection3 collection{queue, input.size()}; // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) uint32_t items = 64; // use as many groups as needed to cover the whole problem - auto sizes = collection.sizes(); + auto sizes = collection.size(); uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items); // map items to @@ -476,16 +480,23 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // - elements within a single thread on a CPU backend auto workDiv = make_workdiv(groups, items); + auto inputView1 = input.view().first(); + auto inputView2 = input.view().second(); + auto inputView3 = input.view().third(); + auto outputView1 = collection.view().first(); + auto outputView2 = collection.view().second(); + auto outputView3 = collection.view().third(); + alpaka::exec(queue, workDiv, TestAlgoKernelUpdateMulti3{}, - input.view(), - input.view(), - input.view(), + inputView1, + inputView2, + inputView3, d_updateInfo, - collection.view(), - 
collection.view(), - collection.view()); + outputView1, + outputView2, + outputView3); return collection; } @@ -516,7 +527,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class TestZeroMultiCollectionKernel2 { public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, portabletest::TestDeviceMultiCollection2::ConstView<1> view) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, portabletest::TestSoA2::ConstView view) const { const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}}; // check this only once in the whole kernel grid @@ -538,7 +549,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class TestZeroMultiCollectionKernel3 { public: - ALPAKA_FN_ACC void operator()(Acc1D const& acc, portabletest::TestDeviceMultiCollection3::ConstView<2> view) const { + ALPAKA_FN_ACC void operator()(Acc1D const& acc, portabletest::TestSoA3::ConstView view) const { const portabletest::Matrix matrix{{0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}}; // check this only once in the whole kernel grid @@ -583,31 +594,35 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // Check that the collection has been filled with zeroes. 
- void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceMultiCollection2 const& collection) const { + void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceCollection2 const& collection) const { // create a work division with a single block and // - 32 threads with a single element per thread on a GPU backend // - 32 elements within a single thread on a CPU backend auto workDiv = make_workdiv(1, 32); + auto constView1 = collection.const_view().first(); + auto constView2 = collection.const_view().second(); + // the kernels will make a strided loop over the launch grid to cover all elements in the collection - alpaka::exec(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view()); - alpaka::exec( - queue, workDiv, TestZeroMultiCollectionKernel2{}, collection.const_view()); + alpaka::exec(queue, workDiv, TestZeroCollectionKernel{}, constView1); + alpaka::exec(queue, workDiv, TestZeroMultiCollectionKernel2{}, constView2); } // Check that the collection has been filled with zeroes. 
- void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceMultiCollection3 const& collection) const { + void TestAlgo::checkZero(Queue& queue, portabletest::TestDeviceCollection3 const& collection) const { // create a work division with a single block and // - 32 threads with a single element per thread on a GPU backend // - 32 elements within a single thread on a CPU backend auto workDiv = make_workdiv(1, 32); + auto constView1 = collection.const_view().first(); + auto constView2 = collection.const_view().second(); + auto constView3 = collection.const_view().third(); + // the kernels will make a strided loop over the launch grid to cover all elements in the collection - alpaka::exec(queue, workDiv, TestZeroCollectionKernel{}, collection.const_view()); - alpaka::exec( - queue, workDiv, TestZeroMultiCollectionKernel2{}, collection.const_view()); - alpaka::exec( - queue, workDiv, TestZeroMultiCollectionKernel3{}, collection.const_view()); + alpaka::exec(queue, workDiv, TestZeroCollectionKernel{}, constView1); + alpaka::exec(queue, workDiv, TestZeroMultiCollectionKernel2{}, constView2); + alpaka::exec(queue, workDiv, TestZeroMultiCollectionKernel3{}, constView3); } // Check that the object has been filled with zeroes. 
diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h index dbebf60e898b5..bd59266d86d3e 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h @@ -18,12 +18,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { portabletest::TestDeviceCollection const& input, AlpakaESTestDataEDevice const& esData) const; - portabletest::TestDeviceMultiCollection2 updateMulti2(Queue& queue, - portabletest::TestDeviceMultiCollection2 const& input, - AlpakaESTestDataEDevice const& esData) const; - portabletest::TestDeviceMultiCollection3 updateMulti3(Queue& queue, - portabletest::TestDeviceMultiCollection3 const& input, - AlpakaESTestDataEDevice const& esData) const; + portabletest::TestDeviceCollection2 updateMulti2(Queue& queue, + portabletest::TestDeviceCollection2 const& input, + AlpakaESTestDataEDevice const& esData) const; + portabletest::TestDeviceCollection3 updateMulti3(Queue& queue, + portabletest::TestDeviceCollection3 const& input, + AlpakaESTestDataEDevice const& esData) const; struct UpdateInfo { int x, y, z; @@ -31,19 +31,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { portabletest::TestDeviceCollection update(Queue& queue, portabletest::TestDeviceCollection const& input, UpdateInfo const* d_updateInfo) const; - portabletest::TestDeviceMultiCollection2 updateMulti2(Queue& queue, - portabletest::TestDeviceMultiCollection2 const& input, - UpdateInfo const* d_updateInfo) const; - portabletest::TestDeviceMultiCollection3 updateMulti3(Queue& queue, - portabletest::TestDeviceMultiCollection3 const& input, - UpdateInfo const* d_updateInfo) const; + portabletest::TestDeviceCollection2 updateMulti2(Queue& queue, + portabletest::TestDeviceCollection2 const& input, + UpdateInfo const* d_updateInfo) const; + portabletest::TestDeviceCollection3 updateMulti3(Queue& queue, + portabletest::TestDeviceCollection3 const& input, + UpdateInfo const* 
d_updateInfo) const; - void fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue = 0.) const; - void fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue = 0.) const; + void fillMulti2(Queue& queue, portabletest::TestDeviceCollection2& collection, double xvalue = 0.) const; + void fillMulti3(Queue& queue, portabletest::TestDeviceCollection3& collection, double xvalue = 0.) const; void checkZero(Queue& queue, portabletest::TestDeviceCollection const& collection) const; - void checkZero(Queue& queue, portabletest::TestDeviceMultiCollection2 const& collection) const; - void checkZero(Queue& queue, portabletest::TestDeviceMultiCollection3 const& collection) const; + void checkZero(Queue& queue, portabletest::TestDeviceCollection2 const& collection) const; + void checkZero(Queue& queue, portabletest::TestDeviceCollection3 const& collection) const; void checkZero(Queue& queue, portabletest::TestDeviceObject const& object) const; }; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaESProducerAMulti.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaESProducerBlocks.cc similarity index 75% rename from HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaESProducerAMulti.cc rename to HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaESProducerBlocks.cc index 38c5dadae55c1..94002663e54cc 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaESProducerAMulti.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaESProducerBlocks.cc @@ -17,9 +17,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { * host ESProduct and converts the data into PortableHostMultiCollection, and * implicitly transfers the data product to device */ - class TestAlpakaESProducerAMulti : public ESProducer { + class TestAlpakaESProducerBlocks : public ESProducer { public: - TestAlpakaESProducerAMulti(edm::ParameterSet const& iConfig) : ESProducer(iConfig) { + 
TestAlpakaESProducerBlocks(edm::ParameterSet const& iConfig) : ESProducer(iConfig) { auto cc = setWhatProduced(this); token_ = cc.consumes(); } @@ -29,17 +29,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { descriptions.addWithDefaultLabel(desc); } - std::optional produce(AlpakaESTestRecordA const& iRecord) { + std::optional produce(AlpakaESTestRecordA const& iRecord) { auto const& input = iRecord.get(token_); int const sizeA = 10; int const sizeC = 100; // TODO: pinned allocation? // TODO: cached allocation? - AlpakaESTestDataACMultiHost product({{sizeA, sizeC}}, cms::alpakatools::host()); - auto viewA = product.view< - cms::alpakatest::AlpakaESTestSoAA>(); // this template is not really needed as this is fhe first layout - auto viewC = product.view(); + AlpakaESTestDataBlocksACHost product(cms::alpakatools::host(), sizeA, sizeC); + auto viewA = product.view().testSoAA(); // this template is not really needed as this is fhe first layout + auto viewC = product.view().testSoAC(); for (int i = 0; i < sizeA; ++i) { viewA[i].z() = input.value() - i; @@ -57,4 +56,4 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE -DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(TestAlpakaESProducerAMulti); +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(TestAlpakaESProducerBlocks); diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc index 58dac53421a4e..00c0b5f097295 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc @@ -23,7 +23,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { TestAlpakaGlobalProducer(edm::ParameterSet const& config) : EDProducer<>(config), esToken_(esConsumes(config.getParameter("eventSetupSource"))), - esMultiToken_(esConsumes(config.getParameter("eventSetupSourceMulti"))), + 
esBlocksToken_(esConsumes(config.getParameter("eventSetupSourceBlocks"))), deviceToken_{produces()}, deviceTokenMulti2_{produces()}, deviceTokenMulti3_{produces()}, @@ -36,11 +36,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { void produce(edm::StreamID, device::Event& iEvent, device::EventSetup const& iSetup) const override { [[maybe_unused]] auto const& esData = iSetup.getData(esToken_); - [[maybe_unused]] auto const& esMultiData = iSetup.getData(esMultiToken_); + [[maybe_unused]] auto const& esBlocksData = iSetup.getData(esBlocksToken_); portabletest::TestDeviceCollection deviceProduct{size_, iEvent.queue()}; - portabletest::TestDeviceMultiCollection2 deviceProductMulti2{{{size_, size2_}}, iEvent.queue()}; - portabletest::TestDeviceMultiCollection3 deviceProductMulti3{{{size_, size2_, size3_}}, iEvent.queue()}; + portabletest::TestDeviceCollection2 deviceProductMulti2{iEvent.queue(), size_, size2_}; + portabletest::TestDeviceCollection3 deviceProductMulti3{iEvent.queue(), size_, size2_, size3_}; // run the algorithm, potentially asynchronously algo_.fill(iEvent.queue(), deviceProduct); @@ -55,7 +55,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; desc.add("eventSetupSource", edm::ESInputTag{}); - desc.add("eventSetupSourceMulti", edm::ESInputTag{}); + desc.add("eventSetupSourceBlocks", edm::ESInputTag{}); edm::ParameterSetDescription psetSize; psetSize.add("alpaka_serial_sync"); @@ -68,10 +68,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const device::ESGetToken esToken_; - const device::ESGetToken esMultiToken_; + const device::ESGetToken esBlocksToken_; const device::EDPutToken deviceToken_; - const device::EDPutToken deviceTokenMulti2_; - const device::EDPutToken deviceTokenMulti3_; + const device::EDPutToken deviceTokenMulti2_; + const device::EDPutToken deviceTokenMulti3_; const int32_t size_; const int32_t size2_; const int32_t size3_; diff 
--git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerCopyToDeviceCache.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerCopyToDeviceCache.cc index 44ff9a9c1f384..bf6035b9a92fb 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerCopyToDeviceCache.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerCopyToDeviceCache.cc @@ -66,11 +66,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const device::EDGetToken getToken_; - const device::EDGetToken getTokenMulti2_; - const device::EDGetToken getTokenMulti3_; + const device::EDGetToken getTokenMulti2_; + const device::EDGetToken getTokenMulti3_; const device::EDPutToken putToken_; - const device::EDPutToken putTokenMulti2_; - const device::EDPutToken putTokenMulti3_; + const device::EDPutToken putTokenMulti2_; + const device::EDPutToken putTokenMulti3_; // implementation of the algorithm TestAlgo algo_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerE.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerE.cc index 8012dac2b20a2..e65c5436aa90a 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerE.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerE.cc @@ -58,11 +58,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const device::ESGetToken esToken_; const device::EDGetToken getToken_; - const device::EDGetToken getTokenMulti2_; - const device::EDGetToken getTokenMulti3_; + const device::EDGetToken getTokenMulti2_; + const device::EDGetToken getTokenMulti3_; const device::EDPutToken putToken_; - const device::EDPutToken putTokenMulti2_; - const device::EDPutToken putTokenMulti3_; + const device::EDPutToken putTokenMulti2_; + const device::EDPutToken putTokenMulti3_; // implementation of the algorithm TestAlgo algo_; diff --git 
a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerMoveToDeviceCache.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerMoveToDeviceCache.cc index a328a55babc8e..5707a2e41826e 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerMoveToDeviceCache.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerMoveToDeviceCache.cc @@ -65,11 +65,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const device::EDGetToken getToken_; - const device::EDGetToken getTokenMulti2_; - const device::EDGetToken getTokenMulti3_; + const device::EDGetToken getTokenMulti2_; + const device::EDGetToken getTokenMulti3_; const device::EDPutToken putToken_; - const device::EDPutToken putTokenMulti2_; - const device::EDPutToken putTokenMulti3_; + const device::EDPutToken putTokenMulti2_; + const device::EDPutToken putTokenMulti3_; // implementation of the algorithm TestAlgo algo_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc index 3f29ff8b88b9f..7a663dc70d1ae 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc @@ -40,12 +40,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { algo_.checkZero(event.queue(), deviceObject); algo_.fillObject(event.queue(), deviceObject, 5., 12., 13., 42); - portabletest::TestDeviceMultiCollection2 deviceMultiProduct2{{{size_, size2_}}, event.queue()}; + portabletest::TestDeviceCollection2 deviceMultiProduct2{event.queue(), size_, size2_}; deviceMultiProduct2.zeroInitialise(event.queue()); algo_.checkZero(event.queue(), deviceMultiProduct2); algo_.fillMulti2(event.queue(), deviceMultiProduct2); - portabletest::TestDeviceMultiCollection3 deviceMultiProduct3{{{size_, size2_, size3_}}, event.queue()}; + portabletest::TestDeviceCollection3 deviceMultiProduct3{event.queue(), size_, 
size2_, size3_}; deviceMultiProduct3.zeroInitialise(event.queue()); algo_.checkZero(event.queue(), deviceMultiProduct3); algo_.fillMulti3(event.queue(), deviceMultiProduct3); @@ -68,8 +68,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const device::EDPutToken objectToken_; const device::EDPutToken collectionToken_; - const device::EDPutToken deviceTokenMulti2_; - const device::EDPutToken deviceTokenMulti3_; + const device::EDPutToken deviceTokenMulti2_; + const device::EDPutToken deviceTokenMulti3_; const int32_t size_; const int32_t size2_; const int32_t size3_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc index f5998ed980ba4..0b876cee08de6 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc @@ -43,10 +43,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { [[maybe_unused]] auto const& esData = iSetup.getData(esToken_); auto deviceProduct = std::make_unique(size_, iEvent.queue()); - auto deviceProductMulti2 = std::make_unique( - portabletest::TestDeviceMultiCollection2::SizesArray{{size_, size2_}}, iEvent.queue()); - auto deviceProductMulti3 = std::make_unique( - portabletest::TestDeviceMultiCollection3::SizesArray{{size_, size2_, size3_}}, iEvent.queue()); + auto deviceProductMulti2 = std::make_unique(iEvent.queue(), size_, size2_); + auto deviceProductMulti3 = + std::make_unique(iEvent.queue(), size_, size2_, size3_); // run the algorithm, potentially asynchronously algo_.fill(iEvent.queue(), *deviceProduct); @@ -77,8 +76,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { edm::EDGetTokenT getToken_; device::ESGetToken, AlpakaESTestRecordB> esToken_; device::EDPutToken devicePutToken_; - device::EDPutToken devicePutTokenMulti2_; - device::EDPutToken devicePutTokenMulti3_; + device::EDPutToken devicePutTokenMulti2_; + device::EDPutToken 
devicePutTokenMulti3_; const int32_t size_; const int32_t size2_; const int32_t size3_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc index 3badb1bb75b45..728dfbb9372ec 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc @@ -62,8 +62,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const device::ESGetToken esTokenDevice_; const edm::EDPutTokenT putToken_; - const edm::EDPutTokenT putTokenMulti2_; - const edm::EDPutTokenT putTokenMulti3_; + const edm::EDPutTokenT putTokenMulti2_; + const edm::EDPutTokenT putTokenMulti3_; TestHelperClass helper_; cms::alpakatest::TestHostOnlyHelperClass const hostHelper_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc index 8de494e37430f..35037f0a9789a 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc @@ -16,13 +16,12 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { [[maybe_unused]] auto esDataHostHandle = iSetup.getHandle(esTokenHost_); [[maybe_unused]] auto const& esDataDevice = iSetup.getData(esTokenDevice_); portabletest::TestDeviceCollection const& deviceProduct = iEvent.get(getToken_); - portabletest::TestDeviceMultiCollection2 const& deviceProductMulti2 = iEvent.get(getTokenMulti2_); - portabletest::TestDeviceMultiCollection3 const& deviceProductMulti3 = iEvent.get(getTokenMulti3_); - - hostProduct_ = portabletest::TestHostCollection{deviceProduct->metadata().size(), iEvent.queue()}; - hostProductMulti2_ = portabletest::TestHostMultiCollection2{deviceProductMulti2.sizes(), iEvent.queue()}; - hostProductMulti3_ = 
portabletest::TestHostMultiCollection3{deviceProductMulti3.sizes(), iEvent.queue()}; + portabletest::TestDeviceCollection2 const& deviceProductMulti2 = iEvent.get(getTokenMulti2_); + portabletest::TestDeviceCollection3 const& deviceProductMulti3 = iEvent.get(getTokenMulti3_); + hostProduct_ = portabletest::TestHostCollection{deviceProduct.size(), iEvent.queue()}; + hostProductMulti2_ = portabletest::TestHostCollection2{iEvent.queue(), deviceProductMulti2.size()}; + hostProductMulti3_ = portabletest::TestHostCollection3{iEvent.queue(), deviceProductMulti3.size()}; alpaka::memcpy(iEvent.queue(), hostProduct_->buffer(), deviceProduct.const_buffer()); alpaka::memcpy(iEvent.queue(), hostProductMulti2_->buffer(), deviceProductMulti2.const_buffer()); alpaka::memcpy(iEvent.queue(), hostProductMulti3_->buffer(), deviceProductMulti3.const_buffer()); diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h index c369e78fc639a..96267d1d506ef 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h @@ -31,13 +31,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { return product; } - portabletest::TestHostMultiCollection2 moveFromMulti2() { + portabletest::TestHostCollection2 moveFromMulti2() { auto product = std::move(*hostProductMulti2_); hostProductMulti2_.reset(); return product; } - portabletest::TestHostMultiCollection3 moveFromMulti3() { + portabletest::TestHostCollection3 moveFromMulti3() { auto product = std::move(*hostProductMulti3_); hostProductMulti3_.reset(); return product; @@ -45,15 +45,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const device::EDGetToken getToken_; - const device::EDGetToken getTokenMulti2_; - const device::EDGetToken getTokenMulti3_; + const device::EDGetToken getTokenMulti2_; + const device::EDGetToken getTokenMulti3_; const edm::ESGetToken esTokenHost_; const 
device::ESGetToken esTokenDevice_; // hold the output product between acquire() and produce() std::optional hostProduct_; - std::optional hostProductMulti2_; - std::optional hostProductMulti3_; + std::optional hostProductMulti2_; + std::optional hostProductMulti3_; }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/HeterogeneousCore/AlpakaTest/src/ES_AlpakaESTestData.cc b/HeterogeneousCore/AlpakaTest/src/ES_AlpakaESTestData.cc index ed017b50cf873..c03f2762bd984 100644 --- a/HeterogeneousCore/AlpakaTest/src/ES_AlpakaESTestData.cc +++ b/HeterogeneousCore/AlpakaTest/src/ES_AlpakaESTestData.cc @@ -5,7 +5,7 @@ TYPELOOKUP_DATA_REG(cms::alpakatest::AlpakaESTestDataAHost); TYPELOOKUP_DATA_REG(cms::alpakatest::AlpakaESTestDataCHost); TYPELOOKUP_DATA_REG(cms::alpakatest::AlpakaESTestDataDHost); -TYPELOOKUP_DATA_REG(cms::alpakatest::AlpakaESTestDataACMultiHost); +TYPELOOKUP_DATA_REG(cms::alpakatest::AlpakaESTestDataBlocksACHost); // Template-over-device model TYPELOOKUP_DATA_REG(cms::alpakatest::AlpakaESTestDataB); diff --git a/HeterogeneousCore/AlpakaTest/src/alpaka/ES_AlpakaESTestData.cc b/HeterogeneousCore/AlpakaTest/src/alpaka/ES_AlpakaESTestData.cc index a9f0ee95f286a..8879f793d7755 100644 --- a/HeterogeneousCore/AlpakaTest/src/alpaka/ES_AlpakaESTestData.cc +++ b/HeterogeneousCore/AlpakaTest/src/alpaka/ES_AlpakaESTestData.cc @@ -5,7 +5,7 @@ TYPELOOKUP_ALPAKA_DATA_REG(AlpakaESTestDataADevice); TYPELOOKUP_ALPAKA_DATA_REG(AlpakaESTestDataCDevice); TYPELOOKUP_ALPAKA_DATA_REG(AlpakaESTestDataDDevice); -TYPELOOKUP_ALPAKA_DATA_REG(AlpakaESTestDataACMultiDevice); +TYPELOOKUP_ALPAKA_DATA_REG(AlpakaESTestDataACBlocksDevice); // Template-over-device model #include "HeterogeneousCore/AlpakaTest/interface/AlpakaESTestData.h" diff --git a/HeterogeneousCore/AlpakaTest/test/testAlpakaModules_cfg.py b/HeterogeneousCore/AlpakaTest/test/testAlpakaModules_cfg.py index 1bb3deb6a9efb..4f7dd8e21b8fa 100644 --- a/HeterogeneousCore/AlpakaTest/test/testAlpakaModules_cfg.py +++ 
b/HeterogeneousCore/AlpakaTest/test/testAlpakaModules_cfg.py @@ -61,16 +61,16 @@ appendToDataLabel = cms.string("null"), ) -# PortableMultiCollection -from HeterogeneousCore.AlpakaTest.testAlpakaESProducerAMulti_cfi import testAlpakaESProducerAMulti +# PortableCollection with SoABlocks +from HeterogeneousCore.AlpakaTest.testAlpakaESProducerBlocks_cfi import testAlpakaESProducerBlocks process.intProduct = cms.EDProducer("IntProducer", ivalue = cms.int32(42)) -process.alpakaESProducerAMulti = testAlpakaESProducerAMulti.clone(appendToDataLabel = cms.string("appendedLabel")) +process.alpakaESProducerBlocks = testAlpakaESProducerBlocks.clone(appendToDataLabel = cms.string("appendedLabel")) from HeterogeneousCore.AlpakaTest.testAlpakaGlobalProducer_cfi import testAlpakaGlobalProducer process.alpakaGlobalProducer = testAlpakaGlobalProducer.clone( eventSetupSource = cms.ESInputTag("alpakaESProducerA", "appendedLabel"), - eventSetupSourceMulti = cms.ESInputTag("alpakaESProducerAMulti", "appendedLabel"), + eventSetupSourceBlocks = cms.ESInputTag("alpakaESProducerBlocks", "appendedLabel"), size = dict( alpaka_serial_sync = 10, alpaka_cuda_async = 20, @@ -178,7 +178,7 @@ eventSetupSource = cms.ESInputTag("", "null") ) -_postfixes = ["ESProducerA", "ESProducerB", "ESProducerC", "ESProducerD", "ESProducerE", "ESProducerAMulti", +_postfixes = ["ESProducerA", "ESProducerB", "ESProducerC", "ESProducerD", "ESProducerE", "ESProducerBlocks", "ESProducerNull", "GlobalProducer", "GlobalProducerE", "GlobalProducerCopyToDeviceCache", "GlobalProducerMoveToDeviceCache", diff --git a/RecoLocalTracker/Phase2TrackerRecHits/plugins/alpaka/Phase2OTRecHitsSoAConverter.cc b/RecoLocalTracker/Phase2TrackerRecHits/plugins/alpaka/Phase2OTRecHitsSoAConverter.cc index daef627eef43e..0865a189e41e4 100644 --- a/RecoLocalTracker/Phase2TrackerRecHits/plugins/alpaka/Phase2OTRecHitsSoAConverter.cc +++ b/RecoLocalTracker/Phase2TrackerRecHits/plugins/alpaka/Phase2OTRecHitsSoAConverter.cc @@ -126,7 +126,7 @@ 
namespace ALPAKA_ACCELERATOR_NAMESPACE { const auto& stripHits = iEvent.get(recHitToken_); const auto& pixelHitsSoA = iEvent.get(pixelHitsSoA_); - int nPixelHits = pixelHitsSoA.view().metadata().size(); + int nPixelHits = pixelHitsSoA.view().trackingHits().metadata().size(); // Count strip hits and active strip modules const int nStripHits = stripHits.data().size(); @@ -149,7 +149,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { << "Total hits of PinOTBarrel: " << PHitsInOTBarrel << '\n'; Hits stripHitsSoA(queue, PHitsInOTBarrel, orderedModules_.size()); - auto& stripHitsModuleView = stripHitsSoA.view<::reco::HitModuleSoA>(); + auto stripHitsModuleView = stripHitsSoA.view().hitModules(); std::vector counterOfHitsPerModule(orderedModules_.size(), 0); assert(!orderedModules_.empty()); @@ -197,7 +197,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if (detIdIsP_[detId]) { int idx = moduleHitIndex++; assert(idx < PHitsInOTBarrel); - auto hit = stripHitsSoA.view()[idx]; + auto hit = stripHitsSoA.view().trackingHits()[idx]; hit.xLocal() = recHit.localPosition().x(); hit.yLocal() = recHit.localPosition().y(); hit.xerrLocal() = recHit.localPositionError().xx(); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc index b5edf4411d1c6..53d6f056be386 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc @@ -66,8 +66,8 @@ void SiPixelRecHitFromSoAAlpaka::produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { auto const& hits = iEvent.get(hitsToken_); - auto hitsView = hits.view(); - auto modulesView = hits.view<::reco::HitModuleSoA>(); + auto hitsView = hits.view().trackingHits(); + auto modulesView = hits.view().hitModules(); auto nHits = hitsView.metadata().size(); auto nModules = modulesView.metadata().size(); 
LogDebug("SiPixelRecHitFromSoAAlpaka") << "converting " << nHits << " hits in max " << nModules << " modules"; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc index 8bd80708652ed..17c920ea5e6fb 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc @@ -64,7 +64,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { digis_d.nDigis(), digis_d.nModules(), clusters_d.view(), - hits_d.view()); + hits_d.view().trackingHits()); #ifdef GPU_DEBUG alpaka::wait(queue); #endif diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitExtendedAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitExtendedAlpaka.cc index 332fa09e4b894..011332f056cee 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitExtendedAlpaka.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitExtendedAlpaka.cc @@ -116,9 +116,9 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { #endif // start from the hits SoA, use metarecords to loop over all the columns - auto outView = output.view(); - auto pixView = pixColl.view(); - auto trkView = trkColl.view(); + auto outView = output.view().trackingHits(); + auto pixView = pixColl.view().trackingHits(); + auto trkView = trkColl.view().trackingHits(); // layout type (same for all views) using ViewType = decltype(outView); @@ -166,15 +166,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // copy hitModuleStart for Pixel modules alpaka::memcpy( queue, - cms::alpakatools::make_device_view(queue, output.view<::reco::HitModuleSoA>().moduleStart().data(), nPixMod), - cms::alpakatools::make_device_view(queue, pixColl.view<::reco::HitModuleSoA>().moduleStart().data(), nPixMod)); + cms::alpakatools::make_device_view(queue, output.view().hitModules().moduleStart().data(), nPixMod), + 
cms::alpakatools::make_device_view(queue, pixColl.view().hitModules().moduleStart().data(), nPixMod)); // copy hitModuleStart for Tracker modules (offset after Pixel modules) // copy nTrkMod + 1 elements to include the last "hidden" element - alpaka::memcpy(queue, - cms::alpakatools::make_device_view( - queue, output.view<::reco::HitModuleSoA>().moduleStart().data() + nPixMod, nTrkMod + 1), - cms::alpakatools::make_device_view( - queue, trkColl.view<::reco::HitModuleSoA>().moduleStart().data(), nTrkMod + 1)); + alpaka::memcpy( + queue, + cms::alpakatools::make_device_view( + queue, output.view().hitModules().moduleStart().data() + nPixMod, nTrkMod + 1), + cms::alpakatools::make_device_view(queue, trkColl.view().hitModules().moduleStart().data(), nTrkMod + 1)); #ifdef GPU_DEBUG alpaka::wait(queue); std::cout << "Copied hitModuleStart for Pixel and Tracker modules\n"; diff --git a/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc b/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc index d9df55a52fca0..3e86b8e86904e 100644 --- a/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc +++ b/RecoTauTag/HLTProducers/src/L2TauTagNNProducerAlpaka.cc @@ -147,6 +147,7 @@ struct L2TauNNProducerAlpakaCacheData { class L2TauNNProducerAlpaka : public edm::stream::EDProducer> { public: using TracksHost = reco::TracksHost; + using ZVertexHost = reco::ZVertexHost; struct caloRecHitCollections { const HBHERecHitCollection* hbhe; @@ -209,7 +210,7 @@ class L2TauNNProducerAlpaka : public edm::stream::EDProducer eeToken_; const edm::ESGetToken geometryToken_; const edm::ESGetToken bFieldToken_; - const edm::EDGetTokenT pataVerticesToken_; + const edm::EDGetTokenT pataVerticesToken_; const edm::EDGetTokenT pataTracksToken_; const edm::EDGetTokenT beamSpotToken_; const unsigned int maxVtx_; @@ -575,28 +576,28 @@ void L2TauNNProducerAlpaka::selectGoodTracksAndVertices(const ZVertexHost& patav const TracksHost& patatracks_tsoa, std::vector& trkGood, std::vector& vtxGood) { - 
const auto maxTracks = patatracks_tsoa.view().metadata().size(); - const int nv = patavtx_soa.view().nvFinal(); + const auto maxTracks = patatracks_tsoa.view().tracks().metadata().size(); + const int nv = patavtx_soa.view().zvertex().nvFinal(); trkGood.clear(); trkGood.reserve(maxTracks); vtxGood.clear(); vtxGood.reserve(nv); - auto const quality = patatracks_tsoa.view().quality(); + auto const quality = patatracks_tsoa.view().tracks().quality(); // No need to sort either as the algorithms is just using the max (not even the location, just the max value of pt2sum). std::vector pTSquaredSum(nv, 0); std::vector nTrkAssociated(nv, 0); for (int32_t trk_idx = 0; trk_idx < maxTracks; ++trk_idx) { - auto n_hits = nHits(patatracks_tsoa.view(), trk_idx); + auto n_hits = nHits(patatracks_tsoa.view().tracks(), trk_idx); if (n_hits == 0) { break; } - int vtx_ass_to_track = patavtx_soa.view()[trk_idx].idv(); + int vtx_ass_to_track = patavtx_soa.view().zvertexTracks()[trk_idx].idv(); if (vtx_ass_to_track >= 0 && vtx_ass_to_track < nv) { - auto patatrackPt = patatracks_tsoa.view()[trk_idx].pt(); + auto patatrackPt = patatracks_tsoa.view().tracks()[trk_idx].pt(); ++nTrkAssociated[vtx_ass_to_track]; - if (patatrackPt >= trackPtMin_ && patatracks_tsoa.const_view()[trk_idx].chi2() <= trackChi2Max_) { + if (patatrackPt >= trackPtMin_ && patatracks_tsoa.view().tracks()[trk_idx].chi2() <= trackChi2Max_) { patatrackPt = std::min(patatrackPt, trackPtMax_); pTSquaredSum[vtx_ass_to_track] += patatrackPt * patatrackPt; } @@ -608,7 +609,7 @@ void L2TauNNProducerAlpaka::selectGoodTracksAndVertices(const ZVertexHost& patav if (nv > 0) { const auto minFOM_fromFrac = (*std::max_element(pTSquaredSum.begin(), pTSquaredSum.end())) * fractionSumPt2_; for (int j = nv - 1; j >= 0 && vtxGood.size() < maxVtx_; --j) { - auto vtx_idx = patavtx_soa.view()[j].sortInd(); + auto vtx_idx = patavtx_soa.view().zvertex()[j].sortInd(); assert(vtx_idx < nv); if (nTrkAssociated[vtx_idx] >= 2 && pTSquaredSum[vtx_idx] 
>= minFOM_fromFrac && pTSquaredSum[vtx_idx] > minSumPt2_) { @@ -626,7 +627,7 @@ std::pair L2TauNNProducerAlpaka::impactParameter(int it, /* dxy and dz */ riemannFit::Vector5d ipar, opar; riemannFit::Matrix5d icov, ocov; - copyToDense(patatracks_tsoa.view(), ipar, icov, it); + copyToDense(patatracks_tsoa.view().tracks(), ipar, icov, it); riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); float sp = std::sin(patatrackPhi); @@ -677,19 +678,19 @@ void L2TauNNProducerAlpaka::fillPatatracks(tensorflow::Tensor& cellGridMatrix, const float tauPhi = allTaus[tau_idx]->phi(); for (const auto it : trkGood) { - const float patatrackPt = patatracks_tsoa.const_view()[it].pt(); + const float patatrackPt = patatracks_tsoa.const_view().tracks()[it].pt(); if (patatrackPt <= 0) continue; - const float patatrackPhi = reco::phi(patatracks_tsoa.const_view(), it); - const float patatrackEta = patatracks_tsoa.const_view()[it].eta(); - const float patatrackCharge = reco::charge(patatracks_tsoa.const_view(), it); - const float patatrackChi2OverNdof = patatracks_tsoa.view()[it].chi2(); - const auto n_hits = nHits(patatracks_tsoa.const_view(), it); + const float patatrackPhi = reco::phi(patatracks_tsoa.const_view().tracks(), it); + const float patatrackEta = patatracks_tsoa.const_view().tracks()[it].eta(); + const float patatrackCharge = reco::charge(patatracks_tsoa.const_view().tracks(), it); + const float patatrackChi2OverNdof = patatracks_tsoa.view().tracks()[it].chi2(); + const auto n_hits = nHits(patatracks_tsoa.const_view().tracks(), it); if (n_hits <= 0) continue; const int patatrackNdof = 2 * std::min(6, n_hits) - 5; - const int vtx_idx_assTrk = patavtx_soa.view()[it].idv(); + const int vtx_idx_assTrk = patavtx_soa.view().zvertexTracks()[it].idv(); if (reco::deltaR2(patatrackEta, patatrackPhi, tauEta, tauPhi) < dR2_max) { std::tie(deta, dphi, eta_idx, phi_idx) = getEtaPhiIndices(patatrackEta, 
patatrackPhi, allTaus[tau_idx]->polarP4()); diff --git a/RecoTracker/LST/plugins/LSTOutputConverter.cc b/RecoTracker/LST/plugins/LSTOutputConverter.cc index d20026c89a012..58b8be19a77f8 100644 --- a/RecoTracker/LST/plugins/LSTOutputConverter.cc +++ b/RecoTracker/LST/plugins/LSTOutputConverter.cc @@ -146,7 +146,7 @@ void LSTOutputConverter::produce(edm::Event& iEvent, const edm::EventSetup& iSet outputpTTC.reserve(nTrackCandidates); outputpLSTC.reserve(nTrackCandidates); - auto OTHits = lstInputHC.const_view().hits(); + auto OTHits = lstInputHC.const_view().hits().hits(); LogDebug("LSTOutputConverter") << "nTrackCandidates " << nTrackCandidates; for (unsigned int i = 0; i < nTrackCandidates; i++) { diff --git a/RecoTracker/LSTCore/interface/HitsHostCollection.h b/RecoTracker/LSTCore/interface/HitsHostCollection.h index 84255c9240185..5fec1cbf3ce5a 100644 --- a/RecoTracker/LSTCore/interface/HitsHostCollection.h +++ b/RecoTracker/LSTCore/interface/HitsHostCollection.h @@ -5,6 +5,6 @@ #include "DataFormats/Portable/interface/PortableHostCollection.h" namespace lst { - using HitsHostCollection = PortableHostMultiCollection; + using HitsHostCollection = PortableHostCollection; } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/HitsSoA.h b/RecoTracker/LSTCore/interface/HitsSoA.h index 93b51bbfb70de..a9726b9647ced 100644 --- a/RecoTracker/LSTCore/interface/HitsSoA.h +++ b/RecoTracker/LSTCore/interface/HitsSoA.h @@ -2,6 +2,7 @@ #define RecoTracker_LSTCore_interface_HitsSoA_h #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" #include "DataFormats/Portable/interface/PortableCollection.h" #include "RecoTracker/LSTCore/interface/Common.h" @@ -25,13 +26,32 @@ namespace lst { SOA_COLUMN(int16_t, hitRangesnLower), SOA_COLUMN(int16_t, hitRangesnUpper)) + GENERATE_SOA_BLOCKS(HitsSoALayout, SOA_BLOCK(extended, HitsExtendedSoALayout), SOA_BLOCK(ranges, HitsRangesSoALayout)) + using HitsExtendedSoA = 
HitsExtendedSoALayout<>; using HitsRangesSoA = HitsRangesSoALayout<>; + using HitsSoA = HitsSoALayout<>; using HitsExtended = HitsExtendedSoA::View; using HitsExtendedConst = HitsExtendedSoA::ConstView; using HitsRanges = HitsRangesSoA::View; using HitsRangesConst = HitsRangesSoA::ConstView; + using HitsView = HitsSoA::View; + using HitsConstView = HitsSoA::ConstView; + + // Template based accessor for getting specific SoA views. Needed in LSTEvent.dev.cc + template + struct HitsViewAccessor; + + template <> + struct HitsViewAccessor { + static constexpr auto get(auto const& v) { return v.extended(); } + }; + + template <> + struct HitsViewAccessor { + static constexpr auto get(auto const& v) { return v.ranges(); } + }; } // namespace lst diff --git a/RecoTracker/LSTCore/interface/LSTESData.h b/RecoTracker/LSTCore/interface/LSTESData.h index bfa10186f8f2e..0ce45fc578dff 100644 --- a/RecoTracker/LSTCore/interface/LSTESData.h +++ b/RecoTracker/LSTCore/interface/LSTESData.h @@ -19,7 +19,7 @@ namespace lst { unsigned int nPixels; unsigned int nEndCapMap; // Using shared_ptr so that for the serial backend all streams can use the same data - std::shared_ptr> modules; + std::shared_ptr> modules; std::shared_ptr> endcapGeometry; // Host-side object that is shared between the LSTESData objects for different devices std::shared_ptr pixelMapping; @@ -28,7 +28,7 @@ namespace lst { uint16_t const& nLowerModulesIn, unsigned int const& nPixelsIn, unsigned int const& nEndCapMapIn, - std::shared_ptr> modulesIn, + std::shared_ptr> modulesIn, std::shared_ptr> endcapGeometryIn, std::shared_ptr const& pixelMappingIn) : nModules(nModulesIn), @@ -52,16 +52,15 @@ namespace cms::alpakatools { static lst::LSTESData> copyAsync(TQueue& queue, lst::LSTESData const& srcData) { using TDev = alpaka::Dev; - std::shared_ptr> deviceModules; + std::shared_ptr> deviceModules; std::shared_ptr> deviceEndcapGeometry; if constexpr (std::is_same_v) { deviceModules = srcData.modules; deviceEndcapGeometry = 
srcData.endcapGeometry; } else { - deviceModules = std::make_shared>( - CopyToDevice>::copyAsync( - queue, *srcData.modules)); + deviceModules = std::make_shared>( + CopyToDevice>::copyAsync(queue, *srcData.modules)); deviceEndcapGeometry = std::make_shared>( CopyToDevice>::copyAsync(queue, *srcData.endcapGeometry)); } diff --git a/RecoTracker/LSTCore/interface/LSTInputHostCollection.h b/RecoTracker/LSTCore/interface/LSTInputHostCollection.h index f9ea02e289769..05dbec836c6b4 100644 --- a/RecoTracker/LSTCore/interface/LSTInputHostCollection.h +++ b/RecoTracker/LSTCore/interface/LSTInputHostCollection.h @@ -6,7 +6,7 @@ #include "DataFormats/Portable/interface/PortableDeviceCollection.h" namespace lst { - using LSTInputHostCollection = PortableHostMultiCollection; + using LSTInputHostCollection = PortableHostCollection; } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/LSTInputSoA.h b/RecoTracker/LSTCore/interface/LSTInputSoA.h index 0d2b132b1fc78..7502d8ae7057a 100644 --- a/RecoTracker/LSTCore/interface/LSTInputSoA.h +++ b/RecoTracker/LSTCore/interface/LSTInputSoA.h @@ -6,6 +6,7 @@ #endif #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" #include "DataFormats/Portable/interface/PortableCollection.h" #include "RecoTracker/LSTCore/interface/Common.h" @@ -42,13 +43,32 @@ namespace lst { SOA_COLUMN(float, eta), SOA_COLUMN(float, phi)) + GENERATE_SOA_BLOCKS(LSTInputSoALayout, SOA_BLOCK(hits, HitsBaseSoALayout), SOA_BLOCK(pixelSeeds, PixelSeedsSoALayout)) + using HitsBaseSoA = HitsBaseSoALayout<>; using PixelSeedsSoA = PixelSeedsSoALayout<>; + using LSTInputSoA = LSTInputSoALayout<>; using HitsBase = HitsBaseSoA::View; using HitsBaseConst = HitsBaseSoA::ConstView; using PixelSeeds = PixelSeedsSoA::View; using PixelSeedsConst = PixelSeedsSoA::ConstView; + using LSTInputView = LSTInputSoA::View; + using LSTInputConstView = LSTInputSoA::ConstView; + + // Template based accessor for getting 
specific SoA views. Needed in LSTEvent.dev.cc + template + struct LSTInputViewAccessor; + + template <> + struct LSTInputViewAccessor { + static constexpr auto get(auto const& v) { return v.hits(); } + }; + + template <> + struct LSTInputViewAccessor { + static constexpr auto get(auto const& v) { return v.pixelSeeds(); } + }; } // namespace lst diff --git a/RecoTracker/LSTCore/interface/LSTPrepareInput.h b/RecoTracker/LSTCore/interface/LSTPrepareInput.h index 066dc73507261..03a008a1512b8 100644 --- a/RecoTracker/LSTCore/interface/LSTPrepareInput.h +++ b/RecoTracker/LSTCore/interface/LSTPrepareInput.h @@ -213,10 +213,10 @@ namespace lst { nPixelSeeds = n_max_pixel_segments_per_module; } - std::array const soa_sizes{{nHitsIT + nHitsOT, nPixelSeeds}}; - LSTInputHostCollection lstInputHC(soa_sizes, queue); + LSTInputHostCollection lstInputHC( + queue, static_cast(nHitsIT + nHitsOT), static_cast(nPixelSeeds)); - auto hits = lstInputHC.view(); + auto hits = lstInputHC.view().hits(); std::memcpy(hits.xs().data(), ph2_x.data(), nHitsOT * sizeof(float)); std::memcpy(hits.ys().data(), ph2_y.data(), nHitsOT * sizeof(float)); std::memcpy(hits.zs().data(), ph2_z.data(), nHitsOT * sizeof(float)); @@ -237,7 +237,7 @@ namespace lst { std::memcpy(hits.idxs().data(), hitIdxs.data(), (nHitsIT + nHitsOT) * sizeof(unsigned int)); - auto pixelSeeds = lstInputHC.view(); + auto pixelSeeds = lstInputHC.view().pixelSeeds(); std::memcpy(pixelSeeds.hitIndices().data(), hitIndices_vec.data(), nPixelSeeds * sizeof(Params_pLS::ArrayUxHits)); std::memcpy(pixelSeeds.deltaPhi().data(), deltaPhi_vec.data(), nPixelSeeds * sizeof(float)); std::memcpy(pixelSeeds.ptIn().data(), ptIn_vec.data(), nPixelSeeds * sizeof(float)); diff --git a/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h b/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h index 33169a07b9e51..7a68a41ab0a81 100644 --- a/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h +++ 
b/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h @@ -5,6 +5,6 @@ #include "DataFormats/Portable/interface/PortableHostCollection.h" namespace lst { - using MiniDoubletsHostCollection = PortableHostMultiCollection; + using MiniDoubletsHostCollection = PortableHostCollection; } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h b/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h index e2265b2fd978d..118227ca19871 100644 --- a/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h +++ b/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h @@ -2,6 +2,7 @@ #define RecoTracker_LSTCore_interface_MiniDoubletsSoA_h #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" #include "DataFormats/Portable/interface/PortableCollection.h" namespace lst { @@ -46,13 +47,34 @@ namespace lst { SOA_COLUMN(unsigned int, nMDs), SOA_COLUMN(unsigned int, totOccupancyMDs)) + GENERATE_SOA_BLOCKS(MiniDoubletsSoABlocksLayout, + SOA_BLOCK(miniDoublets, MiniDoubletsSoALayout), + SOA_BLOCK(miniDoubletsOccupancy, MiniDoubletsOccupancySoALayout)) + using MiniDoubletsSoA = MiniDoubletsSoALayout<>; using MiniDoubletsOccupancySoA = MiniDoubletsOccupancySoALayout<>; + using MiniDoubletsSoABlocks = MiniDoubletsSoABlocksLayout<>; using MiniDoublets = MiniDoubletsSoA::View; using MiniDoubletsConst = MiniDoubletsSoA::ConstView; using MiniDoubletsOccupancy = MiniDoubletsOccupancySoA::View; using MiniDoubletsOccupancyConst = MiniDoubletsOccupancySoA::ConstView; + using MiniDoubletsSoABlocksView = MiniDoubletsSoABlocks::View; + using MiniDoubletsSoABlocksConstView = MiniDoubletsSoABlocks::ConstView; + + // Template based accessor for getting specific SoA views. 
Needed in LSTEvent.dev.cc + template + struct MiniDoubletsViewAccessor; + + template <> + struct MiniDoubletsViewAccessor { + static constexpr auto get(auto const& v) { return v.miniDoublets(); } + }; + + template <> + struct MiniDoubletsViewAccessor { + static constexpr auto get(auto const& v) { return v.miniDoubletsOccupancy(); } + }; } // namespace lst diff --git a/RecoTracker/LSTCore/interface/ModulesHostCollection.h b/RecoTracker/LSTCore/interface/ModulesHostCollection.h index 4119fb6ffb1a2..ec1c9bde0c97c 100644 --- a/RecoTracker/LSTCore/interface/ModulesHostCollection.h +++ b/RecoTracker/LSTCore/interface/ModulesHostCollection.h @@ -5,6 +5,6 @@ #include "DataFormats/Portable/interface/PortableHostCollection.h" namespace lst { - using ModulesHostCollection = PortableHostMultiCollection; + using ModulesHostCollection = PortableHostCollection; } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/ModulesSoA.h b/RecoTracker/LSTCore/interface/ModulesSoA.h index fbf5a21cd9f91..9fca722a8b5b8 100644 --- a/RecoTracker/LSTCore/interface/ModulesSoA.h +++ b/RecoTracker/LSTCore/interface/ModulesSoA.h @@ -2,6 +2,7 @@ #define RecoTracker_LSTCore_interface_ModulesSoA_h #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" #include "DataFormats/Portable/interface/PortableCollection.h" #include "RecoTracker/LSTCore/interface/Common.h" @@ -46,13 +47,34 @@ namespace lst { GENERATE_SOA_LAYOUT(ModulesPixelSoALayout, SOA_COLUMN(unsigned int, connectedPixels)) + GENERATE_SOA_BLOCKS(ModulesSoABlocksLayout, + SOA_BLOCK(modules, ModulesSoALayout), + SOA_BLOCK(modulesPixel, ModulesPixelSoALayout)) + using ModulesSoA = ModulesSoALayout<>; using ModulesPixelSoA = ModulesPixelSoALayout<>; + using ModulesSoABlocks = ModulesSoABlocksLayout<>; using Modules = ModulesSoA::View; using ModulesConst = ModulesSoA::ConstView; using ModulesPixel = ModulesPixelSoA::View; using ModulesPixelConst = ModulesPixelSoA::ConstView; + 
using ModulesSoABlocksView = ModulesSoABlocks::View; + using ModulesSoABlocksConstView = ModulesSoABlocks::ConstView; + + // Template based accessor for getting specific SoA views. Needed in LSTEvent.dev.cc + template + struct ModulesViewAccessor; + + template <> + struct ModulesViewAccessor { + static constexpr auto get(auto const& v) { return v.modules(); } + }; + + template <> + struct ModulesViewAccessor { + static constexpr auto get(auto const& v) { return v.modulesPixel(); } + }; } // namespace lst diff --git a/RecoTracker/LSTCore/interface/QuadrupletsHostCollection.h b/RecoTracker/LSTCore/interface/QuadrupletsHostCollection.h index 128ed65e3ece3..e9ec69a7dcefe 100644 --- a/RecoTracker/LSTCore/interface/QuadrupletsHostCollection.h +++ b/RecoTracker/LSTCore/interface/QuadrupletsHostCollection.h @@ -5,6 +5,6 @@ #include "DataFormats/Portable/interface/PortableHostCollection.h" namespace lst { - using QuadrupletsHostCollection = PortableHostMultiCollection; + using QuadrupletsHostCollection = PortableHostCollection; } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/QuadrupletsSoA.h b/RecoTracker/LSTCore/interface/QuadrupletsSoA.h index 0fa57a35e7f40..9a40c53c564ae 100644 --- a/RecoTracker/LSTCore/interface/QuadrupletsSoA.h +++ b/RecoTracker/LSTCore/interface/QuadrupletsSoA.h @@ -4,6 +4,7 @@ #include #include "DataFormats/Common/interface/StdArray.h" #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" #include "RecoTracker/LSTCore/interface/Common.h" @@ -48,5 +49,27 @@ namespace lst { using QuadrupletsOccupancy = QuadrupletsOccupancySoA::View; using QuadrupletsOccupancyConst = QuadrupletsOccupancySoA::ConstView; + GENERATE_SOA_BLOCKS(QuadrupletsSoABlocksLayout, + SOA_BLOCK(quadruplets, QuadrupletsSoALayout), + SOA_BLOCK(quadrupletsOccupancy, QuadrupletsOccupancySoALayout)) + + using QuadrupletsSoABlocks = QuadrupletsSoABlocksLayout<>; + using QuadrupletsSoABlocksView = 
QuadrupletsSoABlocks::View; + using QuadrupletsSoABlocksConstView = QuadrupletsSoABlocks::ConstView; + + // Template based accessor for getting specific SoA views. Needed in LSTEvent.dev.cc + template + struct QuadrupletsViewAccessor; + + template <> + struct QuadrupletsViewAccessor { + static constexpr auto get(auto const& v) { return v.quadruplets(); } + }; + + template <> + struct QuadrupletsViewAccessor { + static constexpr auto get(auto const& v) { return v.quadrupletsOccupancy(); } + }; + } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h b/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h index 734ce03057be7..cfff4645eabb1 100644 --- a/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h +++ b/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h @@ -5,6 +5,6 @@ #include "DataFormats/Portable/interface/PortableHostCollection.h" namespace lst { - using QuintupletsHostCollection = PortableHostMultiCollection; + using QuintupletsHostCollection = PortableHostCollection; } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/QuintupletsSoA.h b/RecoTracker/LSTCore/interface/QuintupletsSoA.h index 21fa680463a73..809b073376383 100644 --- a/RecoTracker/LSTCore/interface/QuintupletsSoA.h +++ b/RecoTracker/LSTCore/interface/QuintupletsSoA.h @@ -4,6 +4,7 @@ #include #include "DataFormats/Common/interface/StdArray.h" #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" #include "RecoTracker/LSTCore/interface/Common.h" @@ -48,5 +49,27 @@ namespace lst { using QuintupletsOccupancy = QuintupletsOccupancySoA::View; using QuintupletsOccupancyConst = QuintupletsOccupancySoA::ConstView; + GENERATE_SOA_BLOCKS(QuintupletsSoABlocksLayout, + SOA_BLOCK(quintuplets, QuintupletsSoALayout), + SOA_BLOCK(quintupletsOccupancy, QuintupletsOccupancySoALayout)) + + using QuintupletsSoABlocks = QuintupletsSoABlocksLayout<>; + using QuintupletsSoABlocksView 
= QuintupletsSoABlocks::View; + using QuintupletsSoABlocksConstView = QuintupletsSoABlocks::ConstView; + + // Template based accessor for getting specific SoA views. Needed in LSTEvent.dev.cc + template + struct QuintupletsViewAccessor; + + template <> + struct QuintupletsViewAccessor { + static constexpr auto get(auto const& v) { return v.quintuplets(); } + }; + + template <> + struct QuintupletsViewAccessor { + static constexpr auto get(auto const& v) { return v.quintupletsOccupancy(); } + }; + } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/SegmentsHostCollection.h b/RecoTracker/LSTCore/interface/SegmentsHostCollection.h index 621d4378c15d2..e9843c2c28148 100644 --- a/RecoTracker/LSTCore/interface/SegmentsHostCollection.h +++ b/RecoTracker/LSTCore/interface/SegmentsHostCollection.h @@ -5,6 +5,6 @@ #include "DataFormats/Portable/interface/PortableHostCollection.h" namespace lst { - using SegmentsHostCollection = PortableHostMultiCollection; + using SegmentsHostCollection = PortableHostCollection; } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/SegmentsSoA.h b/RecoTracker/LSTCore/interface/SegmentsSoA.h index e274e8923a1db..842bea7877776 100644 --- a/RecoTracker/LSTCore/interface/SegmentsSoA.h +++ b/RecoTracker/LSTCore/interface/SegmentsSoA.h @@ -2,6 +2,7 @@ #define RecoTracker_LSTCore_interface_SegmentsSoA_h #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" #include "DataFormats/Portable/interface/PortableCollection.h" #include "RecoTracker/LSTCore/interface/Common.h" @@ -35,6 +36,10 @@ namespace lst { SOA_COLUMN(unsigned int, nSegments), //number of segments per inner lower module SOA_COLUMN(unsigned int, totOccupancySegments)) + GENERATE_SOA_BLOCKS(SegmentsSoABlocksLayout, + SOA_BLOCK(segments, SegmentsSoALayout), + SOA_BLOCK(segmentsOccupancy, SegmentsOccupancySoALayout)) + using SegmentsSoA = SegmentsSoALayout<>; using SegmentsOccupancySoA = 
SegmentsOccupancySoALayout<>; @@ -43,6 +48,24 @@ namespace lst { using SegmentsOccupancy = SegmentsOccupancySoA::View; using SegmentsOccupancyConst = SegmentsOccupancySoA::ConstView; + using SegmentsSoABlocks = SegmentsSoABlocksLayout<>; + using SegmentsSoABlocksView = SegmentsSoABlocks::View; + using SegmentsSoABlocksConstView = SegmentsSoABlocks::ConstView; + + // Template based accessor for getting specific SoA views. Needed in LSTEvent.dev.cc + template + struct SegmentsViewAccessor; + + template <> + struct SegmentsViewAccessor { + static constexpr auto get(auto const& v) { return v.segments(); } + }; + + template <> + struct SegmentsViewAccessor { + static constexpr auto get(auto const& v) { return v.segmentsOccupancy(); } + }; + } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/TripletsHostCollection.h b/RecoTracker/LSTCore/interface/TripletsHostCollection.h index 6eaebd97e5bf6..75b4ba3c760a1 100644 --- a/RecoTracker/LSTCore/interface/TripletsHostCollection.h +++ b/RecoTracker/LSTCore/interface/TripletsHostCollection.h @@ -5,6 +5,6 @@ #include "DataFormats/Portable/interface/PortableHostCollection.h" namespace lst { - using TripletsHostCollection = PortableHostMultiCollection; + using TripletsHostCollection = PortableHostCollection; } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/TripletsSoA.h b/RecoTracker/LSTCore/interface/TripletsSoA.h index b67bf4ca5fc52..5fa2434c967ab 100644 --- a/RecoTracker/LSTCore/interface/TripletsSoA.h +++ b/RecoTracker/LSTCore/interface/TripletsSoA.h @@ -4,6 +4,7 @@ #include #include "DataFormats/Common/interface/StdArray.h" #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" #include "RecoTracker/LSTCore/interface/Common.h" @@ -44,5 +45,27 @@ namespace lst { using TripletsOccupancy = TripletsOccupancySoA::View; using TripletsOccupancyConst = TripletsOccupancySoA::ConstView; + GENERATE_SOA_BLOCKS(TripletsSoABlocksLayout, + 
SOA_BLOCK(triplets, TripletsSoALayout), + SOA_BLOCK(tripletsOccupancy, TripletsOccupancySoALayout)) + + using TripletsSoABlocks = TripletsSoABlocksLayout<>; + using TripletsSoABlocksView = TripletsSoABlocks::View; + using TripletsSoABlocksConstView = TripletsSoABlocks::ConstView; + + // Template based accessor for getting specific SoA views. Needed in LSTEvent.dev.cc + template + struct TripletsViewAccessor; + + template <> + struct TripletsViewAccessor { + static constexpr auto get(auto const& v) { return v.triplets(); } + }; + + template <> + struct TripletsViewAccessor { + static constexpr auto get(auto const& v) { return v.tripletsOccupancy(); } + }; + } // namespace lst #endif diff --git a/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h index b2f96601f7f5b..9a272b63c4442 100644 --- a/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h +++ b/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h @@ -7,7 +7,7 @@ #include "RecoTracker/LSTCore/interface/HitsSoA.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - using HitsDeviceCollection = PortableCollection2; + using HitsDeviceCollection = PortableCollection; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/interface/alpaka/LSTInputDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/LSTInputDeviceCollection.h index af8ebe0ee654c..85cb4332afd10 100644 --- a/RecoTracker/LSTCore/interface/alpaka/LSTInputDeviceCollection.h +++ b/RecoTracker/LSTCore/interface/alpaka/LSTInputDeviceCollection.h @@ -7,7 +7,7 @@ #include "RecoTracker/LSTCore/interface/LSTInputSoA.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - using LSTInputDeviceCollection = PortableCollection2; + using LSTInputDeviceCollection = PortableCollection; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h 
b/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h index 7751f75ac5ec9..75541f7b3351c 100644 --- a/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h +++ b/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h @@ -7,7 +7,7 @@ #include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - using MiniDoubletsDeviceCollection = PortableCollection2; + using MiniDoubletsDeviceCollection = PortableCollection; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h index 73152a47b6a42..695deecde3395 100644 --- a/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h +++ b/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h @@ -7,7 +7,7 @@ #include "RecoTracker/LSTCore/interface/ModulesSoA.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - using ModulesDeviceCollection = PortableCollection2; + using ModulesDeviceCollection = PortableCollection; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/interface/alpaka/QuadrupletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/QuadrupletsDeviceCollection.h index f24e3a9856e46..f48c4e3ae9a0d 100644 --- a/RecoTracker/LSTCore/interface/alpaka/QuadrupletsDeviceCollection.h +++ b/RecoTracker/LSTCore/interface/alpaka/QuadrupletsDeviceCollection.h @@ -7,6 +7,6 @@ #include "RecoTracker/LSTCore/interface/QuadrupletsSoA.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - using QuadrupletsDeviceCollection = PortableCollection2; + using QuadrupletsDeviceCollection = PortableCollection; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h index 13fb5484ea0fd..a499a3651b280 100644 --- 
a/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h +++ b/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h @@ -7,6 +7,6 @@ #include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - using QuintupletsDeviceCollection = PortableCollection2; + using QuintupletsDeviceCollection = PortableCollection; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h index 800e7bc61a439..affae99595ffa 100644 --- a/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h +++ b/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h @@ -7,7 +7,7 @@ #include "RecoTracker/LSTCore/interface/SegmentsSoA.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - using SegmentsDeviceCollection = PortableCollection2; + using SegmentsDeviceCollection = PortableCollection; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h index 6db827680cee3..9db0017f45f2e 100644 --- a/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h +++ b/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h @@ -7,6 +7,6 @@ #include "RecoTracker/LSTCore/interface/TripletsSoA.h" namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { - using TripletsDeviceCollection = PortableCollection2; + using TripletsDeviceCollection = PortableCollection; } // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst #endif diff --git a/RecoTracker/LSTCore/src/ModuleMethods.h b/RecoTracker/LSTCore/src/ModuleMethods.h index 05b1ff68eb53c..922026dd3a575 100644 --- a/RecoTracker/LSTCore/src/ModuleMethods.h +++ b/RecoTracker/LSTCore/src/ModuleMethods.h @@ -239,11 +239,10 @@ namespace lst { totalSizes_neg, connectedModuleDetIds_neg] = getConnectedPixels(nModules, 
nPixels, pixelMapping, pLStoLayer); - std::array const modules_sizes{{static_cast(nModules), static_cast(nPixels)}}; + auto modulesHC = std::make_shared( + cms::alpakatools::host(), static_cast(nModules), static_cast(nPixels)); - auto modulesHC = std::make_shared(modules_sizes, cms::alpakatools::host()); - - auto modules_view = modulesHC->view(); + auto modules_view = modulesHC->view().modules(); // Getting the underlying data pointers std::span host_detIds = modules_view.detIds(); @@ -386,7 +385,7 @@ namespace lst { // Fill pixel part pixelMapping.pixelModuleIndex = mmd.detIdToIndex.at(1); - auto modulesPixel_view = modulesHC->view(); + auto modulesPixel_view = modulesHC->view().modulesPixel(); auto connectedPixels = cms::alpakatools::make_host_view(modulesPixel_view.connectedPixels(), modulesPixel_view.metadata().size()); for (unsigned int icondet = 0; icondet < totalSizes; icondet++) { diff --git a/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc b/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc index bd0be0b948628..03a7efdfdd01d 100644 --- a/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc +++ b/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc @@ -92,15 +92,14 @@ void LSTEvent::resetEventSync() { void LSTEvent::addInputToEvent(LSTInputDeviceCollection const* lstInputDC) { lstInputDC_ = lstInputDC; - pixelSize_ = lstInputDC_->sizes()[1]; + pixelSize_ = lstInputDC_->size()[1]; pixelModuleIndex_ = pixelMapping_.pixelModuleIndex; } void LSTEvent::addHitToEvent() { if (!hitsDC_) { - int nHits = lstInputDC_->sizes()[0]; - std::array const hits_sizes{{nHits, static_cast(nModules_)}}; - hitsDC_.emplace(hits_sizes, queue_); + const int32_t nHits = lstInputDC_->size()[0]; + hitsDC_.emplace(queue_, nHits, static_cast(nModules_)); auto buf = hitsDC_->buffer(); alpaka::memset(queue_, buf, 0xff); } @@ -121,18 +120,18 @@ void LSTEvent::addHitToEvent() { nModules_, nEndCapMap_, endcapGeometry_.const_view(), - modules_.const_view(), - lstInputDC_->const_view(), - hitsDC_->view(), - 
hitsDC_->view()); + modules_.const_view().modules(), + lstInputDC_->const_view().hits(), + hitsDC_->view().extended(), + hitsDC_->view().ranges()); auto const module_ranges_workdiv = cms::alpakatools::make_workdiv(max_blocks, 256); alpaka::exec(queue_, module_ranges_workdiv, ModuleRangesKernel{}, - modules_.const_view(), - hitsDC_->view(), + modules_.const_view().modules(), + hitsDC_->view().ranges(), nLowerModules_); } @@ -157,13 +156,13 @@ void LSTEvent::addPixelSegmentToEventFinalize() { alpaka::exec(queue_, addPixelSegmentToEvent_workdiv, AddPixelSegmentToEventKernel{}, - modules_.const_view(), + modules_.const_view().modules(), rangesDC_->const_view(), - lstInputDC_->const_view(), - hitsDC_->view(), - lstInputDC_->const_view(), - miniDoubletsDC_->view(), - segmentsDC_->view(), + lstInputDC_->const_view().hits(), + hitsDC_->view().extended(), + lstInputDC_->const_view().pixelSeeds(), + miniDoubletsDC_->view().miniDoublets(), + segmentsDC_->view().segments(), pixelSegmentsDC_->view(), pixelModuleIndex_, pixelSize_); @@ -192,8 +191,8 @@ void LSTEvent::createMiniDoublets() { alpaka::exec(queue_, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, - modules_.const_view(), - hitsDC_->const_view(), + modules_.const_view().modules(), + hitsDC_->const_view().ranges(), rangesDC_->view(), ptCut_); @@ -205,24 +204,23 @@ void LSTEvent::createMiniDoublets() { *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); - std::array const mds_sizes{{static_cast(nTotalMDs), static_cast(nLowerModules_ + 1)}}; - miniDoubletsDC_.emplace(mds_sizes, queue_); + miniDoubletsDC_.emplace(queue_, static_cast(nTotalMDs), static_cast(nLowerModules_ + 1)); - auto mdsOccupancy = miniDoubletsDC_->view(); + auto mdsOccupancy = miniDoubletsDC_->view().miniDoubletsOccupancy(); auto nMDs_view = cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs()); auto totOccupancyMDs_view = cms::alpakatools::make_device_view(queue_, 
mdsOccupancy.totOccupancyMDs()); alpaka::memset(queue_, nMDs_view, 0u); alpaka::memset(queue_, totOccupancyMDs_view, 0u); } - auto mdView = miniDoubletsDC_->view(); + auto mdView = miniDoubletsDC_->view().miniDoublets(); auto connView = cms::alpakatools::make_device_view(queue_, mdView.connectedMax()); alpaka::memset(queue_, connView, 0u); unsigned int mdSize = pixelSize_ * 2; auto src_view_mdSize = cms::alpakatools::make_host_view(mdSize); - auto mdsOccupancy = miniDoubletsDC_->view(); + auto mdsOccupancy = miniDoubletsDC_->view().miniDoubletsOccupancy(); auto dst_view_nMDs = cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs()[pixelModuleIndex_]); alpaka::memcpy(queue_, dst_view_nMDs, src_view_mdSize); @@ -239,12 +237,12 @@ void LSTEvent::createMiniDoublets() { alpaka::exec(queue_, createMiniDoublets_workDiv, CreateMiniDoublets{}, - modules_.const_view(), - lstInputDC_->const_view(), - hitsDC_->const_view(), - hitsDC_->const_view(), - miniDoubletsDC_->view(), - miniDoubletsDC_->view(), + modules_.const_view().modules(), + lstInputDC_->const_view().hits(), + hitsDC_->const_view().extended(), + hitsDC_->const_view().ranges(), + miniDoubletsDC_->view().miniDoublets(), + miniDoubletsDC_->view().miniDoubletsOccupancy(), rangesDC_->const_view(), ptCut_, clustSizeCut_); @@ -254,10 +252,10 @@ void LSTEvent::createMiniDoublets() { alpaka::exec(queue_, addMiniDoubletRangesToEventExplicit_workDiv, AddMiniDoubletRangesToEventExplicit{}, - modules_.const_view(), - miniDoubletsDC_->view(), + modules_.const_view().modules(), + miniDoubletsDC_->view().miniDoubletsOccupancy(), rangesDC_->view(), - hitsDC_->const_view()); + hitsDC_->const_view().ranges()); if (addObjects_) { addMiniDoubletsToEventExplicit(); @@ -271,9 +269,9 @@ void LSTEvent::createSegmentsWithModuleMap() { alpaka::exec(queue_, countMDConn_wd, CountMiniDoubletConnections{}, - modules_.const_view(), - miniDoubletsDC_->view(), - miniDoubletsDC_->const_view(), + modules_.const_view().modules(), + 
miniDoubletsDC_->view().miniDoublets(), + miniDoubletsDC_->const_view().miniDoubletsOccupancy(), rangesDC_->const_view(), ptCut_); @@ -282,10 +280,10 @@ void LSTEvent::createSegmentsWithModuleMap() { alpaka::exec(queue_, createSegmentArrayRanges_workDiv, CreateSegmentArrayRanges{}, - modules_.const_view(), + modules_.const_view().modules(), rangesDC_->view(), - miniDoubletsDC_->const_view(), - miniDoubletsDC_->const_view()); + miniDoubletsDC_->const_view().miniDoublets(), + miniDoubletsDC_->const_view().miniDoubletsOccupancy()); auto rangesOccupancy = rangesDC_->view(); auto nTotalSegments_view_h = cms::alpakatools::make_host_view(nTotalSegments_); @@ -295,11 +293,10 @@ void LSTEvent::createSegmentsWithModuleMap() { nTotalSegments_ += n_max_pixel_segments_per_module; - std::array const segments_sizes{{static_cast(nTotalSegments_), static_cast(nLowerModules_ + 1)}}; - segmentsDC_.emplace(segments_sizes, queue_); + segmentsDC_.emplace(queue_, static_cast(nTotalSegments_), static_cast(nLowerModules_ + 1)); - auto segmentsOccupancy = segmentsDC_->view(); - auto segments = segmentsDC_->view(); + auto segmentsOccupancy = segmentsDC_->view().segmentsOccupancy(); + auto segments = segmentsDC_->view().segments(); auto nSegments_view = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments()); auto totOccupancySegments_view = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.totOccupancySegments()); @@ -325,11 +322,11 @@ void LSTEvent::createSegmentsWithModuleMap() { alpaka::exec(queue_, createSegments_workDiv, CreateSegments{}, - modules_.const_view(), - miniDoubletsDC_->const_view(), - miniDoubletsDC_->const_view(), - segmentsDC_->view(), - segmentsDC_->view(), + modules_.const_view().modules(), + miniDoubletsDC_->const_view().miniDoublets(), + miniDoubletsDC_->const_view().miniDoubletsOccupancy(), + segmentsDC_->view().segments(), + segmentsDC_->view().segmentsOccupancy(), rangesDC_->const_view(), ptCut_); @@ -338,8 +335,8 @@ void 
LSTEvent::createSegmentsWithModuleMap() { alpaka::exec(queue_, addSegmentRangesToEventExplicit_workDiv, AddSegmentRangesToEventExplicit{}, - modules_.const_view(), - segmentsDC_->view(), + modules_.const_view().modules(), + segmentsDC_->view().segmentsOccupancy(), rangesDC_->view()); if (addObjects_) { @@ -354,9 +351,9 @@ void LSTEvent::createTriplets() { alpaka::exec(queue_, countSegConn_wd, CountSegmentConnections{}, - modules_.const_view(), - segmentsDC_->view(), - segmentsDC_->const_view(), + modules_.const_view().modules(), + segmentsDC_->view().segments(), + segmentsDC_->const_view().segmentsOccupancy(), rangesDC_->const_view()); auto const createTripletArrayRanges_workDiv = cms::alpakatools::make_workdiv(1, 1024); @@ -364,10 +361,10 @@ void LSTEvent::createTriplets() { alpaka::exec(queue_, createTripletArrayRanges_workDiv, CreateTripletArrayRanges{}, - modules_.const_view(), + modules_.const_view().modules(), rangesDC_->view(), - segmentsDC_->const_view(), - segmentsDC_->const_view()); + segmentsDC_->const_view().segments(), + segmentsDC_->const_view().segmentsOccupancy()); // TODO: Why are we pulling this back down only to put it back on the device in a new struct? 
auto rangesOccupancy = rangesDC_->view(); @@ -376,17 +373,15 @@ void LSTEvent::createTriplets() { alpaka::memcpy(queue_, maxTriplets_buf_h, maxTriplets_buf_d); alpaka::wait(queue_); // wait to get the value before using it - std::array const triplets_sizes{ - {static_cast(*maxTriplets_buf_h.data()), static_cast(nLowerModules_)}}; - tripletsDC_.emplace(triplets_sizes, queue_); + tripletsDC_.emplace(queue_, static_cast(*maxTriplets_buf_h.data()), static_cast(nLowerModules_)); - auto tripletsOccupancy = tripletsDC_->view(); + auto tripletsOccupancy = tripletsDC_->view().tripletsOccupancy(); auto nTriplets_view = cms::alpakatools::make_device_view(queue_, tripletsOccupancy.nTriplets()); alpaka::memset(queue_, nTriplets_view, 0u); auto totOccupancyTriplets_view = cms::alpakatools::make_device_view(queue_, tripletsOccupancy.totOccupancyTriplets()); alpaka::memset(queue_, totOccupancyTriplets_view, 0u); - auto triplets = tripletsDC_->view(); + auto triplets = tripletsDC_->view().triplets(); auto partOfPT5_view = cms::alpakatools::make_device_view(queue_, triplets.partOfPT5()); alpaka::memset(queue_, partOfPT5_view, 0u); auto partOfT5_view = cms::alpakatools::make_device_view(queue_, triplets.partOfT5()); @@ -406,12 +401,12 @@ void LSTEvent::createTriplets() { // Allocate and copy nSegments from device to host (only nLowerModules in OT, not the +1 with pLSs) auto nSegments_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); auto nSegments_buf_d = cms::alpakatools::make_device_view( - queue_, segmentsDC_->const_view().nSegments(), nLowerModules_); + queue_, segmentsDC_->const_view().segmentsOccupancy().nSegments(), nLowerModules_); alpaka::memcpy(queue_, nSegments_buf_h, nSegments_buf_d, nLowerModules_); // ... 
same for module_nConnectedModules // FIXME: replace by ES host data - auto modules = modules_.const_view(); + auto modules = modules_.const_view().modules(); auto module_nConnectedModules_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); auto module_nConnectedModules_buf_d = cms::alpakatools::make_device_view(queue_, modules.nConnectedModules(), nLowerModules_); // only lower modules @@ -445,12 +440,12 @@ void LSTEvent::createTriplets() { alpaka::exec(queue_, createTriplets_workDiv, CreateTriplets{}, - modules_.const_view(), - miniDoubletsDC_->const_view(), - segmentsDC_->const_view(), - segmentsDC_->const_view(), - tripletsDC_->view(), - tripletsDC_->view(), + modules_.const_view().modules(), + miniDoubletsDC_->const_view().miniDoublets(), + segmentsDC_->const_view().segments(), + segmentsDC_->const_view().segmentsOccupancy(), + tripletsDC_->view().triplets(), + tripletsDC_->view().tripletsOccupancy(), rangesDC_->const_view(), index_gpu_buf.data(), nonZeroModules, @@ -462,8 +457,8 @@ void LSTEvent::createTriplets() { alpaka::exec(queue_, addTripletRangesToEventExplicit_workDiv, AddTripletRangesToEventExplicit{}, - modules_.const_view(), - tripletsDC_->const_view(), + modules_.const_view().modules(), + tripletsDC_->const_view().tripletsOccupancy(), rangesDC_->view()); if (addObjects_) { @@ -486,10 +481,10 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) alpaka::exec(queue_, crossCleanpT3_workDiv, CrossCleanpT3{}, - modules_.const_view(), + modules_.const_view().modules(), rangesDC_->const_view(), pixelTripletsDC_->view(), - lstInputDC_->const_view(), + lstInputDC_->const_view().pixelSeeds(), pixelQuintupletsDC_->const_view()); auto const addpT3asTrackCandidates_workDiv = cms::alpakatools::make_workdiv(1, 512); @@ -501,7 +496,7 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) pixelTripletsDC_->const_view(), trackCandidatesBaseDC_->view(), trackCandidatesExtendedDC_->view(), - 
lstInputDC_->const_view(), + lstInputDC_->const_view().pixelSeeds(), rangesDC_->const_view()); // Pull nEligibleT5Modules from the device. @@ -520,8 +515,8 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) alpaka::exec(queue_, removeDupQuintupletsBeforeTC_workDiv, RemoveDupQuintupletsBeforeTC{}, - quintupletsDC_->view(), - quintupletsDC_->view(), + quintupletsDC_->view().quintuplets(), + quintupletsDC_->view().quintupletsOccupancy(), rangesDC_->const_view()); constexpr int threadsPerBlock = 32; @@ -531,9 +526,9 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) alpaka::exec(queue_, crossCleanT5_workDiv, CrossCleanT5{}, - modules_.const_view(), - quintupletsDC_->view(), - quintupletsDC_->const_view(), + modules_.const_view().modules(), + quintupletsDC_->view().quintuplets(), + quintupletsDC_->const_view().quintupletsOccupancy(), pixelQuintupletsDC_->const_view(), pixelTripletsDC_->const_view(), rangesDC_->const_view()); @@ -544,8 +539,8 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) addT5asTrackCandidate_workDiv, AddT5asTrackCandidate{}, nLowerModules_, - quintupletsDC_->const_view(), - quintupletsDC_->const_view(), + quintupletsDC_->const_view().quintuplets(), + quintupletsDC_->const_view().quintupletsOccupancy(), trackCandidatesBaseDC_->view(), trackCandidatesExtendedDC_->view(), rangesDC_->const_view()); @@ -563,8 +558,8 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) alpaka::exec(queue_, removeDupQuadrupletsBeforeTC_workDiv, RemoveDupQuadrupletsBeforeTC{}, - quadrupletsDC_->view(), - quadrupletsDC_->view(), + quadrupletsDC_->view().quadruplets(), + quadrupletsDC_->view().quadrupletsOccupancy(), rangesDC_->const_view()); auto const crossCleanT4_workDiv = cms::alpakatools::make_workdiv( @@ -573,17 +568,17 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) alpaka::exec(queue_, 
crossCleanT4_workDiv, CrossCleanT4{}, - modules_.const_view(), - quadrupletsDC_->view(), - quadrupletsDC_->const_view(), + modules_.const_view().modules(), + quadrupletsDC_->view().quadruplets(), + quadrupletsDC_->const_view().quadrupletsOccupancy(), pixelQuintupletsDC_->const_view(), pixelTripletsDC_->const_view(), - quintupletsDC_->const_view(), + quintupletsDC_->const_view().quintuplets(), trackCandidatesBaseDC_->view(), trackCandidatesExtendedDC_->view(), - miniDoubletsDC_->view(), - segmentsDC_->view(), - tripletsDC_->view(), + miniDoubletsDC_->view().miniDoublets(), + segmentsDC_->view().segments(), + tripletsDC_->view().triplets(), rangesDC_->const_view()); auto const addT4asTrackCandidate_workDiv = cms::alpakatools::make_workdiv({8, 10}, {8, 128}); @@ -592,9 +587,9 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) addT4asTrackCandidate_workDiv, AddT4asTrackCandidate{}, nLowerModules_, - quadrupletsDC_->view(), - quadrupletsDC_->const_view(), - tripletsDC_->const_view(), + quadrupletsDC_->view().quadruplets(), + quadrupletsDC_->const_view().quadrupletsOccupancy(), + tripletsDC_->const_view().triplets(), trackCandidatesBaseDC_->view(), trackCandidatesExtendedDC_->view(), rangesDC_->const_view()); @@ -605,9 +600,9 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) alpaka::exec(queue_, checkHitspLS_workDiv, CheckHitspLS{}, - modules_.const_view(), - segmentsDC_->const_view(), - lstInputDC_->const_view(), + modules_.const_view().modules(), + segmentsDC_->const_view().segmentsOccupancy(), + lstInputDC_->const_view().pixelSeeds(), pixelSegmentsDC_->view(), true); } @@ -617,19 +612,19 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) alpaka::exec(queue_, crossCleanpLS_workDiv, CrossCleanpLS{}, - modules_.const_view(), + modules_.const_view().modules(), rangesDC_->const_view(), pixelTripletsDC_->const_view(), trackCandidatesBaseDC_->view(), 
trackCandidatesExtendedDC_->view(), - segmentsDC_->const_view(), - segmentsDC_->const_view(), - lstInputDC_->const_view(), + segmentsDC_->const_view().segments(), + segmentsDC_->const_view().segmentsOccupancy(), + lstInputDC_->const_view().pixelSeeds(), pixelSegmentsDC_->view(), - miniDoubletsDC_->const_view(), - lstInputDC_->const_view(), - quintupletsDC_->const_view(), - quadrupletsDC_->const_view()); + miniDoubletsDC_->const_view().miniDoublets(), + lstInputDC_->const_view().hits(), + quintupletsDC_->const_view().quintuplets(), + quadrupletsDC_->const_view().quadruplets()); auto const addpLSasTrackCandidate_workDiv = cms::alpakatools::make_workdiv(max_blocks, 384); @@ -639,8 +634,8 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) nLowerModules_, trackCandidatesBaseDC_->view(), trackCandidatesExtendedDC_->view(), - segmentsDC_->const_view(), - lstInputDC_->const_view(), + segmentsDC_->const_view().segmentsOccupancy(), + lstInputDC_->const_view().pixelSeeds(), pixelSegmentsDC_->const_view(), tc_pls_triplets); @@ -657,10 +652,10 @@ void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) alpaka::exec(queue_, wd, ExtendTrackCandidatesFromDupT5{}, - modules_.const_view(), + modules_.const_view().modules(), rangesDC_->const_view(), - quintupletsDC_->const_view(), - quintupletsDC_->const_view(), + quintupletsDC_->const_view().quintuplets(), + quintupletsDC_->const_view().quintupletsOccupancy(), trackCandidatesBaseDC_->view(), trackCandidatesExtendedDC_->view()); @@ -713,8 +708,8 @@ void LSTEvent::createPixelTriplets() { cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->totOccupancyPixelTriplets()); alpaka::memset(queue_, totOccupancyPixelTriplets_view, 0u); } - SegmentsOccupancy segmentsOccupancy = segmentsDC_->view(); - PixelSeedsConst pixelSeeds = lstInputDC_->const_view(); + SegmentsOccupancy segmentsOccupancy = segmentsDC_->view().segmentsOccupancy(); + PixelSeedsConst pixelSeeds = 
lstInputDC_->const_view().pixelSeeds(); auto superbins_buf = cms::alpakatools::make_host_buffer(queue_, n_max_pixel_segments_per_module); auto pixelTypes_buf = cms::alpakatools::make_host_buffer(queue_, n_max_pixel_segments_per_module); @@ -794,15 +789,15 @@ void LSTEvent::createPixelTriplets() { alpaka::exec(queue_, createPixelTripletsFromMap_workDiv, CreatePixelTripletsFromMap{}, - modules_.const_view(), - modules_.const_view(), + modules_.const_view().modules(), + modules_.const_view().modulesPixel(), rangesDC_->const_view(), - miniDoubletsDC_->const_view(), - segmentsDC_->const_view(), - lstInputDC_->const_view(), + miniDoubletsDC_->const_view().miniDoublets(), + segmentsDC_->const_view().segments(), + lstInputDC_->const_view().pixelSeeds(), pixelSegmentsDC_->const_view(), - tripletsDC_->view(), - tripletsDC_->const_view(), + tripletsDC_->view().triplets(), + tripletsDC_->const_view().tripletsOccupancy(), pixelTripletsDC_->view(), connectedPixelSize_dev_buf.data(), connectedPixelIndex_dev_buf.data(), @@ -833,11 +828,11 @@ void LSTEvent::createQuintuplets() { alpaka::exec(queue_, countConn_workDiv, CountTripletConnections{}, - modules_.const_view(), - miniDoubletsDC_->const_view(), - segmentsDC_->const_view(), - tripletsDC_->view(), - tripletsDC_->const_view(), + modules_.const_view().modules(), + miniDoubletsDC_->const_view().miniDoublets(), + segmentsDC_->const_view().segments(), + tripletsDC_->view().triplets(), + tripletsDC_->const_view().tripletsOccupancy(), rangesDC_->const_view(), ptCut_); @@ -846,10 +841,10 @@ void LSTEvent::createQuintuplets() { alpaka::exec(queue_, createEligibleModulesListForQuintuplets_workDiv, CreateEligibleModulesListForQuintuplets{}, - modules_.const_view(), - tripletsDC_->const_view(), + modules_.const_view().modules(), + tripletsDC_->const_view().tripletsOccupancy(), rangesDC_->view(), - tripletsDC_->view()); + tripletsDC_->view().triplets()); auto nEligibleT5Modules_buf = cms::alpakatools::make_host_buffer(queue_); auto 
nTotalQuintuplets_buf = cms::alpakatools::make_host_buffer(queue_); @@ -864,15 +859,14 @@ void LSTEvent::createQuintuplets() { auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); if (!quintupletsDC_) { - std::array const quintuplets_sizes{{static_cast(nTotalQuintuplets), static_cast(nLowerModules_)}}; - quintupletsDC_.emplace(quintuplets_sizes, queue_); - auto quintupletsOccupancy = quintupletsDC_->view(); + quintupletsDC_.emplace(queue_, static_cast(nTotalQuintuplets), static_cast(nLowerModules_)); + auto quintupletsOccupancy = quintupletsDC_->view().quintupletsOccupancy(); auto nQuintuplets_view = cms::alpakatools::make_device_view(queue_, quintupletsOccupancy.nQuintuplets()); alpaka::memset(queue_, nQuintuplets_view, 0u); auto totOccupancyQuintuplets_view = cms::alpakatools::make_device_view(queue_, quintupletsOccupancy.totOccupancyQuintuplets()); alpaka::memset(queue_, totOccupancyQuintuplets_view, 0u); - auto quintuplets = quintupletsDC_->view(); + auto quintuplets = quintupletsDC_->view().quintuplets(); auto isDup_view = cms::alpakatools::make_device_view(queue_, quintuplets.isDup()); alpaka::memset(queue_, isDup_view, 0u); auto tightCutFlag_view = cms::alpakatools::make_device_view(queue_, quintuplets.tightCutFlag()); @@ -887,13 +881,13 @@ void LSTEvent::createQuintuplets() { alpaka::exec(queue_, createQuintuplets_workDiv, CreateQuintuplets{}, - modules_.const_view(), - miniDoubletsDC_->const_view(), - segmentsDC_->const_view(), - tripletsDC_->view(), - tripletsDC_->const_view(), - quintupletsDC_->view(), - quintupletsDC_->view(), + modules_.const_view().modules(), + miniDoubletsDC_->const_view().miniDoublets(), + segmentsDC_->const_view().segments(), + tripletsDC_->view().triplets(), + tripletsDC_->const_view().tripletsOccupancy(), + quintupletsDC_->view().quintuplets(), + quintupletsDC_->view().quintupletsOccupancy(), rangesDC_->const_view(), nEligibleT5Modules, ptCut_); @@ -904,9 +898,9 @@ void LSTEvent::createQuintuplets() { alpaka::exec(queue_, 
removeDupQuintupletsAfterBuild_workDiv, RemoveDupQuintupletsAfterBuild{}, - modules_.const_view(), - quintupletsDC_->view(), - quintupletsDC_->const_view(), + modules_.const_view().modules(), + quintupletsDC_->view().quintuplets(), + quintupletsDC_->const_view().quintupletsOccupancy(), rangesDC_->const_view()); auto const addQuintupletRangesToEventExplicit_workDiv = cms::alpakatools::make_workdiv(1, 1024); @@ -914,8 +908,8 @@ void LSTEvent::createQuintuplets() { alpaka::exec(queue_, addQuintupletRangesToEventExplicit_workDiv, AddQuintupletRangesToEventExplicit{}, - modules_.const_view(), - quintupletsDC_->const_view(), + modules_.const_view().modules(), + quintupletsDC_->const_view().quintupletsOccupancy(), rangesDC_->view()); if (addObjects_) { @@ -930,9 +924,9 @@ void LSTEvent::pixelLineSegmentCleaning(bool no_pls_dupclean) { alpaka::exec(queue_, checkHitspLS_workDiv, CheckHitspLS{}, - modules_.const_view(), - segmentsDC_->const_view(), - lstInputDC_->const_view(), + modules_.const_view().modules(), + segmentsDC_->const_view().segmentsOccupancy(), + lstInputDC_->const_view().pixelSeeds(), pixelSegmentsDC_->view(), false); } @@ -955,8 +949,8 @@ void LSTEvent::createPixelQuintuplets() { trackCandidatesExtendedDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); trackCandidatesExtendedDC_->zeroInitialise(queue_); } - SegmentsOccupancy segmentsOccupancy = segmentsDC_->view(); - PixelSeedsConst pixelSeeds = lstInputDC_->const_view(); + SegmentsOccupancy segmentsOccupancy = segmentsDC_->view().segmentsOccupancy(); + PixelSeedsConst pixelSeeds = lstInputDC_->const_view().pixelSeeds(); auto superbins_buf = cms::alpakatools::make_host_buffer(queue_, n_max_pixel_segments_per_module); auto pixelTypes_buf = cms::alpakatools::make_host_buffer(queue_, n_max_pixel_segments_per_module); @@ -1037,15 +1031,15 @@ void LSTEvent::createPixelQuintuplets() { alpaka::exec(queue_, createPixelQuintupletsFromMap_workDiv, CreatePixelQuintupletsFromMap{}, - 
modules_.const_view(), - modules_.const_view(), - miniDoubletsDC_->const_view(), - segmentsDC_->const_view(), - lstInputDC_->const_view(), + modules_.const_view().modules(), + modules_.const_view().modulesPixel(), + miniDoubletsDC_->const_view().miniDoublets(), + segmentsDC_->const_view().segments(), + lstInputDC_->const_view().pixelSeeds(), pixelSegmentsDC_->view(), - tripletsDC_->view(), - quintupletsDC_->view(), - quintupletsDC_->const_view(), + tripletsDC_->view().triplets(), + quintupletsDC_->view().quintuplets(), + quintupletsDC_->const_view().quintupletsOccupancy(), pixelQuintupletsDC_->view(), connectedPixelSize_dev_buf.data(), connectedPixelIndex_dev_buf.data(), @@ -1070,7 +1064,7 @@ void LSTEvent::createPixelQuintuplets() { pixelQuintupletsDC_->const_view(), trackCandidatesBaseDC_->view(), trackCandidatesExtendedDC_->view(), - lstInputDC_->const_view(), + lstInputDC_->const_view().pixelSeeds(), rangesDC_->const_view()); #ifdef WARNINGS @@ -1091,11 +1085,11 @@ void LSTEvent::createQuadruplets() { alpaka::exec(queue_, countLSConn_workDiv, CountTripletLSConnections{}, - modules_.const_view(), - miniDoubletsDC_->const_view(), - segmentsDC_->const_view(), - tripletsDC_->view(), - tripletsDC_->const_view(), + modules_.const_view().modules(), + miniDoubletsDC_->const_view().miniDoublets(), + segmentsDC_->const_view().segments(), + tripletsDC_->view().triplets(), + tripletsDC_->const_view().tripletsOccupancy(), rangesDC_->const_view(), ptCut_); @@ -1104,10 +1098,10 @@ void LSTEvent::createQuadruplets() { alpaka::exec(queue_, createEligibleModulesListForQuadruplets_workDiv, CreateEligibleModulesListForQuadruplets{}, - modules_.const_view(), - tripletsDC_->const_view(), + modules_.const_view().modules(), + tripletsDC_->const_view().tripletsOccupancy(), rangesDC_->view(), - tripletsDC_->view()); + tripletsDC_->view().triplets()); auto nEligibleT4Modules_buf = cms::alpakatools::make_host_buffer(queue_); auto nTotalQuadruplets_buf = 
cms::alpakatools::make_host_buffer(queue_); @@ -1122,16 +1116,15 @@ void LSTEvent::createQuadruplets() { auto nTotalQuadruplets = *nTotalQuadruplets_buf.data(); if (!quadrupletsDC_) { - std::array const quadruplets_sizes{{static_cast(nTotalQuadruplets), static_cast(nLowerModules_)}}; - quadrupletsDC_.emplace(quadruplets_sizes, queue_); - auto quadrupletsOccupancy = quadrupletsDC_->view(); + quadrupletsDC_.emplace(queue_, static_cast(nTotalQuadruplets), static_cast(nLowerModules_)); + auto quadrupletsOccupancy = quadrupletsDC_->view().quadrupletsOccupancy(); auto nQuadruplets_view = cms::alpakatools::make_device_view( queue_, quadrupletsOccupancy.nQuadruplets(), quadrupletsOccupancy.metadata().size()); alpaka::memset(queue_, nQuadruplets_view, 0u); auto totOccupancyQuadruplets_view = cms::alpakatools::make_device_view( queue_, quadrupletsOccupancy.totOccupancyQuadruplets(), quadrupletsOccupancy.metadata().size()); alpaka::memset(queue_, totOccupancyQuadruplets_view, 0u); - auto quadruplets = quadrupletsDC_->view(); + auto quadruplets = quadrupletsDC_->view().quadruplets(); auto isDup_view = cms::alpakatools::make_device_view(queue_, quadruplets.isDup(), quadruplets.metadata().size()); alpaka::memset(queue_, isDup_view, 0u); } @@ -1142,13 +1135,13 @@ void LSTEvent::createQuadruplets() { alpaka::exec(queue_, createQuadruplets_workDiv, CreateQuadruplets{}, - modules_.const_view(), - miniDoubletsDC_->const_view(), - segmentsDC_->const_view(), - tripletsDC_->view(), - tripletsDC_->const_view(), - quadrupletsDC_->view(), - quadrupletsDC_->view(), + modules_.const_view().modules(), + miniDoubletsDC_->const_view().miniDoublets(), + segmentsDC_->const_view().segments(), + tripletsDC_->view().triplets(), + tripletsDC_->const_view().tripletsOccupancy(), + quadrupletsDC_->view().quadruplets(), + quadrupletsDC_->view().quadrupletsOccupancy(), rangesDC_->const_view(), nEligibleT4Modules, ptCut_); @@ -1159,9 +1152,9 @@ void LSTEvent::createQuadruplets() { alpaka::exec(queue_, 
removeDupQuadrupletsAfterBuild_workDiv, RemoveDupQuadrupletsAfterBuild{}, - modules_.const_view(), - quadrupletsDC_->view(), - quadrupletsDC_->const_view(), + modules_.const_view().modules(), + quadrupletsDC_->view().quadruplets(), + quadrupletsDC_->const_view().quadrupletsOccupancy(), rangesDC_->const_view()); auto const addQuadrupletRangesToEventExplicit_workDiv = cms::alpakatools::make_workdiv(1, 1024); @@ -1169,8 +1162,8 @@ void LSTEvent::createQuadruplets() { alpaka::exec(queue_, addQuadrupletRangesToEventExplicit_workDiv, AddQuadrupletRangesToEventExplicit{}, - modules_.const_view(), - quadrupletsDC_->const_view(), + modules_.const_view().modules(), + quadrupletsDC_->const_view().quadrupletsOccupancy(), rangesDC_->view()); if (addObjects_) { @@ -1180,12 +1173,12 @@ void LSTEvent::createQuadruplets() { void LSTEvent::addMiniDoubletsToEventExplicit() { auto nMDsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); - auto mdsOccupancy = miniDoubletsDC_->const_view(); + auto mdsOccupancy = miniDoubletsDC_->const_view().miniDoubletsOccupancy(); auto nMDs_view = cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs(), nLowerModules_); // exclude pixel part alpaka::memcpy(queue_, nMDsCPU_buf, nMDs_view, nLowerModules_); - auto modules = modules_.const_view(); + auto modules = modules_.const_view().modules(); // FIXME: replace by ES host data auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); @@ -1218,10 +1211,10 @@ void LSTEvent::addMiniDoubletsToEventExplicit() { void LSTEvent::addSegmentsToEventExplicit() { auto nSegmentsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); auto nSegments_buf = cms::alpakatools::make_device_view( - queue_, segmentsDC_->const_view().nSegments(), nLowerModules_); + queue_, segmentsDC_->const_view().segmentsOccupancy().nSegments(), nLowerModules_); alpaka::memcpy(queue_, nSegmentsCPU_buf, nSegments_buf, nLowerModules_); - auto modules = modules_.const_view(); + 
auto modules = modules_.const_view().modules(); // FIXME: replace by ES host data auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); @@ -1252,13 +1245,13 @@ void LSTEvent::addSegmentsToEventExplicit() { } void LSTEvent::addQuintupletsToEventExplicit() { - auto quintupletsOccupancy = quintupletsDC_->const_view(); + auto quintupletsOccupancy = quintupletsDC_->const_view().quintupletsOccupancy(); auto nQuintuplets_view = cms::alpakatools::make_device_view(queue_, quintupletsOccupancy.nQuintuplets(), nLowerModules_); auto nQuintupletsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); alpaka::memcpy(queue_, nQuintupletsCPU_buf, nQuintuplets_view); - auto modules = modules_.const_view(); + auto modules = modules_.const_view().modules(); // FIXME: replace by ES host data auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); @@ -1296,12 +1289,12 @@ void LSTEvent::addQuintupletsToEventExplicit() { } void LSTEvent::addTripletsToEventExplicit() { - auto tripletsOccupancy = tripletsDC_->const_view(); + auto tripletsOccupancy = tripletsDC_->const_view().tripletsOccupancy(); auto nTriplets_view = cms::alpakatools::make_device_view(queue_, tripletsOccupancy.nTriplets(), nLowerModules_); auto nTripletsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); alpaka::memcpy(queue_, nTripletsCPU_buf, nTriplets_view); - auto modules = modules_.const_view(); + auto modules = modules_.const_view().modules(); // FIXME: replace by ES host data auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); @@ -1332,13 +1325,13 @@ void LSTEvent::addTripletsToEventExplicit() { } void LSTEvent::addQuadrupletsToEventExplicit() { - auto quadrupletsOccupancy = quadrupletsDC_->const_view(); + auto quadrupletsOccupancy = quadrupletsDC_->const_view().quadrupletsOccupancy(); auto nQuadruplets_view = cms::alpakatools::make_device_view(queue_, quadrupletsOccupancy.nQuadruplets(), 
nLowerModules_); auto nQuadrupletsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); alpaka::memcpy(queue_, nQuadrupletsCPU_buf, nQuadruplets_view); - auto modules = modules_.const_view(); + auto modules = modules_.const_view().modules(); // FIXME: replace by ES host data auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); @@ -1577,17 +1570,16 @@ unsigned int LSTEvent::getNumberOfQuadrupletsByLayerEndcap(unsigned int layer) { template typename TSoA::ConstView LSTEvent::getInput(bool sync) { if constexpr (std::is_same_v) { - return lstInputDC_->const_view(); + return LSTInputViewAccessor::get(lstInputDC_->const_view()); } else { // In case getTrimmedInput was called first - if (!lstInputHC_ || lstInputHC_->sizes()[1] == 0) { + if (!lstInputHC_ || lstInputHC_->size()[1] == 0) { lstInputHC_.emplace( - cms::alpakatools::CopyToHost>::copyAsync( - queue_, *lstInputDC_)); + cms::alpakatools::CopyToHost>::copyAsync(queue_, *lstInputDC_)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } - return lstInputHC_->const_view(); + return LSTInputViewAccessor::get(lstInputHC_->const_view()); } } template HitsBaseConst LSTEvent::getInput(bool); @@ -1596,16 +1588,15 @@ template PixelSeedsConst LSTEvent::getInput(bool); template typename TSoA::ConstView LSTEvent::getHits(bool sync) { if constexpr (std::is_same_v) { - return hitsDC_->const_view(); + return HitsViewAccessor::get(hitsDC_->const_view()); } else { if (!hitsHC_) { hitsHC_.emplace( - cms::alpakatools::CopyToHost>::copyAsync( - queue_, *hitsDC_)); + cms::alpakatools::CopyToHost>::copyAsync(queue_, *hitsDC_)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } - return hitsHC_->const_view(); + return HitsViewAccessor::get(hitsHC_->const_view()); } } template HitsExtendedConst LSTEvent::getHits(bool); @@ -1630,17 +1621,16 @@ template ObjectRangesConst LSTEvent::getRanges<>(bool); template typename TSoA::ConstView
LSTEvent::getMiniDoublets(bool sync) { if constexpr (std::is_same_v) { - return miniDoubletsDC_->const_view(); + return MiniDoubletsViewAccessor::get(miniDoubletsDC_->const_view()); } else { if (!miniDoubletsHC_) { miniDoubletsHC_.emplace( - cms::alpakatools::CopyToHost< - PortableMultiCollection>::copyAsync(queue_, - *miniDoubletsDC_)); + cms::alpakatools::CopyToHost>::copyAsync( + queue_, *miniDoubletsDC_)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } - return miniDoubletsHC_->const_view(); + return MiniDoubletsViewAccessor::get(miniDoubletsHC_->const_view()); } } template MiniDoubletsConst LSTEvent::getMiniDoublets(bool); @@ -1649,16 +1639,15 @@ template MiniDoubletsOccupancyConst LSTEvent::getMiniDoublets typename TSoA::ConstView LSTEvent::getSegments(bool sync) { if constexpr (std::is_same_v) { - return segmentsDC_->const_view(); + return SegmentsViewAccessor::get(segmentsDC_->const_view()); } else { if (!segmentsHC_) { - segmentsHC_.emplace( - cms::alpakatools::CopyToHost>::copyAsync( - queue_, *segmentsDC_)); + segmentsHC_.emplace(cms::alpakatools::CopyToHost>::copyAsync( + queue_, *segmentsDC_)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } - return segmentsHC_->const_view(); + return SegmentsViewAccessor::get(segmentsHC_->const_view()); } } template SegmentsConst LSTEvent::getSegments(bool); @@ -1684,18 +1673,16 @@ template PixelSegmentsConst LSTEvent::getPixelSegments<>(bool); template typename TSoA::ConstView LSTEvent::getTriplets(bool sync) { if constexpr (std::is_same_v) { - return tripletsDC_->const_view(); + return TripletsViewAccessor::get(tripletsDC_->const_view()); } else { if (!tripletsHC_) { - tripletsHC_.emplace( - cms::alpakatools::CopyToHost>::copyAsync( - queue_, *tripletsDC_)); - + tripletsHC_.emplace(cms::alpakatools::CopyToHost>::copyAsync( + queue_, *tripletsDC_)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } } - return tripletsHC_->const_view(); + return 
TripletsViewAccessor::get(tripletsHC_->const_view()); } template TripletsConst LSTEvent::getTriplets(bool); template TripletsOccupancyConst LSTEvent::getTriplets(bool); @@ -1703,18 +1690,17 @@ template TripletsOccupancyConst LSTEvent::getTriplets(bool template typename TSoA::ConstView LSTEvent::getQuadruplets(bool sync) { if constexpr (std::is_same_v) { - return quadrupletsDC_->const_view(); + return QuadrupletsViewAccessor::get(quadrupletsDC_->const_view()); } else { if (!quadrupletsHC_) { quadrupletsHC_.emplace( - cms::alpakatools::CopyToHost>::copyAsync( + cms::alpakatools::CopyToHost>::copyAsync( queue_, *quadrupletsDC_)); - if (sync) alpaka::wait(queue_); // host consumers expect filled data } } - return quadrupletsHC_->const_view(); + return QuadrupletsViewAccessor::get(quadrupletsHC_->const_view()); } template QuadrupletsConst LSTEvent::getQuadruplets(bool); template QuadrupletsOccupancyConst LSTEvent::getQuadruplets(bool); @@ -1722,18 +1708,17 @@ template QuadrupletsOccupancyConst LSTEvent::getQuadruplets typename TSoA::ConstView LSTEvent::getQuintuplets(bool sync) { if constexpr (std::is_same_v) { - return quintupletsDC_->const_view(); + return QuintupletsViewAccessor::get(quintupletsDC_->const_view()); } else { if (!quintupletsHC_) { quintupletsHC_.emplace( - cms::alpakatools::CopyToHost>::copyAsync( + cms::alpakatools::CopyToHost>::copyAsync( queue_, *quintupletsDC_)); - if (sync) alpaka::wait(queue_); // host consumers expect filled data } } - return quintupletsHC_->const_view(); + return QuintupletsViewAccessor::get(quintupletsHC_->const_view()); } template QuintupletsConst LSTEvent::getQuintuplets(bool); template QuintupletsOccupancyConst LSTEvent::getQuintuplets(bool); @@ -1816,16 +1801,15 @@ std::unique_ptr LSTEvent::releaseTrackCandi template typename TSoA::ConstView LSTEvent::getModules(bool sync) { if constexpr (std::is_same_v) { - return modules_.const_view(); + return ModulesViewAccessor::get(modules_.const_view()); } else { if (!modulesHC_) { 
modulesHC_.emplace( - cms::alpakatools::CopyToHost>::copyAsync( - queue_, modules_)); + cms::alpakatools::CopyToHost>::copyAsync(queue_, modules_)); if (sync) alpaka::wait(queue_); // host consumers expect filled data } - return modulesHC_->const_view(); + return ModulesViewAccessor::get(modulesHC_->const_view()); } } template ModulesConst LSTEvent::getModules(bool); diff --git a/RecoTracker/LSTCore/src/classes.cc b/RecoTracker/LSTCore/src/classes.cc index c92a9118b44ba..21680eb42b810 100644 --- a/RecoTracker/LSTCore/src/classes.cc +++ b/RecoTracker/LSTCore/src/classes.cc @@ -3,6 +3,6 @@ #include "RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h" #ifndef LST_STANDALONE -SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(lst::LSTInputHostCollection); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(lst::LSTInputHostCollection); SET_PORTABLEHOSTCOLLECTION_READ_RULES(lst::TrackCandidatesBaseHostCollection); #endif diff --git a/RecoTracker/LSTCore/src/classes_def.xml b/RecoTracker/LSTCore/src/classes_def.xml index a86cf567d14f5..1974edaafe56d 100644 --- a/RecoTracker/LSTCore/src/classes_def.xml +++ b/RecoTracker/LSTCore/src/classes_def.xml @@ -1,17 +1,10 @@ + + + + - - - - - - - - - - - diff --git a/RecoTracker/PixelSeeding/interface/CAGeometryDevice.h b/RecoTracker/PixelSeeding/interface/CAGeometryDevice.h index 4cc5c3ffcaccf..fe9c3c2695dc2 100644 --- a/RecoTracker/PixelSeeding/interface/CAGeometryDevice.h +++ b/RecoTracker/PixelSeeding/interface/CAGeometryDevice.h @@ -11,6 +11,6 @@ namespace reco { template - using CAGeometryDevice = PortableDeviceMultiCollection; + using CAGeometryDevice = PortableDeviceCollection; } #endif // RecoTracker_PixelSeeding_interface_CAGeometryDevice_H diff --git a/RecoTracker/PixelSeeding/interface/CAGeometryHost.h b/RecoTracker/PixelSeeding/interface/CAGeometryHost.h index 0a231b80ea6bf..48c59bce7007d 100644 --- a/RecoTracker/PixelSeeding/interface/CAGeometryHost.h +++ b/RecoTracker/PixelSeeding/interface/CAGeometryHost.h @@ -10,6 +10,6 @@ 
#include "HeterogeneousCore/AlpakaInterface/interface/config.h" namespace reco { - using CAGeometryHost = PortableHostMultiCollection; + using CAGeometryHost = PortableHostCollection; } #endif // RecoTracker_PixelSeeding_interface_CAGeometryHost_H diff --git a/RecoTracker/PixelSeeding/interface/CAGeometrySoA.h b/RecoTracker/PixelSeeding/interface/CAGeometrySoA.h index 3067a8cd5bbce..3f02cde4b233b 100644 --- a/RecoTracker/PixelSeeding/interface/CAGeometrySoA.h +++ b/RecoTracker/PixelSeeding/interface/CAGeometrySoA.h @@ -4,6 +4,7 @@ #include #include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoABlocks.h" #include "DataFormats/GeometrySurface/interface/SOARotation.h" namespace reco { @@ -48,6 +49,11 @@ namespace reco { SOA_COLUMN(float, maxDR), SOA_COLUMN(float, ptCuts)) + GENERATE_SOA_BLOCKS(CALayoutTemplate, + SOA_BLOCK(layers, CALayersLayout), + SOA_BLOCK(graph, CAGraphLayout), + SOA_BLOCK(modules, CAModulesLayout)) + using CALayersSoA = CALayersLayout<>; using CALayersSoAView = CALayersSoA::View; using CALayersSoAConstView = CALayersSoA::ConstView; @@ -60,5 +66,9 @@ namespace reco { using CAModulesView = CAModulesSoA::View; using CAModulesConstView = CAModulesSoA::ConstView; + using CALayout = CALayoutTemplate<>; + using CALayoutView = CALayout::View; + using CALayoutConstView = CALayout::ConstView; + } // namespace reco #endif // RecoTracker_PixelSeeding_interface_CAGeometry_h diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc index 0a169a6365cec..729247ed9718d 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtuplet.cc @@ -259,11 +259,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { #endif layerStarts[n_layers] = n_modules; - reco::CAGeometryHost product{{{n_layers + 1, n_pairs, n_modules}}, cms::alpakatools::host()}; + reco::CAGeometryHost 
product{cms::alpakatools::host(), n_layers + 1, n_pairs, n_modules}; - auto layerSoA = product.view(); - auto cellSoA = product.view<::reco::CAGraphSoA>(); - auto modulesSoA = product.view<::reco::CAModulesSoA>(); + auto layerSoA = product.view().layers(); + auto cellSoA = product.view().graph(); + auto modulesSoA = product.view().modules(); for (int i = 0; i < n_modules; ++i) { auto idx = moduleToindexInDets[i]; @@ -389,8 +389,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { << ") and all the starting pairs has BPix1 as inner layer.\nIt's useless " << "to run the CA. Returning with 0 tracks!"; auto& queue = iEvent.queue(); - reco::TracksSoACollection tracks({{0, 0}}, queue); - auto ntracks_d = cms::alpakatools::make_device_view(queue, tracks.view().nTracks()); + reco::TracksSoACollection tracks(queue, 0, 0); + auto ntracks_d = cms::alpakatools::make_device_view(queue, tracks.view().tracks().nTracks()); alpaka::memset(queue, ntracks_d, 0); iEvent.emplace(tokenTrack_, std::move(tracks)); } diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc index 299b5208409e3..702839c0d1d5c 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGenerator.cc @@ -395,55 +395,48 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const int32_t H = m_params.algoParams_.avgHitsPerTrack_; - reco::TracksSoACollection tracks({{int(nTracks), int(nTracks * H)}}, queue); + reco::TracksSoACollection trackCollection(queue, static_cast(nTracks), static_cast(nTracks * H)); + + auto tracks = trackCollection.view().tracks(); + + auto trackingHits = hits_d.view().trackingHits(); + auto hitModules = hits_d.view().hitModules(); + + auto layers = geometry_d.view().layers(); + auto graph = geometry_d.view().graph(); + auto modules = geometry_d.view().modules(); // Don't bother if less than 2 this - if (hits_d.view().metadata().size() < 2) { + 
if (trackingHits.metadata().size() < 2) { const auto device = alpaka::getDev(queue); - auto ntracks_d = cms::alpakatools::make_device_view(device, tracks.view().nTracks()); + auto ntracks_d = cms::alpakatools::make_device_view(device, tracks.nTracks()); alpaka::memset(queue, ntracks_d, 0); - return tracks; + return trackCollection; } GPUKernels kernels( - m_params, hits_d.nHits(), hits_d.offsetBPIX2(), nDoublets, nTracks, geometry_d.view().metadata().size(), queue); - - kernels.prepareHits(hits_d.view(), hits_d.view<::reco::HitModuleSoA>(), geometry_d.view(), queue); - kernels.buildDoublets(hits_d.view(), - geometry_d.view<::reco::CAGraphSoA>(), - geometry_d.view<::reco::CALayersSoA>(), - hits_d.offsetBPIX2(), - queue); - kernels.launchKernels(hits_d.view(), - hits_d.offsetBPIX2(), - geometry_d.view().metadata().size(), - tracks.view(), - tracks.view(), - geometry_d.view<::reco::CALayersSoA>(), - geometry_d.view<::reco::CAGraphSoA>(), - queue); + m_params, hits_d.nHits(), hits_d.offsetBPIX2(), nDoublets, nTracks, layers.metadata().size(), queue); + + kernels.prepareHits(trackingHits, hitModules, layers, queue); + kernels.buildDoublets(trackingHits, graph, layers, hits_d.offsetBPIX2(), queue); + kernels.launchKernels( + trackingHits, hits_d.offsetBPIX2(), layers.metadata().size(), trackCollection.view(), layers, graph, queue); HelixFit fitter(bfield, m_params.algoParams_.fitNas4_); - fitter.allocate(kernels.tupleMultiplicity(), tracks.view(), kernels.hitContainer()); + fitter.allocate(kernels.tupleMultiplicity(), tracks, kernels.hitContainer()); if (m_params.algoParams_.useRiemannFit_) { - fitter.launchRiemannKernels(hits_d.view(), - geometry_d.view<::reco::CAModulesSoA>(), - hits_d.view().metadata().size(), - TrackerTraits::maxNumberOfQuadruplets, - queue); + fitter.launchRiemannKernels( + trackingHits, modules, trackingHits.metadata().size(), TrackerTraits::maxNumberOfQuadruplets, queue); } else { - fitter.launchBrokenLineKernels(hits_d.view(), - 
geometry_d.view<::reco::CAModulesSoA>(), - hits_d.view().metadata().size(), - TrackerTraits::maxNumberOfQuadruplets, - queue); + fitter.launchBrokenLineKernels( + trackingHits, modules, trackingHits.metadata().size(), TrackerTraits::maxNumberOfQuadruplets, queue); } - kernels.classifyTuples(hits_d.view(), tracks.view(), queue); + kernels.classifyTuples(trackingHits, tracks, queue); #ifdef GPU_DEBUG alpaka::wait(queue); std::cout << "finished building pixel tracks on GPU" << std::endl; #endif - return tracks; + return trackCollection; } template class CAHitNtupletGenerator; diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc index 3b07ca6ebe1c2..3da0304b3f68d 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.dev.cc @@ -216,14 +216,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { void CAHitNtupletGeneratorKernels::launchKernels(const HitsConstView &hh, uint32_t offsetBPIX2, uint16_t nLayers, - TkSoAView &tracks_view, - TkHitsSoAView &tracks_hits_view, + TkSoABlocksView &view, const reco::CALayersSoAConstView &ll, const reco::CAGraphSoAConstView &cc, Queue &queue) { using namespace caPixelDoublets; using namespace caHitNtupletGeneratorKernels; + auto tracks_view = view.tracks(); + auto tracks_hits_view = view.trackHits(); + uint32_t nhits = hh.metadata().size(); auto const maxDoublets = this->maxNumberOfDoublets_; auto const maxTuples = tracks_view.metadata().size(); @@ -388,8 +390,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDiv1D, Kernel_fillNLayers{}, - tracks_view, - tracks_hits_view, + view, this->device_layerStarts_->data(), nLayers, this->device_hitTuple_apc_); diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h 
b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h index 2634c1479b3f1..b8e35e5dabea0 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernels.h @@ -163,8 +163,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { void launchKernels(const HitsConstView& hh, uint32_t offsetBPIX2, uint16_t nLayers, - TkSoAView& track_view, - TkHitsSoAView& track_hits_view, + TkSoABlocksView& view, const ::reco::CALayersSoAConstView& ll, const ::reco::CAGraphSoAConstView& cc, Queue& queue); diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h index f86c1656c0d5e..09111914614a0 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAHitNtupletGeneratorKernelsImpl.h @@ -729,26 +729,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::caHitNtupletGeneratorKernels { class Kernel_fillNLayers { public: ALPAKA_FN_ACC void operator()(Acc1D const &acc, - TkSoAView tracks_view, - TkHitSoAView track_hits_view, + TkSoABlocksView view, uint32_t const *__restrict__ layerStarts, uint16_t maxLayers, cms::alpakatools::AtomicPairCounter *apc) const { // clamp the number of tracks to the capacity of the SoA - auto ntracks = std::min(apc->get().first, tracks_view.metadata().size() - 1); + auto ntracks = std::min(apc->get().first, view.tracks().metadata().size() - 1); if (cms::alpakatools::once_per_grid(acc)) - tracks_view.nTracks() = ntracks; + view.tracks().nTracks() = ntracks; for (auto idx : cms::alpakatools::uniform_elements(acc, ntracks)) { - ALPAKA_ASSERT_ACC(reco::nHits(tracks_view, idx) >= 3); - tracks_view[idx].nLayers() = reco::nLayers(tracks_view, track_hits_view, maxLayers, layerStarts, idx); + ALPAKA_ASSERT_ACC(reco::nHits(view.tracks(), idx) >= 3); + view.tracks()[idx].nLayers() = 
reco::nLayers(view, maxLayers, layerStarts, idx); #ifdef CA_DEBUG printf("Kernel_fillNLayers %d %d %d - %d %d\n", idx, ntracks, - tracks_view[idx].nLayers(), + view.tracks()[idx].nLayers(), apc->get().first, - tracks_view.metadata().size() - 1); + view.tracks().metadata().size() - 1); #endif } } diff --git a/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h b/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h index 10a1a876310b6..aeb2eb6fcef0f 100644 --- a/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h +++ b/RecoTracker/PixelSeeding/plugins/alpaka/CAStructures.h @@ -51,6 +51,7 @@ namespace caStructures { //Tracks data formats using TkSoAView = ::reco::TrackSoAView; using TkHitsSoAView = ::reco::TrackHitSoAView; + using TkSoABlocksView = ::reco::TrackBlocksView; //Indices for hits, tracks and cells using hindex_type = uint32_t; diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc index 6f4cc62edb662..c82effcf4a6d9 100644 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc +++ b/RecoTracker/PixelTrackFitting/plugins/PixelTrackDumpAlpaka.cc @@ -17,7 +17,7 @@ template class PixelTrackDumpAlpakaT : public edm::global::EDAnalyzer<> { public: using TkSoAHost = reco::TracksHost; - using VertexSoAHost = ZVertexHost; + using VertexSoAHost = reco::ZVertexHost; explicit PixelTrackDumpAlpakaT(const edm::ParameterSet& iConfig); ~PixelTrackDumpAlpakaT() override = default; @@ -49,24 +49,24 @@ void PixelTrackDumpAlpakaT::analyze(edm::StreamID streamID, edm::Event const& iEvent, const edm::EventSetup& iSetup) const { auto const& tracks = iEvent.get(tokenSoATrack_); - assert(tracks.view().quality().data()); - assert(tracks.view().chi2().data()); - assert(tracks.view().nLayers().data()); - assert(tracks.view().eta().data()); - assert(tracks.view().pt().data()); - assert(tracks.view().state().data()); - assert(tracks.view().covariance().data()); - 
assert(tracks.view().nTracks()); + assert(tracks.view().tracks().quality().data()); + assert(tracks.view().tracks().chi2().data()); + assert(tracks.view().tracks().nLayers().data()); + assert(tracks.view().tracks().eta().data()); + assert(tracks.view().tracks().pt().data()); + assert(tracks.view().tracks().state().data()); + assert(tracks.view().tracks().covariance().data()); + assert(tracks.view().tracks().nTracks()); auto const& vertices = iEvent.get(tokenSoAVertex_); - assert(vertices.view().idv().data()); - assert(vertices.view().zv().data()); - assert(vertices.view().wv().data()); - assert(vertices.view().chi2().data()); - assert(vertices.view().ptv2().data()); - assert(vertices.view().ndof().data()); - assert(vertices.view().sortInd().data()); - assert(vertices.view().nvFinal()); + assert(vertices.view().zvertexTracks().idv().data()); + assert(vertices.view().zvertex().zv().data()); + assert(vertices.view().zvertex().wv().data()); + assert(vertices.view().zvertex().chi2().data()); + assert(vertices.view().zvertex().ptv2().data()); + assert(vertices.view().zvertexTracks().ndof().data()); + assert(vertices.view().zvertex().sortInd().data()); + assert(vertices.view().zvertex().nvFinal()); } using PixelTrackDumpAlpakaPhase1 = PixelTrackDumpAlpakaT; diff --git a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc index a86cf5134d748..09d1f829966e4 100644 --- a/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc +++ b/RecoTracker/PixelTrackFitting/plugins/PixelTrackProducerFromSoAAlpaka.cc @@ -363,11 +363,10 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID streamID, hits.reserve(5); //TODO move to a configurable parameter? 
auto const &tsoa = iEvent.get(trackSoAToken_); - auto const quality = tsoa.view().quality(); - auto const hitOffs = tsoa.view().hitOffsets(); - auto const hitIdxs = tsoa.template view().id(); - // auto const &hitIndices = tsoa.view().hitIndices(); - auto nTracks = tsoa.view().nTracks(); + auto const quality = tsoa.view().tracks().quality(); + auto const hitOffs = tsoa.view().tracks().hitOffsets(); + auto const hitIdxs = tsoa.view().trackHits().id(); + auto nTracks = tsoa.view().tracks().nTracks(); tracks.reserve(nTracks); @@ -379,7 +378,7 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID streamID, // sort good-quality tracks by pt, keep bad-quality tracks at the bottom std::sort(sortIdxs.begin(), sortIdxs.end(), [&](int32_t const i1, int32_t const i2) { if (quality[i1] >= minQuality_ && quality[i2] >= minQuality_) - return tsoa.view()[i1].pt() > tsoa.view()[i2].pt(); + return tsoa.view().tracks()[i1].pt() > tsoa.view().tracks()[i2].pt(); else return quality[i1] > quality[i2]; }); @@ -388,7 +387,7 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID streamID, // loop over (sorted) tracks for (const auto &it : sortIdxs) { - auto nHits = reco::nHits(tsoa.view(), it); + auto nHits = reco::nHits(tsoa.view().tracks(), it); assert(nHits >= 3); auto q = quality[it]; @@ -448,12 +447,12 @@ void PixelTrackProducerFromSoAAlpaka::produce(edm::StreamID streamID, ++nt; // mind: this values are respect the beamspot! 
- float chi2 = tsoa.view()[it].chi2(); - float phi = reco::phi(tsoa.view(), it); + float chi2 = tsoa.view().tracks()[it].chi2(); + float phi = reco::phi(tsoa.view().tracks(), it); riemannFit::Vector5d ipar, opar; riemannFit::Matrix5d icov, ocov; - reco::copyToDense(tsoa.view(), ipar, icov, it); + reco::copyToDense(tsoa.view().tracks(), ipar, icov, it); riemannFit::transformToPerigeePlane(ipar, icov, opar, ocov); LocalTrajectoryParameters lpar(opar(0), opar(1), opar(2), opar(3), opar(4), 1.); diff --git a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoAAlpaka.cc b/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoAAlpaka.cc index cf3d8aecdaafb..8c8a553345591 100644 --- a/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoAAlpaka.cc +++ b/RecoVertex/PixelVertexFinding/plugins/PixelVertexProducerFromSoAAlpaka.cc @@ -32,7 +32,7 @@ class PixelVertexProducerFromSoAAlpaka : public edm::global::EDProducer<> { private: void produce(edm::StreamID streamID, edm::Event &iEvent, const edm::EventSetup &iSetup) const override; - edm::EDGetTokenT tokenVertex_; + edm::EDGetTokenT tokenVertex_; edm::EDGetTokenT tokenBeamSpot_; edm::EDGetTokenT tokenTracks_; edm::EDGetTokenT tokenIndToEdm_; @@ -82,7 +82,7 @@ void PixelVertexProducerFromSoAAlpaka::produce(edm::StreamID streamID, auto const &soa = iEvent.get(tokenVertex_); - int nv = soa.view().nvFinal(); + int nv = soa.view().zvertex().nvFinal(); #ifdef PIXVERTEX_DEBUG_PRODUCE std::cout << "converting " << nv << " vertices " @@ -91,20 +91,20 @@ void PixelVertexProducerFromSoAAlpaka::produce(edm::StreamID streamID, std::set uind; // for verifing index consistency for (int j = nv - 1; j >= 0; --j) { - auto i = soa.view()[j].sortInd(); // on gpu sorted in ascending order.... + auto i = soa.view().zvertex()[j].sortInd(); // on gpu sorted in ascending order.... 
assert(i < nv); uind.insert(i); assert(itrk.empty()); - auto z = soa.view()[i].zv(); + auto z = soa.view().zvertex()[i].zv(); auto x = x0 + dxdz * z; auto y = y0 + dydz * z; z += z0; reco::Vertex::Error err; - err(2, 2) = 1.f / soa.view()[i].wv(); + err(2, 2) = 1.f / soa.view().zvertex()[i].wv(); err(2, 2) *= 2.; // artifically inflate error //Copy also the tracks (no intention to be efficient....) for (auto k = 0U; k < indToEdm.size(); ++k) { - if (soa.view()[k].idv() == int16_t(i)) + if (soa.view().zvertexTracks()[k].idv() == int16_t(i)) itrk.push_back(k); } auto nt = itrk.size(); @@ -119,7 +119,7 @@ void PixelVertexProducerFromSoAAlpaka::produce(edm::StreamID streamID, continue; } // remove outliers (*vertexes).emplace_back( - reco::Vertex::Point(x, y, z), err, soa.view()[i].chi2(), soa.view()[i].ndof(), nt); + reco::Vertex::Point(x, y, z), err, soa.view().zvertex()[i].chi2(), soa.view().zvertexTracks()[i].ndof(), nt); auto &v = (*vertexes).back(); v.reserve(itrk.size()); for (auto it : itrk) { diff --git a/RecoVertex/PixelVertexFinding/plugins/alpaka/PixelVertexProducerAlpaka.cc b/RecoVertex/PixelVertexFinding/plugins/alpaka/PixelVertexProducerAlpaka.cc index 28f9c5327e544..6ca8f1342e132 100644 --- a/RecoVertex/PixelVertexFinding/plugins/alpaka/PixelVertexProducerAlpaka.cc +++ b/RecoVertex/PixelVertexFinding/plugins/alpaka/PixelVertexProducerAlpaka.cc @@ -50,7 +50,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const float ptMax_; device::EDGetToken tokenDeviceTrack_; - device::EDPutToken tokenDeviceVertex_; + device::EDPutToken tokenDeviceVertex_; }; template @@ -102,7 +102,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { const device::EventSetup& iSetup) const { auto const& hTracks = iEvent.get(tokenDeviceTrack_); - iEvent.emplace(tokenDeviceVertex_, algo_.makeAsync(iEvent.queue(), hTracks.view(), maxVertices_, ptMin_, ptMax_)); + iEvent.emplace(tokenDeviceVertex_, + algo_.makeAsync(iEvent.queue(), hTracks.view().tracks(), maxVertices_, ptMin_, ptMax_)); } using 
PixelVertexProducerAlpakaPhase1 = PixelVertexProducerAlpaka; diff --git a/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc b/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc index 64a146a3eb400..5b86648fbd9c8 100644 --- a/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc +++ b/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.dev.cc @@ -38,14 +38,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { auto const quality = tracks_view.quality(); for (auto idx : cms::alpakatools::uniform_elements(acc, tracks_view.nTracks())) { - [[maybe_unused]] auto nHits = reco::nHits(tracks_view, idx); + [[maybe_unused]] auto nHits = ::reco::nHits(tracks_view, idx); ALPAKA_ASSERT_ACC(nHits >= 3); // initialize the track data trkdata[idx].idv() = -1; // do not use triplets - if (reco::isTriplet(tracks_view, idx)) + if (::reco::isTriplet(tracks_view, idx)) continue; // use only "high purity" track @@ -63,7 +63,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // load the track data into the workspace auto it = alpaka::atomicAdd(acc, &ws.ntrks(), 1u, alpaka::hierarchy::Blocks{}); ws[it].itrk() = idx; - ws[it].zt() = reco::zip(tracks_view, idx); + ws[it].zt() = ::reco::zip(tracks_view, idx); ws[it].ezt2() = tracks_view[idx].covariance()(14); ws[it].ptt2() = pt * pt; } @@ -125,15 +125,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { #endif template - ZVertexSoACollection Producer::makeAsync( + reco::ZVertexSoACollection Producer::makeAsync( Queue& queue, ::reco::TrackSoAConstView const& tracks_view, int maxVertices, float ptMin, float ptMax) const { #ifdef PIXVERTEX_DEBUG_PRODUCE std::cout << "producing Vertices on GPU" << std::endl; #endif // PIXVERTEX_DEBUG_PRODUCE const auto maxTracks = tracks_view.metadata().size(); - ZVertexSoACollection vertices({{maxVertices, maxTracks}}, queue); - auto data = vertices.view(); - auto trkdata = vertices.view(); + reco::ZVertexSoACollection vertices(queue, static_cast(maxVertices), static_cast(maxTracks)); + 
auto data = vertices.view().zvertex(); + auto trkdata = vertices.view().zvertexTracks(); PixelVertexWorkSpaceSoADevice workspace(maxTracks, queue); auto ws = workspace.view(); diff --git a/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.h b/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.h index 7d4eedd16db1c..6568c7667fb7d 100644 --- a/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.h +++ b/RecoVertex/PixelVertexFinding/plugins/alpaka/vertexFinder.h @@ -57,7 +57,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE::vertexFinder { ~Producer() = default; - ZVertexSoACollection makeAsync( + reco::ZVertexSoACollection makeAsync( Queue &queue, TkSoAConstView const &tracks_view, int maxVertices, float ptMin, float ptMax) const; private: diff --git a/RecoVertex/PixelVertexFinding/test/alpaka/VertexFinder_t.dev.cc b/RecoVertex/PixelVertexFinding/test/alpaka/VertexFinder_t.dev.cc index 4d475bda53a1a..1a576fabbad4f 100644 --- a/RecoVertex/PixelVertexFinding/test/alpaka/VertexFinder_t.dev.cc +++ b/RecoVertex/PixelVertexFinding/test/alpaka/VertexFinder_t.dev.cc @@ -38,10 +38,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { explicit ClusterGenerator(float nvert, float ntrack) : rgen(-13., 13), errgen(0.005, 0.025), clusGen(nvert), trackGen(ntrack), gauss(0., 1.), ptGen(1.) {} - void operator()(vertexFinder::PixelVertexWorkSpaceSoAHost& pwsh, ZVertexHost& vtxh) { + void operator()(vertexFinder::PixelVertexWorkSpaceSoAHost& pwsh, reco::ZVertexHost& vtxh) { int nclus = clusGen(reng); - for (int zint = 0; zint < vtxh.view().metadata().size(); ++zint) { - vtxh.view().zv()[zint] = 3.5f * gauss(reng); + for (int zint = 0; zint < vtxh.view().zvertex().metadata().size(); ++zint) { + vtxh.view().zvertex()[zint].zv() = 3.5f * gauss(reng); } int aux = 0; @@ -50,7 +50,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { pwsh.view().itrk()[iv] = nt; for (int it = 0; it < nt; ++it) { auto err = errgen(reng); // reality is not flat.... 
- pwsh.view().zt()[aux] = vtxh.view().zv()[iv] + err * gauss(reng); + pwsh.view().zt()[aux] = vtxh.view().zvertex().zv()[iv] + err * gauss(reng); pwsh.view().ezt2()[aux] = err * err; pwsh.view().iv()[aux] = iv; pwsh.view().ptt2()[aux] = (iv == 5 ? 1.f : 0.5f) + ptGen(reng); @@ -118,13 +118,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { void runKernels(Queue& queue) { // Run 3 values, used for testing - constexpr uint32_t maxTracks = 32 * 1024; - constexpr uint32_t maxVertices = 1024; + constexpr int32_t maxTracks = 32 * 1024; + constexpr int32_t maxVertices = 1024; vertexFinder::PixelVertexWorkSpaceSoADevice ws_d(maxTracks, queue); vertexFinder::PixelVertexWorkSpaceSoAHost ws_h(maxTracks, queue); - ZVertexHost vertices_h({{maxVertices, maxTracks}}, queue); - ZVertexSoACollection vertices_d({{maxVertices, maxTracks}}, queue); + reco::ZVertexHost vertices_h(queue, maxVertices, maxTracks); + reco::ZVertexSoACollection vertices_d(queue, maxVertices, maxTracks); float eps = 0.1f; std::array par{{eps, 0.01f, 9.0f}}; @@ -136,7 +136,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { gen(ws_h, vertices_h); auto workDiv1D = make_workdiv(1, 1); - alpaka::exec(queue, workDiv1D, vertexFinder::Init{}, vertices_d.view(), ws_d.view()); + alpaka::exec(queue, workDiv1D, vertexFinder::Init{}, vertices_d.view().zvertex(), ws_d.view()); // std::cout << "v,t size " << ws_h.view().zt()[0] << ' ' << vertices_h.view().zv()[0] << std::endl; alpaka::memcpy(queue, ws_d.buffer(), ws_h.buffer()); alpaka::wait(queue); @@ -152,15 +152,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { if ((i % 4) == 3) par = {{0.7f * eps, 0.01f, 9.0f}}; - alpaka::exec(queue, workDiv1D, Kernel_print{}, vertices_d.view(), ws_d.view()); + alpaka::exec(queue, workDiv1D, Kernel_print{}, vertices_d.view().zvertex(), ws_d.view()); auto workDivClusterizer = make_workdiv(1, 512 + 256); #ifdef ONE_KERNEL alpaka::exec(queue, workDivClusterizer, VertexFinderOneKernel{}, - vertices_d.view(), - vertices_d.view(), + 
vertices_d.view().zvertex(), + vertices_d.view().zvertexTracks(), ws_d.view(), kk, par[0], @@ -170,8 +170,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDivClusterizer, CLUSTERIZE{}, - vertices_d.view(), - vertices_d.view(), + vertices_d.view().zvertex(), + vertices_d.view().zvertexTracks(), ws_d.view(), kk, par[0], @@ -179,7 +179,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { par[2]); #endif alpaka::wait(queue); - alpaka::exec(queue, workDiv1D, Kernel_print{}, vertices_d.view(), ws_d.view()); + alpaka::exec(queue, workDiv1D, Kernel_print{}, vertices_d.view().zvertex(), ws_d.view()); alpaka::wait(queue); auto workDivFitter = make_workdiv(1, 1024 - 256); @@ -187,47 +187,49 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDivFitter, vertexFinder::FitVerticesKernel{}, - vertices_d.view(), - vertices_d.view(), + vertices_d.view().zvertex(), + vertices_d.view().zvertexTracks(), ws_d.view(), 50.f); alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer()); alpaka::wait(queue); - if (vertices_h.view().nvFinal() == 0) { + if (vertices_h.view().zvertex().nvFinal() == 0) { std::cout << "NO VERTICES???" 
<< std::endl; continue; } - for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j) - if (vertices_h.view().ndof()[j] > 0) - vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]); + for (auto j = 0U; j < vertices_h.view().zvertex().nvFinal(); ++j) + if (vertices_h.view().zvertexTracks().ndof()[j] > 0) + vertices_h.view().zvertex().chi2()[j] /= float(vertices_h.view().zvertexTracks().ndof()[j]); { - auto mx = std::minmax_element(vertices_h.view().chi2().data(), - vertices_h.view().chi2().data() + vertices_h.view().nvFinal()); - std::cout << "after fit nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' ' - << *mx.second << std::endl; + auto mx = + std::minmax_element(vertices_h.view().zvertex().chi2().data(), + vertices_h.view().zvertex().chi2().data() + vertices_h.view().zvertex().nvFinal()); + std::cout << "after fit nv, min max chi2 " << vertices_h.view().zvertex().nvFinal() << " " << *mx.first + << ' ' << *mx.second << std::endl; } alpaka::exec(queue, workDivFitter, vertexFinder::FitVerticesKernel{}, - vertices_d.view(), - vertices_d.view(), + vertices_d.view().zvertex(), + vertices_d.view().zvertexTracks(), ws_d.view(), 50.f); alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer()); alpaka::wait(queue); - for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j) - if (vertices_h.view().ndof()[j] > 0) - vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]); + for (auto j = 0U; j < vertices_h.view().zvertex().nvFinal(); ++j) + if (vertices_h.view().zvertexTracks().ndof()[j] > 0) + vertices_h.view().zvertex().chi2()[j] /= float(vertices_h.view().zvertexTracks().ndof()[j]); { - auto mx = std::minmax_element(vertices_h.view().chi2().data(), - vertices_h.view().chi2().data() + vertices_h.view().nvFinal()); - std::cout << "before splitting nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' ' - << *mx.second << std::endl; + auto mx = + 
std::minmax_element(vertices_h.view().zvertex().chi2().data(), + vertices_h.view().zvertex().chi2().data() + vertices_h.view().zvertex().nvFinal()); + std::cout << "before splitting nv, min max chi2 " << vertices_h.view().zvertex().nvFinal() << " " + << *mx.first << ' ' << *mx.second << std::endl; } auto workDivSplitter = make_workdiv(1024, 64); @@ -236,8 +238,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDivSplitter, vertexFinder::SplitVerticesKernel{}, - vertices_d.view(), - vertices_d.view(), + vertices_d.view().zvertex(), + vertices_d.view().zvertexTracks(), ws_d.view(), 9.f); alpaka::memcpy(queue, ws_h.buffer(), ws_d.buffer()); @@ -247,8 +249,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDivFitter, vertexFinder::FitVerticesKernel{}, - vertices_d.view(), - vertices_d.view(), + vertices_d.view().zvertex(), + vertices_d.view().zvertexTracks(), ws_d.view(), 5000.f); @@ -256,47 +258,53 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDivSorter, vertexFinder::SortByPt2Kernel{}, - vertices_d.view(), - vertices_d.view(), + vertices_d.view().zvertex(), + vertices_d.view().zvertexTracks(), ws_d.view()); alpaka::memcpy(queue, vertices_h.buffer(), vertices_d.buffer()); alpaka::wait(queue); - if (vertices_h.view().nvFinal() == 0) { + if (vertices_h.view().zvertex().nvFinal() == 0) { std::cout << "NO VERTICES???" 
<< std::endl; continue; } - for (auto j = 0U; j < vertices_h.view().nvFinal(); ++j) - if (vertices_h.view().ndof()[j] > 0) - vertices_h.view().chi2()[j] /= float(vertices_h.view().ndof()[j]); + for (auto j = 0U; j < vertices_h.view().zvertex().nvFinal(); ++j) + if (vertices_h.view().zvertexTracks().ndof()[j] > 0) + vertices_h.view().zvertex().chi2()[j] /= float(vertices_h.view().zvertexTracks().ndof()[j]); { - auto mx = std::minmax_element(vertices_h.view().chi2().data(), - vertices_h.view().chi2().data() + vertices_h.view().nvFinal()); - std::cout << "nv, min max chi2 " << vertices_h.view().nvFinal() << " " << *mx.first << ' ' << *mx.second - << std::endl; + auto mx = + std::minmax_element(vertices_h.view().zvertex().chi2().data(), + vertices_h.view().zvertex().chi2().data() + vertices_h.view().zvertex().nvFinal()); + std::cout << "nv, min max chi2 " << vertices_h.view().zvertex().nvFinal() << " " << *mx.first << ' ' + << *mx.second << std::endl; } { - auto mx = std::minmax_element(vertices_h.view().wv().data(), - vertices_h.view().wv().data() + vertices_h.view().nvFinal()); + auto mx = + std::minmax_element(vertices_h.view().zvertex().wv().data(), + vertices_h.view().zvertex().wv().data() + vertices_h.view().zvertex().nvFinal()); std::cout << "min max error " << 1. / std::sqrt(*mx.first) << ' ' << 1. 
/ std::sqrt(*mx.second) << std::endl; } { - auto mx = std::minmax_element(vertices_h.view().ptv2().data(), - vertices_h.view().ptv2().data() + vertices_h.view().nvFinal()); + auto mx = + std::minmax_element(vertices_h.view().zvertex().ptv2().data(), + vertices_h.view().zvertex().ptv2().data() + vertices_h.view().zvertex().nvFinal()); std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl; - std::cout << "min max ptv2 " << vertices_h.view().ptv2()[vertices_h.view().sortInd()[0]] << ' ' - << vertices_h.view().ptv2()[vertices_h.view().sortInd()[vertices_h.view().nvFinal() - 1]] - << " at " << vertices_h.view().sortInd()[0] << ' ' - << vertices_h.view().sortInd()[vertices_h.view().nvFinal() - 1] << std::endl; + std::cout << "min max ptv2 " << vertices_h.view().zvertex().ptv2()[vertices_h.view().zvertex().sortInd()[0]] + << ' ' + << vertices_h.view() + .zvertex() + .ptv2()[vertices_h.view().zvertex().sortInd()[vertices_h.view().zvertex().nvFinal() - 1]] + << " at " << vertices_h.view().zvertex().sortInd()[0] << ' ' + << vertices_h.view().zvertex().sortInd()[vertices_h.view().zvertex().nvFinal() - 1] << std::endl; } - float dd[vertices_h.view().nvFinal()]; - for (auto kv = 0U; kv < vertices_h.view().nvFinal(); ++kv) { - auto zr = vertices_h.view().zv()[kv]; + float dd[vertices_h.view().zvertex().nvFinal()]; + for (auto kv = 0U; kv < vertices_h.view().zvertex().nvFinal(); ++kv) { + auto zr = vertices_h.view().zvertex().zv()[kv]; auto md = 500.0f; for (int zint = 0; zint < ws_h.view().metadata().size(); ++zint) { auto d = std::abs(zr - ws_h.view().zt()[zint]); @@ -309,11 +317,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { std::cout << d << ' '; std::cout << std::endl; } - auto mx = std::minmax_element(dd, dd + vertices_h.view().nvFinal()); + auto mx = std::minmax_element(dd, dd + vertices_h.view().zvertex().nvFinal()); float rms = 0; for (auto d : dd) rms += d * d; - rms = std::sqrt(rms) / (vertices_h.view().nvFinal() - 1); + rms = std::sqrt(rms) / 
(vertices_h.view().zvertex().nvFinal() - 1); std::cout << "min max rms " << *mx.first << ' ' << *mx.second << ' ' << rms << std::endl; } // loop on events