diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 5888cd04a6128..3beeaa4830c83 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -7,29 +7,18 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" #include "CUDADataFormats/Common/interface/PortableDeviceCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" #include "DataFormats/SoATemplate/interface/SoALayout.h" -GENERATE_SOA_LAYOUT(SiPixelDigisSoALayout, - SOA_COLUMN(int32_t, clus), - SOA_COLUMN(uint32_t, pdigi), - SOA_COLUMN(uint32_t, rawIdArr), - SOA_COLUMN(uint16_t, adc), - SOA_COLUMN(uint16_t, xx), - SOA_COLUMN(uint16_t, yy), - SOA_COLUMN(uint16_t, moduleId)) - -using SiPixelDigisCUDASOA = SiPixelDigisSoALayout<>; -using SiPixelDigisCUDASOAView = SiPixelDigisCUDASOA::View; -using SiPixelDigisCUDASOAConstView = SiPixelDigisCUDASOA::ConstView; - // TODO: The class is created via inheritance of the PortableDeviceCollection. // This is generally discouraged, and should be done via composition. 
// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 -class SiPixelDigisCUDA : public cms::cuda::PortableDeviceCollection> { +class SiPixelDigisCUDA : public cms::cuda::PortableDeviceCollection { public: SiPixelDigisCUDA() = default; explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) - : PortableDeviceCollection>(maxFedWords + 1, stream) {} + : PortableDeviceCollection(maxFedWords + 1, stream) {} + ~SiPixelDigisCUDA() = default; SiPixelDigisCUDA(SiPixelDigisCUDA &&) = default; diff --git a/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h new file mode 100644 index 0000000000000..f0f2e5f5103ab --- /dev/null +++ b/CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h @@ -0,0 +1,14 @@ +#ifndef CalibTracker_Records_SiPixelGainCalibrationForHLTSoARcd_h +#define CalibTracker_Records_SiPixelGainCalibrationForHLTSoARcd_h + +#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "FWCore/Framework/interface/DependentRecordImplementation.h" +#include "FWCore/Framework/interface/EventSetupRecordImplementation.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" + +class SiPixelGainCalibrationForHLTSoARcd + : public edm::eventsetup::DependentRecordImplementation< + SiPixelGainCalibrationForHLTSoARcd, + edm::mpl::Vector> {}; + +#endif // CalibTracker_Records_SiPixelGainCalibrationForHLTSoARcd_h diff --git a/CalibTracker/Records/interface/SiPixelMappingSoARecord.h b/CalibTracker/Records/interface/SiPixelMappingSoARecord.h new file mode 100644 index 0000000000000..d8c31754cd8d9 --- /dev/null +++ b/CalibTracker/Records/interface/SiPixelMappingSoARecord.h @@ -0,0 +1,17 @@ +#ifndef CalibTracker_Records_interface_SiPixelMappingSoARecord_h +#define CalibTracker_Records_interface_SiPixelMappingSoARecord_h + +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include 
"CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h" +#include "CondFormats/DataRecord/interface/SiPixelQualityRcd.h" +#include "FWCore/Framework/interface/DependentRecordImplementation.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" + +class SiPixelMappingSoARecord + : public edm::eventsetup::DependentRecordImplementation> {}; + +#endif // CalibTracker_Records_interface_SiPixelMappingSoARecord_h diff --git a/CalibTracker/Records/src/SiPixelGainCalibrationForHLTSoARcd.cc b/CalibTracker/Records/src/SiPixelGainCalibrationForHLTSoARcd.cc new file mode 100644 index 0000000000000..6634cee007301 --- /dev/null +++ b/CalibTracker/Records/src/SiPixelGainCalibrationForHLTSoARcd.cc @@ -0,0 +1,5 @@ +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include "FWCore/Utilities/interface/typelookup.h" + +EVENTSETUP_RECORD_REG(SiPixelGainCalibrationForHLTSoARcd); diff --git a/CalibTracker/Records/src/SiPixelMappingSoARcd.cc b/CalibTracker/Records/src/SiPixelMappingSoARcd.cc new file mode 100644 index 0000000000000..fea2c978c1539 --- /dev/null +++ b/CalibTracker/Records/src/SiPixelMappingSoARcd.cc @@ -0,0 +1,5 @@ +#include "CalibTracker/Records/interface/SiPixelMappingSoARecord.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include "FWCore/Utilities/interface/typelookup.h" + +EVENTSETUP_RECORD_REG(SiPixelMappingSoARecord); diff --git a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml index 05446593b6229..8de546ff8856b 100644 --- a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml @@ -1,4 +1,3 @@ - @@ -11,6 +10,14 @@ + + + + + + + + diff --git a/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelCablingSoAESProducer.cc 
b/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelCablingSoAESProducer.cc
new file mode 100644
index 0000000000000..37f4bc6bd5945
--- /dev/null
+++ b/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelCablingSoAESProducer.cc
@@ -0,0 +1,145 @@
+#include "CalibTracker/Records/interface/SiPixelMappingSoARecord.h"
+#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h"
+#include "CondFormats/DataRecord/interface/SiPixelQualityRcd.h"
+#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h"
+#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h"
+#include "CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h"
+#include "CondFormats/SiPixelObjects/interface/SiPixelQuality.h"
+#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h"
+#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
+#include "FWCore/MessageLogger/interface/MessageLogger.h"
+#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
+#include "FWCore/Utilities/interface/ESGetToken.h"
+#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h"
+#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h"
+#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+
+  using namespace cms::alpakatools;
+
+  // ESProducer that flattens the pixel FED cabling tree (plus, when UseQualityInfo
+  // is set, the SiPixelQuality bad-ROC flags) into a SiPixelMappingHost with one
+  // entry per (FED, link, ROC) triple. Entries are 1-based; slot 0 is not filled.
+  class SiPixelCablingSoAESProducer : public ESProducer {
+  public:
+    SiPixelCablingSoAESProducer(edm::ParameterSet const& iConfig)
+        : ESProducer(iConfig), useQuality_(iConfig.getParameter("UseQualityInfo")) {
+      auto cc = setWhatProduced(this);
+      cablingMapToken_ = cc.consumes(edm::ESInputTag{"", iConfig.getParameter("CablingMapLabel")});
+      if (useQuality_) {
+        qualityToken_ = cc.consumes();
+      }
+      geometryToken_ = cc.consumes();
+    }
+
+    static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+      edm::ParameterSetDescription desc;
+      desc.add("CablingMapLabel", "")->setComment("CablingMap label");
+      desc.add("UseQualityInfo", false);
+      descriptions.addWithDefaultLabel(desc);
+    }
+
+    // Build the host-side mapping for the current SiPixelMappingSoARecord.
+    std::optional produce(const SiPixelMappingSoARecord& iRecord) {
+      auto cablingMap = iRecord.getTransientHandle(cablingMapToken_);
+      const SiPixelQuality* quality = nullptr;
+      if (useQuality_) {
+        auto qualityInfo = iRecord.getTransientHandle(qualityToken_);
+        quality = qualityInfo.product();
+      }
+
+      SiPixelMappingHost product(pixelgpudetails::MAX_SIZE, cms::alpakatools::host());
+      std::vector const& fedIds = cablingMap->fedIds();
+      std::unique_ptr const& cabling = cablingMap->cablingTree();
+
+      unsigned int startFed = fedIds.front();
+      unsigned int endFed = fedIds.back();
+
+      sipixelobjects::CablingPathToDetUnit path;
+      int index = 1;
+
+      auto mapView = product.view();
+
+      mapView.hasQuality() = useQuality_;
+      for (unsigned int fed = startFed; fed <= endFed; fed++) {
+        for (unsigned int link = 1; link <= pixelgpudetails::MAX_LINK; link++) {
+          for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) {
+            path = {fed, link, roc};
+            const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path);
+            mapView[index].fed() = fed;
+            mapView[index].link() = link;
+            mapView[index].roc() = roc;
+            if (pixelRoc != nullptr) {
+              mapView[index].rawId() = pixelRoc->rawId();
+              mapView[index].rocInDet() = pixelRoc->idInDetUnit();
+              mapView[index].modToUnpDefault() = false;
+              if (quality != nullptr)
+                mapView[index].badRocs() = quality->IsRocBad(pixelRoc->rawId(), pixelRoc->idInDetUnit());
+              else
+                mapView[index].badRocs() = false;
+            } else {  // store some dummy number
+              mapView[index].rawId() = pixelClustering::invalidModuleId;
+              mapView[index].rocInDet() = pixelClustering::invalidModuleId;
+              mapView[index].badRocs() = true;
+              mapView[index].modToUnpDefault() = true;
+            }
+            index++;
+          }
+        }
+      }  // end of FED loop
+      // Given FedId, Link and idinLnk; use the following formula
+      // to get the rawId and idinDU
+      // index = (FedID-1200) * MAX_LINK* MAX_ROC + (Link-1)* MAX_ROC + idinLnk;
+      // where, MAX_LINK = 48, MAX_ROC = 8
+      // FedID varies between 1200 to 1338 (In total 108 FED's)
+      // Link varies between 1 to 48
+      // idinLnk varies between 1 to 8
+
+      auto trackerGeom = iRecord.getTransientHandle(geometryToken_);
+
+      for (int i = 1; i < index; i++) {
+        if (mapView[i].rawId() == pixelClustering::invalidModuleId) {
+          mapView[i].moduleId() = pixelClustering::invalidModuleId;
+        } else {
+          auto gdet = trackerGeom->idToDetUnit(mapView[i].rawId());
+          if (!gdet) {
+            // no detector unit for this rawId: flag the entry instead of leaving moduleId unset
+            mapView[i].moduleId() = pixelClustering::invalidModuleId;
+            LogDebug("SiPixelCablingSoAESProducer") << " Not found: " << mapView[i].rawId() << std::endl;
+            continue;
+          }
+          mapView[i].moduleId() = gdet->index();
+        }
+        LogDebug("SiPixelCablingSoAESProducer")
+            << "----------------------------------------------------------------------------" << std::endl;
+        LogDebug("SiPixelCablingSoAESProducer") << i << std::setw(20) << mapView[i].fed() << std::setw(20)
+                                                << mapView[i].link() << std::setw(20) << mapView[i].roc() << std::endl;
+        LogDebug("SiPixelCablingSoAESProducer")
+            << i << std::setw(20) << mapView[i].rawId() << std::setw(20) << mapView[i].rocInDet() << std::setw(20)
+            << mapView[i].moduleId() << std::endl;
+        LogDebug("SiPixelCablingSoAESProducer")
+            << i << std::setw(20) << mapView[i].badRocs() << std::setw(20) << std::endl;
+        LogDebug("SiPixelCablingSoAESProducer")
+            << "----------------------------------------------------------------------------" << std::endl;
+      }
+
+      mapView.size() = index - 1;
+
+      return product;
+    }
+
+  private:
+    edm::ESGetToken cablingMapToken_;
+    edm::ESGetToken qualityToken_;
+    edm::ESGetToken geometryToken_;
+    const bool useQuality_;
+  };
+}  // namespace ALPAKA_ACCELERATOR_NAMESPACE
+
+#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h"
+DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(SiPixelCablingSoAESProducer);
diff --git a/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelGainCalibrationForHLTSoAESProducer.cc b/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelGainCalibrationForHLTSoAESProducer.cc
new file mode 100644
index 0000000000000..935d141793a40
--- /dev/null
+++ b/CalibTracker/SiPixelESProducers/plugins/alpaka/SiPixelGainCalibrationForHLTSoAESProducer.cc
@@ -0,0 +1,133 @@
+#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h"
+#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h"
+#include "CondFormats/DataRecord/interface/SiPixelGainCalibrationForHLTRcd.h"
+#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h"
+#include "DataFormats/Portable/interface/alpaka/PortableCollection.h"
+#include "FWCore/Framework/interface/ESProducer.h"
+#include "FWCore/Framework/interface/EventSetup.h"
+#include "FWCore/Framework/interface/ESHandle.h"
+#include "FWCore/Framework/interface/ModuleFactory.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h"
+#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h"
+#include "Geometry/CommonDetUnit/interface/GeomDetType.h"
+
+#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h"
+#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
+#include "FWCore/MessageLogger/interface/MessageLogger.h"
+
+#include
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+
+  // ESProducer that repacks the SiPixelGainCalibrationForHLT payload into a
+  // host-side SoA product: the encoded (gain, ped) pairs, the parameters needed
+  // to decode them, and the per-module begin/end/column indices.
+  class SiPixelGainCalibrationForHLTSoAESProducer : public ESProducer {
+  public:
+    explicit SiPixelGainCalibrationForHLTSoAESProducer(const edm::ParameterSet& iConfig);
+    std::unique_ptr produce(const SiPixelGainCalibrationForHLTSoARcd& iRecord);
+
+    static void fillDescriptions(edm::ConfigurationDescriptions& descriptions);
+
+  private:
+    edm::ESGetToken gainsToken_;
+    edm::ESGetToken geometryToken_;
+  };
+
+  SiPixelGainCalibrationForHLTSoAESProducer::SiPixelGainCalibrationForHLTSoAESProducer(const edm::ParameterSet& iConfig)
+      : ESProducer(iConfig) {
+    auto cc = setWhatProduced(this);
+    gainsToken_ = cc.consumes();
+    geometryToken_ = cc.consumes();
+  }
+
+  void SiPixelGainCalibrationForHLTSoAESProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
+    edm::ParameterSetDescription desc;
+    descriptions.addWithDefaultLabel(desc);
+  }
+
+  std::unique_ptr SiPixelGainCalibrationForHLTSoAESProducer::produce(
+      const SiPixelGainCalibrationForHLTSoARcd& iRecord) {
+    auto const& gains = iRecord.get(gainsToken_);
+    auto const& geom = iRecord.get(geometryToken_);
+
+    auto product = std::make_unique(gains.data().size(), cms::alpakatools::host());
+
+    // bizarre logic (looking for first strip-det) don't ask
+    auto const& dus = geom.detUnits();
+    unsigned int n_detectors = dus.size();
+    for (unsigned int i = 1; i < 7; ++i) {
+      const auto offset = geom.offsetDU(GeomDetEnumerators::tkDetEnum[i]);
+      if (offset != dus.size() && dus[offset]->type().isTrackerStrip()) {
+        if (n_detectors > offset)
+          n_detectors = offset;
+      }
+    }
+
+    LogDebug("SiPixelGainCalibrationForHLTSoA")
+        << "caching calibs for " << n_detectors << " pixel detectors of size " << gains.data().size() << '\n'
+        << "sizes " << sizeof(char) << ' ' << sizeof(uint8_t) << ' ' << sizeof(siPixelGainsSoA::DecodingStructure);
+
+    // the payload is a flat sequence of (gain, ped) byte pairs; requiring i + 1 < size keeps
+    // the second read in bounds even if the payload length is odd
+    for (size_t i = 0; i + 1 < gains.data().size(); i = i + 2) {
+      product->view().v_pedestals()[i / 2].gain = gains.data()[i];
+      product->view().v_pedestals()[i / 2].ped = gains.data()[i + 1];
+    }
+
+    //std::copy here
+    // do not read back from the (possibly write-combined) memory buffer
+    auto minPed = gains.getPedLow();
+    auto maxPed = gains.getPedHigh();
+    auto minGain = gains.getGainLow();
+    auto maxGain = gains.getGainHigh();
+    auto nBinsToUseForEncoding = 253;
+
+    // we will simplify later (not everything is needed....)
+    product->view().minPed() = minPed;
+    product->view().maxPed() = maxPed;
+    product->view().minGain() = minGain;
+    product->view().maxGain() = maxGain;
+
+    product->view().numberOfRowsAveragedOver() = 80;
+    product->view().nBinsToUseForEncoding() = nBinsToUseForEncoding;
+    product->view().deadFlag() = 255;
+    product->view().noisyFlag() = 254;
+
+    product->view().pedPrecision() = static_cast(maxPed - minPed) / nBinsToUseForEncoding;
+    product->view().gainPrecision() = static_cast(maxGain - minGain) / nBinsToUseForEncoding;
+
+    LogDebug("SiPixelGainCalibrationForHLTSoA")
+        << "precisions g " << product->view().pedPrecision() << ' ' << product->view().gainPrecision();
+
+    // fill the index map
+    auto const& ind = gains.getIndexes();
+    LogDebug("SiPixelGainCalibrationForHLTSoA") << ind.size() << " " << n_detectors;
+
+    for (auto i = 0U; i < n_detectors; ++i) {
+      auto p = std::lower_bound(
+          ind.begin(), ind.end(), dus[i]->geographicalId().rawId(), SiPixelGainCalibrationForHLT::StrictWeakOrdering());
+      assert(p != ind.end() && p->detid == dus[i]->geographicalId());
+      assert(p->iend <= gains.data().size());
+      assert(p->iend >= p->ibegin);
+      assert(0 == p->ibegin % 2);
+      assert(0 == p->iend % 2);
+      assert(p->ibegin != p->iend);
+      assert(p->ncols > 0);
+
+      product->view().modStarts()[i] = p->ibegin;
+      product->view().modEnds()[i] = p->iend;
+      product->view().modCols()[i] = p->ncols;
+
+      if (ind[i].detid != dus[i]->geographicalId())
+        LogDebug("SiPixelGainCalibrationForHLTSoA") << ind[i].detid << "!=" << dus[i]->geographicalId();
+    }
+
+    return product;
+  }
+
+}  // namespace ALPAKA_ACCELERATOR_NAMESPACE
+DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(SiPixelGainCalibrationForHLTSoAESProducer);
diff --git a/CondFormats/SiPixelObjects/BuildFile.xml b/CondFormats/SiPixelObjects/BuildFile.xml
index 1d9b8d6b19f53..ddd87c956d217 100644
--- a/CondFormats/SiPixelObjects/BuildFile.xml +++ b/CondFormats/SiPixelObjects/BuildFile.xml @@ -1,3 +1,4 @@ + @@ -12,6 +13,9 @@ + + + diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h b/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h new file mode 100644 index 0000000000000..28361ab184073 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h @@ -0,0 +1,9 @@ +#ifndef CondFormats_SiPixelObjects_SiPixelGainCalibrationForHLTHost_h +#define CondFormats_SiPixelObjects_SiPixelGainCalibrationForHLTHost_h + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" + +using SiPixelGainCalibrationForHLTHost = PortableHostCollection; + +#endif // CondFormats_SiPixelObjects_SiPixelGainCalibrationForHLTHost_h diff --git a/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h b/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h new file mode 100644 index 0000000000000..03c1c37c61046 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h @@ -0,0 +1,42 @@ +#ifndef CondFormats_SiPixelObjects_interface_SiPixelGainCalibrationForHLTLayout_h +#define CondFormats_SiPixelObjects_interface_SiPixelGainCalibrationForHLTLayout_h + +#include +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +namespace siPixelGainsSoA { + struct DecodingStructure { + uint8_t gain; + uint8_t ped; + }; + + using Ranges = std::array; + using Cols = std::array; +} // namespace siPixelGainsSoA + +GENERATE_SOA_LAYOUT(SiPixelGainCalibrationForHLTLayout, + SOA_COLUMN(siPixelGainsSoA::DecodingStructure, v_pedestals), + + SOA_SCALAR(siPixelGainsSoA::Ranges, modStarts), + SOA_SCALAR(siPixelGainsSoA::Ranges, modEnds), + SOA_SCALAR(siPixelGainsSoA::Cols, modCols), + + 
SOA_SCALAR(float, minPed), + SOA_SCALAR(float, maxPed), + SOA_SCALAR(float, minGain), + SOA_SCALAR(float, maxGain), + SOA_SCALAR(float, pedPrecision), + SOA_SCALAR(float, gainPrecision), + + SOA_SCALAR(unsigned int, numberOfRowsAveragedOver), + SOA_SCALAR(unsigned int, nBinsToUseForEncoding), + SOA_SCALAR(unsigned int, deadFlag), + SOA_SCALAR(unsigned int, noisyFlag), + SOA_SCALAR(float, link)) + +using SiPixelGainCalibrationForHLTSoA = SiPixelGainCalibrationForHLTLayout<>; +using SiPixelGainCalibrationForHLTSoAView = SiPixelGainCalibrationForHLTSoA::View; +using SiPixelGainCalibrationForHLTSoAConstView = SiPixelGainCalibrationForHLTSoA::ConstView; + +#endif // CondFormats_SiPixelObjects_interface_SiPixelGainCalibrationForHLTLayout_h diff --git a/CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h b/CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h new file mode 100644 index 0000000000000..772a7a97e267b --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h @@ -0,0 +1,10 @@ +#ifndef CondFormats_SiPixelObjects_SiPixelMappingHost_h +#define CondFormats_SiPixelObjects_SiPixelMappingHost_h + +#include +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h" + +using SiPixelMappingHost = PortableHostCollection; + +#endif // CondFormats_SiPixelObjects_SiPixelMappingHost_h diff --git a/CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h b/CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h new file mode 100644 index 0000000000000..ef123d443c795 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h @@ -0,0 +1,24 @@ +#ifndef CondFormats_SiPixelObjects_interface_SiPixelMappingLayout_h +#define CondFormats_SiPixelObjects_interface_SiPixelMappingLayout_h + +#include +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelROCsStatusAndMapping.h" + 
+GENERATE_SOA_LAYOUT(SiPixelMappingLayout, + SOA_COLUMN(unsigned int, fed), + SOA_COLUMN(unsigned int, link), + SOA_COLUMN(unsigned int, roc), + SOA_COLUMN(unsigned int, rawId), + SOA_COLUMN(unsigned int, rocInDet), + SOA_COLUMN(unsigned int, moduleId), + SOA_COLUMN(bool, badRocs), + SOA_COLUMN(unsigned char, modToUnpDefault), + SOA_SCALAR(unsigned int, size), + SOA_SCALAR(bool, hasQuality)) + +using SiPixelMappingSoA = SiPixelMappingLayout<>; +using SiPixelMappingSoAView = SiPixelMappingSoA::View; +using SiPixelMappingSoAConstView = SiPixelMappingSoA::ConstView; + +#endif // CondFormats_SiPixelObjects_interface_SiPixelMappingLayout_h diff --git a/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h new file mode 100644 index 0000000000000..3c5e7094654c6 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h @@ -0,0 +1,13 @@ +#ifndef CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTDevice_h +#define CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTDevice_h + +#include +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using SiPixelGainCalibrationForHLTDevice = PortableCollection; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTDevice_h diff --git a/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h new file mode 100644 index 0000000000000..1fbce15dbe231 --- /dev/null +++ b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h @@ -0,0 +1,41 @@ +#ifndef 
CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTUtilities_h
+#define CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTUtilities_h
+
+#include
+#include
+#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h"
+// Decodes the packed pedestal/gain bytes of one pixel from the gain-calibration SoA view.
+struct SiPixelGainUtilities {
+  ALPAKA_FN_HOST_ACC ALPAKA_FN_ACC ALPAKA_FN_INLINE static std::pair getPedAndGain(
+      const SiPixelGainCalibrationForHLTSoAConstView& view,
+      uint32_t moduleInd,
+      int col,
+      int row,
+      bool& isDeadColumn,
+      bool& isNoisyColumn) {
+    auto start = view.modStarts()[moduleInd];
+    auto end = view.modEnds()[moduleInd];
+    auto nCols = view.modCols()[moduleInd];
+    // determine what averaged data block we are in (there should be 1 or 2 of these depending on if plaquette is 1 by X or 2 by X)
+    unsigned int lengthOfColumnData = (end - start) / nCols;
+    unsigned int lengthOfAveragedDataInEachColumn = 2;  // we always only have two values per column averaged block
+    unsigned int numberOfDataBlocksToSkip = row / view.numberOfRowsAveragedOver();
+
+    auto offset = start + col * lengthOfColumnData + lengthOfAveragedDataInEachColumn * numberOfDataBlocksToSkip;
+    assert(offset < end);
+    assert(offset < 3088384);
+    assert(0 == offset % 2);
+
+    auto lp = view.v_pedestals();
+    auto s = lp[offset / 2];
+
+    isDeadColumn = (s.ped & 0xFF) == view.deadFlag();
+    isNoisyColumn = (s.ped & 0xFF) == view.noisyFlag();
+    float decodeGain = float(s.gain & 0xFF) * view.gainPrecision() + view.minGain();
+    float decodePed = float(s.ped & 0xFF) * view.pedPrecision() + view.minPed();
+
+    return std::make_pair(decodePed, decodeGain);
+  }
+};
+
+#endif  //CondFormats_SiPixelObjects_interface_alpaka_SiPixelGainCalibrationForHLTUtilities_h
\ No newline at end of file
diff --git a/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h
new file mode 100644
index 0000000000000..8a16caa0d7368
--- /dev/null
+++
b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h
@@ -0,0 +1,17 @@
+#ifndef CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingDevice_h
+#define CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingDevice_h
+
+#include
+#include
+#include "DataFormats/Portable/interface/alpaka/PortableCollection.h"
+#include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h"
+#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h"
+#include "DataFormats/Portable/interface/PortableHostCollection.h"
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+
+  using SiPixelMappingDevice = PortableCollection;
+
+}  // namespace ALPAKA_ACCELERATOR_NAMESPACE
+
+#endif  // CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingDevice_h
diff --git a/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h
new file mode 100644
index 0000000000000..800cf0ac671cd
--- /dev/null
+++ b/CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h
@@ -0,0 +1,58 @@
+#ifndef CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingUtilities_h
+#define CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingUtilities_h
+
+#include
+#include
+#include "CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h"
+#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h"
+#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+
+  struct SiPixelMappingUtilities {
+    ALPAKA_FN_HOST_ACC ALPAKA_FN_ACC ALPAKA_FN_INLINE static bool hasQuality(const SiPixelMappingSoAConstView& view) {
+      return view.hasQuality();
+    }
+
+    // Fill, on the host, one "do not unpack" flag per (FED, link, ROC) triple and copy
+    // the flags to the device. A flag is set when `modules` is non-empty and does not
+    // contain the ROC's rawId, or when the cabling tree has no ROC for that triple.
+    // Entries are 1-based and visited FED-major, then link, then ROC.
+    ALPAKA_FN_HOST_ACC ALPAKA_FN_ACC ALPAKA_FN_INLINE static cms::alpakatools::device_buffer
+    getModToUnpRegionalAsync(std::set const& modules,
+                             const SiPixelFedCablingTree* cabling,
+                             std::vector const& fedIds,
+                             Queue& queue) {
+      auto modToUnpDevice = cms::alpakatools::make_device_buffer(queue, pixelgpudetails::MAX_SIZE);
+      auto modToUnpHost = cms::alpakatools::make_host_buffer(queue, pixelgpudetails::MAX_SIZE);
+
+      unsigned int startFed = fedIds.front();
+      // fedIds.back() is the last FED id itself, so the inclusive loop below must reach it
+      unsigned int endFed = fedIds.back();
+
+      sipixelobjects::CablingPathToDetUnit path;
+      int index = 1;
+
+      for (unsigned int fed = startFed; fed <= endFed; fed++) {
+        for (unsigned int link = 1; link <= pixelgpudetails::MAX_LINK; link++) {
+          for (unsigned int roc = 1; roc <= pixelgpudetails::MAX_ROC; roc++) {
+            path = {fed, link, roc};
+            const sipixelobjects::PixelROC* pixelRoc = cabling->findItem(path);
+            if (pixelRoc != nullptr) {
+              modToUnpHost[index] = (not modules.empty()) and (modules.find(pixelRoc->rawId()) == modules.end());
+            } else {  // store some dummy number
+              modToUnpHost[index] = true;
+            }
+            index++;
+          }
+        }
+      }
+
+      alpaka::memcpy(queue, modToUnpDevice, modToUnpHost);
+
+      return modToUnpDevice;
+    }
+  };
+}  // namespace ALPAKA_ACCELERATOR_NAMESPACE
+#endif  //CondFormats_SiPixelObjects_interface_alpaka_SiPixelMappingUtilities_h
diff --git a/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelGainCalibrationForHLTHost.cc b/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelGainCalibrationForHLTHost.cc
new file mode 100644
index 0000000000000..be54c23dd8df6
--- /dev/null
+++ b/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelGainCalibrationForHLTHost.cc
@@ -0,0 +1,4 @@
+#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTHost.h"
+#include "FWCore/Utilities/interface/typelookup.h"
+
+TYPELOOKUP_DATA_REG(SiPixelGainCalibrationForHLTHost);
diff --git a/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelMappingHost.cc b/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelMappingHost.cc
new file mode 100644
index 0000000000000..27201b65add22
--- /dev/null
+++ b/CondFormats/SiPixelObjects/src/T_EventSetup_SiPixelMappingHost.cc
@@ -0,0 +1,4 @@
+#include
"CondFormats/SiPixelObjects/interface/SiPixelMappingHost.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(SiPixelMappingHost); \ No newline at end of file diff --git a/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelGainCalibrationForHLTDevice.cc b/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelGainCalibrationForHLTDevice.cc new file mode 100644 index 0000000000000..fec7ca3ba1c52 --- /dev/null +++ b/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelGainCalibrationForHLTDevice.cc @@ -0,0 +1,4 @@ +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(SiPixelGainCalibrationForHLTDevice); \ No newline at end of file diff --git a/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelMappingDevice.cc b/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelMappingDevice.cc new file mode 100644 index 0000000000000..0b86fdf64978b --- /dev/null +++ b/CondFormats/SiPixelObjects/src/alpaka/T_EventSetup_SiPixelMappingDevice.cc @@ -0,0 +1,4 @@ +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(SiPixelMappingDevice); diff --git a/DataFormats/SiPixelClusterSoA/BuildFile.xml b/DataFormats/SiPixelClusterSoA/BuildFile.xml new file mode 100644 index 0000000000000..c9b7e4ef81817 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/BuildFile.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h b/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h new file mode 100644 index 0000000000000..6726c1d29d5c9 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h @@ -0,0 +1,35 @@ +#ifndef DataFormats_SiPixelClusterSoA_interface_ClusteringConstants_h +#define 
DataFormats_SiPixelClusterSoA_interface_ClusteringConstants_h + +#include +#include + +//TODO: move this to TrackerTraits! +namespace pixelClustering { +#ifdef GPU_SMALL_EVENTS + // kept for testing and debugging + constexpr uint32_t maxHitsInIter() { return 64; } +#else + // optimized for real data PU 50 + // tested on MC events with 55-75 pileup events + constexpr uint32_t maxHitsInIter() { return 160; } //TODO better tuning for PU 140-200 +#endif + constexpr uint32_t maxHitsInModule() { return 1024; } + + constexpr uint16_t clusterThresholdLayerOne = 2000; + constexpr uint16_t clusterThresholdOtherLayers = 4000; + + constexpr uint16_t clusterThresholdPhase2LayerOne = 4000; + constexpr uint16_t clusterThresholdPhase2OtherLayers = 4000; + + constexpr uint32_t maxNumDigis = 3 * 256 * 1024; // @PU=200 µ=530k σ=50k this is >4σ away + constexpr uint16_t maxNumModules = 4000; + + constexpr int32_t maxNumClustersPerModules = maxHitsInModule(); + constexpr uint16_t invalidModuleId = std::numeric_limits::max() - 1; + constexpr int invalidClusterId = -9999; + static_assert(invalidModuleId > maxNumModules); // invalidModuleId must be > maxNumModules + +} // namespace pixelClustering + +#endif // DataFormats_SiPixelClusterSoA_interface_ClusteringConstants_h diff --git a/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h new file mode 100644 index 0000000000000..2593475bf5c3a --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h @@ -0,0 +1,38 @@ +#ifndef DataFormats_SiPixelClusterSoA_interface_SiPixelClustersDevice_h +#define DataFormats_SiPixelClusterSoA_interface_SiPixelClustersDevice_h + +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include 
"DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +template +class SiPixelClustersDevice : public PortableDeviceCollection { +public: + SiPixelClustersDevice() = default; + + template + explicit SiPixelClustersDevice(size_t maxModules, TQueue queue) + : PortableDeviceCollection(maxModules + 1, queue) {} + + // Constructor which specifies the SoA size + explicit SiPixelClustersDevice(size_t maxModules, TDev const &device) + : PortableDeviceCollection(maxModules + 1, device) {} + + void setNClusters(uint32_t nClusters, int32_t offsetBPIX2) { + nClusters_h = nClusters; + offsetBPIX2_h = offsetBPIX2; + } + + uint32_t nClusters() const { return nClusters_h; } + int32_t offsetBPIX2() const { return offsetBPIX2_h; } + +private: + uint32_t nClusters_h = 0; + int32_t offsetBPIX2_h = 0; +}; + +#endif // DataFormats_SiPixelClusterSoA_interface_SiPixelClustersDevice_h diff --git a/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h new file mode 100644 index 0000000000000..eb086160a6188 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h @@ -0,0 +1,33 @@ +#ifndef DataFormats_SiPixelClusterSoA_interface_SiPixelClustersHost_h +#define DataFormats_SiPixelClusterSoA_interface_SiPixelClustersHost_h + +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" + +// TODO: The class is created via inheritance of the PortableCollection. +// This is generally discouraged, and should be done via composition. 
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 +class SiPixelClustersHost : public PortableHostCollection { +public: + SiPixelClustersHost() = default; + + template + explicit SiPixelClustersHost(size_t maxModules, TQueue queue) + : PortableHostCollection(maxModules + 1, queue) {} + + void setNClusters(uint32_t nClusters, int32_t offsetBPIX2) { + nClusters_h = nClusters; + offsetBPIX2_h = offsetBPIX2; + } + + uint32_t nClusters() const { return nClusters_h; } + int32_t offsetBPIX2() const { return offsetBPIX2_h; } + +private: + uint32_t nClusters_h = 0; + int32_t offsetBPIX2_h = 0; +}; + +#endif // DataFormats_SiPixelClusterSoA_interface_SiPixelClustersHost_h diff --git a/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h new file mode 100644 index 0000000000000..c44c0148662ff --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h @@ -0,0 +1,16 @@ +#ifndef DataFormats_SiPixelClusterSoA_SiPixelClustersLayout_h +#define DataFormats_SiPixelClusterSoA_SiPixelClustersLayout_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +GENERATE_SOA_LAYOUT(SiPixelClustersLayout, + SOA_COLUMN(uint32_t, moduleStart), + SOA_COLUMN(uint32_t, clusInModule), + SOA_COLUMN(uint32_t, moduleId), + SOA_COLUMN(uint32_t, clusModuleStart)) + +using SiPixelClustersSoA = SiPixelClustersLayout<>; +using SiPixelClustersSoAView = SiPixelClustersSoA::View; +using SiPixelClustersSoAConstView = SiPixelClustersSoA::ConstView; + +#endif // DataFormats_SiPixelClusterSoA_SiPixelClustersLayout_h diff --git a/DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h b/DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h new file mode 100644 index 0000000000000..c5e35475b5330 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h @@ -0,0 +1,35 @@ +#ifndef 
DataFormats_SiPixelClusterSoA_interface_alpaka_SiPixelClustersSoACollection_h +#define DataFormats_SiPixelClusterSoA_interface_alpaka_SiPixelClustersSoACollection_h + +#include + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using SiPixelClustersSoACollection = + std::conditional_t, SiPixelClustersHost, SiPixelClustersDevice>; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template + struct CopyToHost> { + template + static auto copyAsync(TQueue &queue, SiPixelClustersDevice const &srcData) { + SiPixelClustersHost dstData(srcData->metadata().size(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + dstData.setNClusters(srcData.nClusters(), srcData.offsetBPIX2()); +#ifdef GPU_DEBUG //keeping this untiil copies are in the Tracer + printf("SiPixelClustersSoACollection: I'm copying to host.\n"); +#endif + return dstData; + } + }; +} // namespace cms::alpakatools + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(SiPixelClustersSoACollection, SiPixelClustersHost); +#endif // DataFormats_SiPixelClusterSoA_interface_alpaka_SiPixelClustersSoACollection_h diff --git a/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda.h b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..e54864699fb73 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda.h @@ -0,0 +1,8 @@ +#ifndef DataFormats_SiPixelClusterSoA_src_alpaka_classes_cuda_h +#define DataFormats_SiPixelClusterSoA_src_alpaka_classes_cuda_h + +#include "DataFormats/Common/interface/Wrapper.h" 
+#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" + +#endif // DataFormats_SiPixelClusterSoA_src_alpaka_classes_cuda_h diff --git a/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..b9858c3fbffdd --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_cuda_def.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm.h b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm.h new file mode 100644 index 0000000000000..bd510fa1618b0 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm.h @@ -0,0 +1,8 @@ +#ifndef DataFormats_SiPixelClusterSoA_src_alpaka_classes_rocm_h +#define DataFormats_SiPixelClusterSoA_src_alpaka_classes_rocm_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" + +#endif // DataFormats_SiPixelClusterSoA_src_alpaka_classes_rocm_h diff --git a/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm_def.xml new file mode 100644 index 0000000000000..d27887904579c --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/alpaka/classes_rocm_def.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/src/classes.cc b/DataFormats/SiPixelClusterSoA/src/classes.cc new file mode 100644 index 0000000000000..70b4f7b100cb4 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/classes.cc @@ -0,0 +1,4 @@ +#include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" + +SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection); \ No newline at end of file diff --git 
a/DataFormats/SiPixelClusterSoA/src/classes.h b/DataFormats/SiPixelClusterSoA/src/classes.h new file mode 100644 index 0000000000000..8514c7732375b --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/classes.h @@ -0,0 +1,7 @@ +#ifndef DataFormats_SiPixelClusterSoA_src_classes_h +#define DataFormats_SiPixelClusterSoA_src_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" + +#endif // DataFormats_SiPixelClusterSoA_src_classes_h diff --git a/DataFormats/SiPixelClusterSoA/src/classes_def.xml b/DataFormats/SiPixelClusterSoA/src/classes_def.xml new file mode 100644 index 0000000000000..96b9df2725473 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/src/classes_def.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/test/BuildFile.xml b/DataFormats/SiPixelClusterSoA/test/BuildFile.xml new file mode 100644 index 0000000000000..ed54aae76ecab --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/test/BuildFile.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.cc b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.cc new file mode 100644 index 0000000000000..d96469858b916 --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.cc @@ -0,0 +1,45 @@ +#include + +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include 
"HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testClusterSoA { + + void runKernels(SiPixelClustersSoAView clust_view, Queue& queue); + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // Inner scope to deallocate memory before destroying the stream + { + // Instantiate tracks on device. PortableDeviceCollection allocates + // SoA on device automatically. + SiPixelClustersSoACollection clusters_d(100, queue); + testClusterSoA::runKernels(clusters_d.view(), queue); + + // Instantate tracks on host. This is where the data will be + // copied to from device. + SiPixelClustersHost clusters_h(clusters_d.view().metadata().size(), queue); + + std::cout << clusters_h.view().metadata().size() << std::endl; + alpaka::memcpy(queue, clusters_h.buffer(), clusters_d.const_buffer()); + alpaka::wait(queue); + } + + return 0; +} diff --git a/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc new file mode 100644 index 0000000000000..684380dcbdfbc --- /dev/null +++ b/DataFormats/SiPixelClusterSoA/test/alpaka/Clusters_test.dev.cc @@ -0,0 +1,49 @@ +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace alpaka; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace testClusterSoA { + + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, 
SiPixelClustersSoAView clust_view) const { + for (int32_t j : elements_with_stride(acc, clust_view.metadata().size())) { + clust_view[j].moduleStart() = j; + clust_view[j].clusInModule() = j * 2; + clust_view[j].moduleId() = j * 3; + clust_view[j].clusModuleStart() = j * 4; + } + } + }; + + class TestVerifyKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelClustersSoAConstView clust_view) const { + for (uint32_t j : elements_with_stride(acc, clust_view.metadata().size())) { + assert(clust_view[j].moduleStart() == j); + assert(clust_view[j].clusInModule() == j * 2); + assert(clust_view[j].moduleId() == j * 3); + assert(clust_view[j].clusModuleStart() == j * 4); + } + } + }; + + void runKernels(SiPixelClustersSoAView clust_view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(clust_view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, clust_view); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, clust_view); + } + + } // namespace testClusterSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h index f352754e31d17..a97dfadea52c4 100644 --- a/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h +++ b/DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h @@ -5,36 +5,46 @@ #include #include -// The main purpose of this class is to deliver digi and cluster data -// from an EDProducer that transfers the data from GPU to host to an -// EDProducer that converts the SoA to legacy data products. The class -// is independent of any GPU technology, and in prunciple could be -// produced by host code, and be used for other purposes than -// conversion-to-legacy as well. 
-class SiPixelDigisSoA { -public: - SiPixelDigisSoA() = default; - explicit SiPixelDigisSoA( - size_t nDigis, const uint32_t* pdigi, const uint32_t* rawIdArr, const uint16_t* adc, const int32_t* clus); - ~SiPixelDigisSoA() = default; - - auto size() const { return pdigi_.size(); } - - uint32_t pdigi(size_t i) const { return pdigi_[i]; } - uint32_t rawIdArr(size_t i) const { return rawIdArr_[i]; } - uint16_t adc(size_t i) const { return adc_[i]; } - int32_t clus(size_t i) const { return clus_[i]; } - - const std::vector& pdigiVector() const { return pdigi_; } - const std::vector& rawIdArrVector() const { return rawIdArr_; } - const std::vector& adcVector() const { return adc_; } - const std::vector& clusVector() const { return clus_; } - -private: - std::vector pdigi_; // packed digi (row, col, adc) of each pixel - std::vector rawIdArr_; // DetId of each pixel - std::vector adc_; // ADC of each pixel - std::vector clus_; // cluster id of each pixel -}; - -#endif +namespace legacy { + + // The main purpose of this class is to deliver digi and cluster data + // from an EDProducer that transfers the data from GPU to host to an + // EDProducer that converts the SoA to legacy data products. The class + // is independent of any GPU technology, and in prunciple could be + // produced by host code, and be used for other purposes than + // conversion-to-legacy as well. 
+ + class SiPixelDigisSoA { + public: + SiPixelDigisSoA() = default; + explicit SiPixelDigisSoA( + size_t nDigis, const uint32_t* pdigi, const uint32_t* rawIdArr, const uint16_t* adc, const int32_t* clus) + : pdigi_(pdigi, pdigi + nDigis), + rawIdArr_(rawIdArr, rawIdArr + nDigis), + adc_(adc, adc + nDigis), + clus_(clus, clus + nDigis) {} + + ~SiPixelDigisSoA() = default; + + auto size() const { return pdigi_.size(); } + + uint32_t pdigi(size_t i) const { return pdigi_[i]; } + uint32_t rawIdArr(size_t i) const { return rawIdArr_[i]; } + uint16_t adc(size_t i) const { return adc_[i]; } + int32_t clus(size_t i) const { return clus_[i]; } + + const std::vector& pdigiVector() const { return pdigi_; } + const std::vector& rawIdArrVector() const { return rawIdArr_; } + const std::vector& adcVector() const { return adc_; } + const std::vector& clusVector() const { return clus_; } + + private: + std::vector pdigi_; // packed digi (row, col, adc) of each pixel + std::vector rawIdArr_; // DetId of each pixel + std::vector adc_; // ADC of each pixel + std::vector clus_; // cluster id of each pixel + }; + +} // namespace legacy + +#endif // DataFormats_SiPixelDigi_interface_SiPixelDigisSoA_h diff --git a/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc b/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc deleted file mode 100644 index b95c004a50a25..0000000000000 --- a/DataFormats/SiPixelDigi/src/SiPixelDigisSoA.cc +++ /dev/null @@ -1,10 +0,0 @@ -#include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" - -#include - -SiPixelDigisSoA::SiPixelDigisSoA( - size_t nDigis, const uint32_t *pdigi, const uint32_t *rawIdArr, const uint16_t *adc, const int32_t *clus) - : pdigi_(pdigi, pdigi + nDigis), - rawIdArr_(rawIdArr, rawIdArr + nDigis), - adc_(adc, adc + nDigis), - clus_(clus, clus + nDigis) {} diff --git a/DataFormats/SiPixelDigi/src/classes.h b/DataFormats/SiPixelDigi/src/classes.h index 1360ee6e469d9..be707668d0dfc 100644 --- a/DataFormats/SiPixelDigi/src/classes.h +++ 
b/DataFormats/SiPixelDigi/src/classes.h @@ -1,6 +1,8 @@ #ifndef SIPIXELDIGI_CLASSES_H #define SIPIXELDIGI_CLASSES_H +#include + #include "DataFormats/SiPixelDigi/interface/PixelDigi.h" #include "DataFormats/SiPixelDigi/interface/PixelDigiCollection.h" #include "DataFormats/SiPixelDigi/interface/SiPixelCalibDigi.h" @@ -9,6 +11,5 @@ #include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/Common/interface/DetSetVector.h" #include "DataFormats/Common/interface/DetSetVectorNew.h" -#include #endif // SIPIXELDIGI_CLASSES_H diff --git a/DataFormats/SiPixelDigi/src/classes_def.xml b/DataFormats/SiPixelDigi/src/classes_def.xml index e6bc08de161fa..697b6c467d799 100755 --- a/DataFormats/SiPixelDigi/src/classes_def.xml +++ b/DataFormats/SiPixelDigi/src/classes_def.xml @@ -50,6 +50,6 @@ - - + + diff --git a/DataFormats/SiPixelDigiSoA/BuildFile.xml b/DataFormats/SiPixelDigiSoA/BuildFile.xml new file mode 100644 index 0000000000000..538802f92c3ca --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/BuildFile.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h new file mode 100644 index 0000000000000..36c7d0be7e88a --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h @@ -0,0 +1,33 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsDevice_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsDevice_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +template +class SiPixelDigiErrorsDevice : public PortableDeviceCollection { +public: + SiPixelDigiErrorsDevice() = 
default; + template + explicit SiPixelDigiErrorsDevice(size_t maxFedWords, TQueue queue) + : PortableDeviceCollection(maxFedWords, queue), maxFedWords_(maxFedWords) {} + + // Constructor which specifies the SoA size + explicit SiPixelDigiErrorsDevice(size_t maxFedWords, TDev const& device) + : PortableDeviceCollection(maxFedWords, device) {} + + auto& error_data() const { return (*this->view().pixelErrors()); } + auto maxFedWords() const { return maxFedWords_; } + +private: + int maxFedWords_; +}; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsDevice_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h new file mode 100644 index 0000000000000..ac706dea4b544 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h @@ -0,0 +1,30 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsHost_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsHost_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +class SiPixelDigiErrorsHost : public PortableHostCollection { +public: + SiPixelDigiErrorsHost() = default; + template + explicit SiPixelDigiErrorsHost(int maxFedWords, TQueue queue) + : PortableHostCollection(maxFedWords, queue), maxFedWords_(maxFedWords) {} + + int maxFedWords() const { return maxFedWords_; } + + auto& error_data() { return (*view().pixelErrors()); } + auto const& error_data() const { return (*view().pixelErrors()); } + +private: + int maxFedWords_ = 0; +}; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsHost_h diff --git 
a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h new file mode 100644 index 0000000000000..b6398bc840c5b --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h @@ -0,0 +1,14 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsSoA_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" + +GENERATE_SOA_LAYOUT(SiPixelDigiErrorsLayout, SOA_COLUMN(SiPixelErrorCompact, pixelErrors), SOA_SCALAR(uint32_t, size)) + +using SiPixelDigiErrorsSoA = SiPixelDigiErrorsLayout<>; +using SiPixelDigiErrorsSoAView = SiPixelDigiErrorsSoA::View; +using SiPixelDigiErrorsSoAConstView = SiPixelDigiErrorsSoA::ConstView; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigiErrorsSoA_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h new file mode 100644 index 0000000000000..1748069685923 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h @@ -0,0 +1,37 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigisDevice_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigisDevice_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +template +class SiPixelDigisDevice : public PortableDeviceCollection { +public: + SiPixelDigisDevice() = default; + template + explicit SiPixelDigisDevice(size_t maxFedWords, TQueue queue) + : PortableDeviceCollection(maxFedWords + 1, queue) {} + + // Constructor which specifies the SoA size + explicit SiPixelDigisDevice(size_t maxFedWords, TDev 
const &device) + : PortableDeviceCollection(maxFedWords + 1, device) {} + + void setNModulesDigis(uint32_t nModules, uint32_t nDigis) { + nModules_h = nModules; + nDigis_h = nDigis; + } + + uint32_t nModules() const { return nModules_h; } + uint32_t nDigis() const { return nDigis_h; } + +private: + uint32_t nModules_h = 0; + uint32_t nDigis_h = 0; +}; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisDevice_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h new file mode 100644 index 0000000000000..4e4650efac1cb --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h @@ -0,0 +1,30 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigisHost_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigisHost_h + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" + +// TODO: The class is created via inheritance of the PortableDeviceCollection. +// This is generally discouraged, and should be done via composition. 
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 +class SiPixelDigisHost : public PortableHostCollection { +public: + SiPixelDigisHost() = default; + template + explicit SiPixelDigisHost(size_t maxFedWords, TQueue queue) + : PortableHostCollection(maxFedWords + 1, queue) {} + + void setNModulesDigis(uint32_t nModules, uint32_t nDigis) { + nModules_h = nModules; + nDigis_h = nDigis; + } + + uint32_t nModules() const { return nModules_h; } + uint32_t nDigis() const { return nDigis_h; } + +private: + uint32_t nModules_h = 0; + uint32_t nDigis_h = 0; +}; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisHost_h diff --git a/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h new file mode 100644 index 0000000000000..2c7c5e1079513 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h @@ -0,0 +1,19 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_SiPixelDigisSoA_h +#define DataFormats_SiPixelDigiSoA_interface_SiPixelDigisSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +GENERATE_SOA_LAYOUT(SiPixelDigisLayout, + SOA_COLUMN(int32_t, clus), + SOA_COLUMN(uint32_t, pdigi), + SOA_COLUMN(uint32_t, rawIdArr), + SOA_COLUMN(uint16_t, adc), + SOA_COLUMN(uint16_t, xx), + SOA_COLUMN(uint16_t, yy), + SOA_COLUMN(uint16_t, moduleId)) + +using SiPixelDigisSoA = SiPixelDigisLayout<>; +using SiPixelDigisSoAView = SiPixelDigisSoA::View; +using SiPixelDigisSoAConstView = SiPixelDigisSoA::ConstView; + +#endif // DataFormats_SiPixelDigiSoA_interface_SiPixelDigisSoA_h diff --git a/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h new file mode 100644 index 0000000000000..673a22bd23a1e --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h @@ -0,0 +1,39 @@ +#ifndef 
DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigiErrorsSoACollection_h +#define DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigiErrorsSoACollection_h + +#include + +#include + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using SiPixelDigiErrorsSoACollection = + std::conditional_t, SiPixelDigiErrorsHost, SiPixelDigiErrorsDevice>; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template + struct CopyToHost> { + template + static auto copyAsync(TQueue& queue, SiPixelDigiErrorsDevice const& srcData) { + SiPixelDigiErrorsHost dstData(srcData.maxFedWords(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); +#ifdef GPU_DEBUG + printf("SiPixelDigiErrorsSoACollection: I'm copying to host.\n"); +#endif + return dstData; + } + }; +} // namespace cms::alpakatools + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(SiPixelDigiErrorsSoACollection, SiPixelDigiErrorsHost); + +#endif // DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigiErrorsSoACollection_h diff --git a/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h new file mode 100644 index 0000000000000..2fe60454d553f --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h @@ -0,0 +1,36 @@ +#ifndef DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigisSoACollection_h +#define DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigisSoACollection_h + +#include + +#include + +#include 
"DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using SiPixelDigisSoACollection = + std::conditional_t, SiPixelDigisHost, SiPixelDigisDevice>; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template + struct CopyToHost> { + template + static auto copyAsync(TQueue &queue, SiPixelDigisDevice const &srcData) { + SiPixelDigisHost dstData(srcData.view().metadata().size(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + dstData.setNModulesDigis(srcData.nModules(), srcData.nDigis()); + return dstData; + } + }; +} // namespace cms::alpakatools + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(SiPixelDigisSoACollection, SiPixelDigisHost); + +#endif // DataFormats_SiPixelDigiSoA_interface_alpaka_SiPixelDigisSoACollection_h diff --git a/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda.h b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..d2fb20448545c --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda.h @@ -0,0 +1,12 @@ +#ifndef DataFormats_SiPixelDigiSoA_Alpaka_Classes_cuda_h +#define DataFormats_SiPixelDigiSoA_Alpaka_Classes_cuda_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" + +#endif // 
DataFormats_SiPixelDigiSoA_src_alpaka_classes_cuda_h diff --git a/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..7315bc37eeb1b --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_cuda_def.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm.h b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm.h new file mode 100644 index 0000000000000..db5bf9385f99d --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm.h @@ -0,0 +1,13 @@ +#ifndef DataFormats_SiPixelDigiSoA_Alpaka_Classes_cuda_h +#define DataFormats_SiPixelDigiSoA_Alpaka_Classes_cuda_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" + +#endif // DataFormats_SiPixelDigiSoA_src_alpaka_classes_cuda_h diff --git a/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm_def.xml new file mode 100644 index 0000000000000..21deb7bbd46dc --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/alpaka/classes_rocm_def.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/src/classes.cc b/DataFormats/SiPixelDigiSoA/src/classes.cc new file mode 100644 index 0000000000000..9022a3102107e --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/classes.cc @@ -0,0 +1,6 @@ +#include 
"DataFormats/Portable/interface/PortableHostCollectionReadRules.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" + +SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection); diff --git a/DataFormats/SiPixelDigiSoA/src/classes.h b/DataFormats/SiPixelDigiSoA/src/classes.h new file mode 100644 index 0000000000000..427a4c972863d --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/classes.h @@ -0,0 +1,10 @@ +#ifndef DataFormats_SiPixelDigisSoA_src_classes_h +#define DataFormats_SiPixelDigisSoA_src_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" + +#endif // DataFormats_SiPixelClusterSoA_src_classes_h diff --git a/DataFormats/SiPixelDigiSoA/src/classes_def.xml b/DataFormats/SiPixelDigiSoA/src/classes_def.xml new file mode 100644 index 0000000000000..c68be4a01bf5a --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/src/classes_def.xml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/test/BuildFile.xml b/DataFormats/SiPixelDigiSoA/test/BuildFile.xml new file mode 100644 index 0000000000000..b4bd8297f5011 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/BuildFile.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.cc b/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.cc new file mode 100644 index 0000000000000..4703e68630f35 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.cc @@ -0,0 +1,54 @@ +#include +#include + +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include 
"DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testDigisSoA { + + void runKernels(SiPixelDigiErrorsSoAView digiErrors_view, Queue& queue); + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // Inner scope to deallocate memory before destroying the stream + { + // Instantiate tracks on device. PortableDeviceCollection allocates + // SoA on device automatically. + SiPixelDigiErrorsSoACollection digiErrors_d(1000, queue); + testDigisSoA::runKernels(digiErrors_d.view(), queue); + + // Instantate tracks on host. This is where the data will be + // copied to from device. 
+ SiPixelDigiErrorsHost digiErrors_h(digiErrors_d.view().metadata().size(), queue); + alpaka::memcpy(queue, digiErrors_h.buffer(), digiErrors_d.const_buffer()); + std::cout << "digiErrors_h.view().metadata().size(): " << digiErrors_h.view().metadata().size() << std::endl; + std::cout << "digiErrors_h.view()[100].pixelErrors().rawId: " << digiErrors_h.view()[100].pixelErrors().rawId + << std::endl; + std::cout << "digiErrors_h.view()[100].pixelErrors().word: " << digiErrors_h.view()[100].pixelErrors().word + << std::endl; + std::cout << "digiErrors_h.view()[100].pixelErrors().errorType: " + << digiErrors_h.view()[100].pixelErrors().errorType << std::endl; + std::cout << "digiErrors_h.view()[100].pixelErrors().fedId: " << digiErrors_h.view()[100].pixelErrors().fedId + << std::endl; + alpaka::wait(queue); + } + + return 0; +} diff --git a/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.dev.cc b/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.dev.cc new file mode 100644 index 0000000000000..c7add92dab018 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/alpaka/DigiErrors_test.dev.cc @@ -0,0 +1,50 @@ +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace alpaka; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace testDigisSoA { + + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelDigiErrorsSoAView digiErrors_view) const { + for (uint32_t j : elements_with_stride(acc, digiErrors_view.metadata().size())) { + digiErrors_view[j].pixelErrors().rawId = j; + 
digiErrors_view[j].pixelErrors().word = j; + digiErrors_view[j].pixelErrors().errorType = j; + digiErrors_view[j].pixelErrors().fedId = j; + } + } + }; + + class TestVerifyKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelDigiErrorsSoAConstView digiErrors_view) const { + for (uint32_t j : elements_with_stride(acc, digiErrors_view.metadata().size())) { + assert(digiErrors_view[j].pixelErrors().rawId == j); + assert(digiErrors_view[j].pixelErrors().word == j); + assert(digiErrors_view[j].pixelErrors().errorType == j % 256); + assert(digiErrors_view[j].pixelErrors().fedId == j % 256); + } + } + }; + + void runKernels(SiPixelDigiErrorsSoAView digiErrors_view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(digiErrors_view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, digiErrors_view); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, digiErrors_view); + } + + } // namespace testDigisSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.cc b/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.cc new file mode 100644 index 0000000000000..f1d9ce9cd2b37 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.cc @@ -0,0 +1,48 @@ +#include + +#include + +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include 
"HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testDigisSoA { + + void runKernels(SiPixelDigisSoAView digis_view, Queue& queue); + + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // Inner scope to deallocate memory before destroying the stream + { + // Instantiate tracks on device. PortableDeviceCollection allocates + // SoA on device automatically. + SiPixelDigisSoACollection digis_d(1000, queue); + testDigisSoA::runKernels(digis_d.view(), queue); + + // Instantate tracks on host. This is where the data will be + // copied to from device. + SiPixelDigisHost digis_h(digis_d.view().metadata().size(), queue); + + std::cout << digis_h.view().metadata().size() << std::endl; + alpaka::memcpy(queue, digis_h.buffer(), digis_d.const_buffer()); + alpaka::wait(queue); + } + + return 0; +} diff --git a/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.dev.cc b/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.dev.cc new file mode 100644 index 0000000000000..9bb35bfc4d7f8 --- /dev/null +++ b/DataFormats/SiPixelDigiSoA/test/alpaka/Digis_test.dev.cc @@ -0,0 +1,49 @@ +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace alpaka; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace testDigisSoA { + + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelDigisSoAView digi_view) const { + for (int32_t j : 
elements_with_stride(acc, digi_view.metadata().size())) { + digi_view[j].clus() = j; + digi_view[j].rawIdArr() = j * 2; + digi_view[j].xx() = j * 3; + digi_view[j].moduleId() = j * 4; + } + } + }; + + class TestVerifyKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, SiPixelDigisSoAConstView digi_view) const { + for (uint32_t j : elements_with_stride(acc, digi_view.metadata().size())) { + assert(digi_view[j].clus() == int(j)); + assert(digi_view[j].rawIdArr() == j * 2); + assert(digi_view[j].xx() == j * 3); + assert(digi_view[j].moduleId() == j * 4); + } + } + }; + + void runKernels(SiPixelDigisSoAView digi_view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(digi_view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, digi_view); + alpaka::exec(queue, workDiv, TestVerifyKernel{}, digi_view); + } + + } // namespace testDigisSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/SiPixelRawData/src/classes.h b/DataFormats/SiPixelRawData/src/classes.h index 7a07e9f35f388..9adc3a440e27b 100644 --- a/DataFormats/SiPixelRawData/src/classes.h +++ b/DataFormats/SiPixelRawData/src/classes.h @@ -1,10 +1,13 @@ #ifndef SIPIXELRAWDATA_CLASSES_H #define SIPIXELRAWDATA_CLASSES_H -#include "DataFormats/SiPixelRawData/interface/SiPixelRawDataError.h" -#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" -#include "DataFormats/Common/interface/Wrapper.h" -#include "DataFormats/Common/interface/DetSetVector.h" #include +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelErrorsSoA.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelRawDataError.h" + #endif // 
SIPIXELRAWDATA_CLASSES_H diff --git a/DataFormats/SiPixelRawData/src/classes_def.xml b/DataFormats/SiPixelRawData/src/classes_def.xml index fd2b5dcf27965..3535bbc430a53 100644 --- a/DataFormats/SiPixelRawData/src/classes_def.xml +++ b/DataFormats/SiPixelRawData/src/classes_def.xml @@ -5,16 +5,23 @@ - + + - + + + + + - - - - - + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/BuildFile.xml b/DataFormats/TrackingRecHitSoA/BuildFile.xml new file mode 100644 index 0000000000000..a7c80171ef4df --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/BuildFile.xml @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h b/DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h new file mode 100644 index 0000000000000..06205906d8d2f --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h @@ -0,0 +1,20 @@ +#ifndef DataFormats_TrackingRecHitSoA_SiPixelHitStatus_H +#define DataFormats_TrackingRecHitSoA_SiPixelHitStatus_H + +#include + +// more information on bit fields : https://en.cppreference.com/w/cpp/language/bit_field +struct SiPixelHitStatus { + bool isBigX : 1; // ∈[0,1] + bool isOneX : 1; // ∈[0,1] + bool isBigY : 1; // ∈[0,1] + bool isOneY : 1; // ∈[0,1] + uint8_t qBin : 3; // ∈[0,1,...,7] +}; + +struct SiPixelHitStatusAndCharge { + SiPixelHitStatus status; + uint32_t charge : 24; +}; + +#endif diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h new file mode 100644 index 0000000000000..c0fc252729df7 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h @@ -0,0 +1,44 @@ +#ifndef DataFormats_TrackingRecHitSoA_interface_TrackingRecHitSoADevice_h +#define DataFormats_TrackingRecHitSoA_interface_TrackingRecHitSoADevice_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableDeviceCollection.h" +#include 
"DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +template +class TrackingRecHitDevice : public PortableDeviceCollection, TDev> { +public: + using hitSoA = TrackingRecHitSoA; + //Need to decorate the class with the inherited portable accessors being now a template + using PortableDeviceCollection, TDev>::view; + using PortableDeviceCollection, TDev>::const_view; + using PortableDeviceCollection, TDev>::buffer; + + TrackingRecHitDevice() = default; + + // Constructor which specifies the SoA size + template + explicit TrackingRecHitDevice(uint32_t nHits, int32_t offsetBPIX2, uint32_t const* hitsModuleStart, TQueue queue) + : PortableDeviceCollection, TDev>(nHits, queue) { + const auto device = alpaka::getDev(queue); + + auto start_h = cms::alpakatools::make_host_view(hitsModuleStart, TrackerTraits::numberOfModules + 1); + auto start_d = + cms::alpakatools::make_device_view(device, view().hitsModuleStart().data(), TrackerTraits::numberOfModules + 1); + alpaka::memcpy(queue, start_d, start_h); + + auto off_h = cms::alpakatools::make_host_view(offsetBPIX2); + auto off_d = cms::alpakatools::make_device_view(device, view().offsetBPIX2()); + alpaka::memcpy(queue, off_d, off_h); + alpaka::wait(queue); + } + + uint32_t nHits() const { return view().metadata().size(); } + uint32_t const* hitsModuleStart() const { return view().hitsModuleStart().data(); } +}; +#endif // DataFormats_RecHits_interface_TrackingRecHitSoADevice_h diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h new file mode 100644 index 0000000000000..ce3f57232ac93 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h @@ -0,0 +1,43 @@ +#ifndef DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsHost_h +#define 
DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsHost_h + +#include + +#include + +#include "DataFormats/Portable/interface/PortableHostCollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +template +class TrackingRecHitHost : public PortableHostCollection> { +public: + using hitSoA = TrackingRecHitSoA; + //Need to decorate the class with the inherited portable accessors being now a template + using PortableHostCollection>::view; + using PortableHostCollection>::const_view; + using PortableHostCollection>::buffer; + + TrackingRecHitHost() = default; + + template + explicit TrackingRecHitHost(uint32_t nHits, TQueue queue) + : PortableHostCollection>(nHits, queue) {} + + // Constructor which specifies the SoA size + template + explicit TrackingRecHitHost(uint32_t nHits, int32_t offsetBPIX2, uint32_t const* hitsModuleStart, TQueue queue) + : PortableHostCollection>(nHits, queue) { + std::copy(hitsModuleStart, hitsModuleStart + TrackerTraits::numberOfModules + 1, view().hitsModuleStart().data()); + view().offsetBPIX2() = offsetBPIX2; + } + + uint32_t nHits() const { return view().metadata().size(); } + uint32_t const* hitsModuleStart() const { return view().hitsModuleStart().data(); } +}; + +using TrackingRecHitHostPhase1 = TrackingRecHitHost; +using TrackingRecHitHostPhase2 = TrackingRecHitHost; +using TrackingRecHitHostHIonPhase1 = TrackingRecHitHost; + +#endif // DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsHost_h diff --git a/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h new file mode 100644 index 0000000000000..7e45a75043951 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h @@ -0,0 +1,55 @@ +#ifndef DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsSoA_h +#define DataFormats_TrackingRecHitSoA_interface_TrackingRecHitsSoA_h + +#include + 
+#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +template +struct TrackingRecHitSoA { + using hindex_type = typename TrackerTraits::hindex_type; + using PhiBinner = cms::alpakatools::HistoContainer; //28 for phase2 geometry + using PhiBinnerView = typename PhiBinner::View; + using PhiBinnerStorageType = typename PhiBinner::index_type; + using AverageGeometry = pixelTopology::AverageGeometryT; + using HitLayerStartArray = std::array; + using HitModuleStartArray = std::array; + + GENERATE_SOA_LAYOUT(Layout, + SOA_COLUMN(float, xLocal), + SOA_COLUMN(float, yLocal), + SOA_COLUMN(float, xerrLocal), + SOA_COLUMN(float, yerrLocal), + SOA_COLUMN(float, xGlobal), + SOA_COLUMN(float, yGlobal), + SOA_COLUMN(float, zGlobal), + SOA_COLUMN(float, rGlobal), + SOA_COLUMN(int16_t, iphi), + SOA_COLUMN(SiPixelHitStatusAndCharge, chargeAndStatus), + SOA_COLUMN(int16_t, clusterSizeX), + SOA_COLUMN(int16_t, clusterSizeY), + SOA_COLUMN(uint16_t, detectorIndex), + SOA_SCALAR(int32_t, offsetBPIX2), + SOA_COLUMN(PhiBinnerStorageType, phiBinnerStorage), + SOA_SCALAR(HitModuleStartArray, hitsModuleStart), + SOA_SCALAR(HitLayerStartArray, hitsLayerStart), + SOA_SCALAR(AverageGeometry, averageGeometry), + SOA_SCALAR(PhiBinner, phiBinner)); +}; + +template +using TrackingRecHitLayout = typename TrackingRecHitSoA::template Layout<>; +template +using TrackingRecHitSoAView = typename TrackingRecHitSoA::template Layout<>::View; +template +using TrackingRecHitSoAConstView = typename TrackingRecHitSoA::template Layout<>::ConstView; + +#endif diff --git a/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h b/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h new file mode 100644 index 0000000000000..0e0e848afcfd9 --- 
/dev/null +++ b/DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h @@ -0,0 +1,46 @@ +#ifndef DataFormats_RecHits_interface_alpakaTrackingRecHitsSoACollection +#define DataFormats_RecHits_interface_alpakaTrackingRecHitsSoACollection + +#include +#include +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + template + using TrackingRecHitsSoACollection = std::conditional_t, + TrackingRecHitHost, + TrackingRecHitDevice>; + + //Classes definition for Phase1/Phase2, to make the classes_def lighter. Not actually used in the code. + using TrackingRecHitSoAPhase1 = TrackingRecHitsSoACollection; + using TrackingRecHitSoAPhase2 = TrackingRecHitsSoACollection; + using TrackingRecHitSoAHIonPhase1 = TrackingRecHitsSoACollection; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template + struct CopyToHost> { + template + static auto copyAsync(TQueue& queue, TrackingRecHitDevice const& deviceData) { + TrackingRecHitHost hostData(deviceData.view().metadata().size(), queue); + alpaka::memcpy(queue, hostData.buffer(), deviceData.buffer()); +#ifdef GPU_DEBUG + printf("TrackingRecHitsSoACollection: I'm copying to host.\n"); +#endif + return hostData; + } + }; +} // namespace cms::alpakatools + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(TrackingRecHitSoAPhase1, TrackingRecHitHostPhase1); +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(TrackingRecHitSoAPhase2, TrackingRecHitHostPhase2); +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(TrackingRecHitSoAHIonPhase1, TrackingRecHitHostHIonPhase1); + +#endif // 
DataFormats_RecHits_interface_alpakaTrackingRecHitsSoACollection \ No newline at end of file diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda.h b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda.h new file mode 100644 index 0000000000000..402be81b7081e --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda.h @@ -0,0 +1,12 @@ +#ifndef DataFormats_TrackingRecHitSoA_src_alpaka_classes_cuda_h +#define DataFormats_TrackingRecHitSoA_src_alpaka_classes_cuda_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" + +using namespace pixelTopology; + +#endif // DataFormats_TrackingRecHitSoA_src_alpaka_classes_cuda_h diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml new file mode 100644 index 0000000000000..80c267b57d585 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_cuda_def.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm.h b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm.h new file mode 100644 index 0000000000000..6af162021dd47 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm.h @@ -0,0 +1,12 @@ +#ifndef DataFormats_TrackingRecHitSoA_src_alpaka_classes_rocm_h +#define DataFormats_TrackingRecHitSoA_src_alpaka_classes_rocm_h + +#include "DataFormats/Common/interface/DeviceProduct.h" +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include 
"DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" + +using namespace pixelTopology; + +#endif // DataFormats_TrackingRecHitSoA_src_alpaka_classes_rocm_h diff --git a/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml new file mode 100644 index 0000000000000..bc4c969137121 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/alpaka/classes_rocm_def.xml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/src/classes.cc b/DataFormats/TrackingRecHitSoA/src/classes.cc new file mode 100644 index 0000000000000..58167c21cef4f --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/classes.cc @@ -0,0 +1,7 @@ +#include "DataFormats/Portable/interface/PortableHostCollectionReadRules.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); +SET_PORTABLEHOSTCOLLECTION_READ_RULES(PortableHostCollection>); \ No newline at end of file diff --git a/DataFormats/TrackingRecHitSoA/src/classes.h b/DataFormats/TrackingRecHitSoA/src/classes.h new file mode 100644 index 0000000000000..d405a88ed6ace --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/classes.h @@ -0,0 +1,11 @@ +#ifndef DataFormats_TrackingRecHitSoA_src_classes_h +#define DataFormats_TrackingRecHitSoA_src_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +using namespace pixelTopology; + +#endif // DataFormats_TrackingRecHitSoA_src_classes_h diff --git a/DataFormats/TrackingRecHitSoA/src/classes_def.xml 
b/DataFormats/TrackingRecHitSoA/src/classes_def.xml new file mode 100644 index 0000000000000..f3107e8587327 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/src/classes_def.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/test/BuildFile.xml b/DataFormats/TrackingRecHitSoA/test/BuildFile.xml new file mode 100644 index 0000000000000..5b61a3460fb7d --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/BuildFile.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc new file mode 100644 index 0000000000000..378bb95db7b30 --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.cc @@ -0,0 +1,47 @@ +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/devices.h" +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +#include +#include + +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace testTrackingRecHitSoA { + + template + void runKernels(TrackingRecHitSoAView& hits, Queue& queue); + + } +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +int main() { + const auto host = cms::alpakatools::host(); + const auto device = cms::alpakatools::devices()[0]; + Queue queue(device); + + // inner scope to deallocate memory before destroying the queue + { + uint32_t nHits = 2000; + int32_t offset = 100; + uint32_t 
moduleStart[pixelTopology::Phase1::numberOfModules + 1]; + + for (size_t i = 0; i < pixelTopology::Phase1::numberOfModules + 1; i++) { + moduleStart[i] = i * 2; + } + TrackingRecHitsSoACollection tkhit(nHits, offset, &moduleStart[0], queue); + + testTrackingRecHitSoA::runKernels(tkhit.view(), queue); + alpaka::wait(queue); + } + return 0; +} diff --git a/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc new file mode 100644 index 0000000000000..79d8bd69cbc3a --- /dev/null +++ b/DataFormats/TrackingRecHitSoA/test/alpaka/Hits_test.dev.cc @@ -0,0 +1,65 @@ +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" + +using namespace alpaka; + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + using namespace cms::alpakatools; + namespace testTrackingRecHitSoA { + + template + class TestFillKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackingRecHitSoAView soa) const { + const uint32_t i(alpaka::getIdx(acc)[0u]); + const uint32_t j(alpaka::getIdx(acc)[0u]); + + if (i == 0 and j == 0) { + soa.offsetBPIX2() = 22; + soa[10].xLocal() = 1.11; + } + + soa[i].iphi() = i % 10; + soa.hitsLayerStart()[j] = j; + } + }; + + template + class ShowKernel { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, TrackingRecHitSoAConstView soa) const { + const uint32_t i(alpaka::getIdx(acc)[0u]); + const uint32_t j(alpaka::getIdx(acc)[0u]); + + if (i == 0 and j == 0) { + printf("nbins = %d \n", soa.phiBinner().nbins()); + printf("offsetBPIX %d ->%d \n", i, soa.offsetBPIX2()); + printf("nHits %d ->%d \n", i, soa.metadata().size()); + //printf("hitsModuleStart 
%d ->%d \n", i, soa.hitsModuleStart().at(28)); + } + + if (i < 10) // can be increased to soa.nHits() for debugging + printf("iPhi %d ->%d \n", i, soa[i].iphi()); + } + }; + + template + void runKernels(TrackingRecHitSoAView& view, Queue& queue) { + uint32_t items = 64; + uint32_t groups = divide_up_by(view.metadata().size(), items); + auto workDiv = make_workdiv(groups, items); + alpaka::exec(queue, workDiv, TestFillKernel{}, view); + alpaka::exec(queue, workDiv, ShowKernel{}, view); + } + + template void runKernels(TrackingRecHitSoAView& view, Queue& queue); + template void runKernels(TrackingRecHitSoAView& view, Queue& queue); + + } // namespace testTrackingRecHitSoA +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index 212738e941533..87123219d44e4 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -2,6 +2,7 @@ + diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoAAlpaka.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoAAlpaka.cc new file mode 100644 index 0000000000000..ab762b8f4d97c --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsFromSoAAlpaka.cc @@ -0,0 +1,130 @@ +#include + +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/DetId/interface/DetIdCollection.h" +#include "DataFormats/FEDRawData/interface/FEDNumbering.h" +#include "DataFormats/SiPixelDetId/interface/PixelFEDChannel.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include 
"EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" +#include "FWCore/Framework/interface/ESWatcher.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsHost.h" + +class SiPixelDigiErrorsFromSoAAlpaka : public edm::stream::EDProducer<> { +public: + explicit SiPixelDigiErrorsFromSoAAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelDigiErrorsFromSoAAlpaka() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + const edm::ESGetToken cablingToken_; + const edm::EDGetTokenT digiErrorsSoAGetToken_; + const edm::EDGetTokenT fmtErrorsGetToken_; + const edm::EDPutTokenT> errorPutToken_; + const edm::EDPutTokenT tkErrorPutToken_; + const edm::EDPutTokenT userErrorPutToken_; + const edm::EDPutTokenT> disabledChannelPutToken_; + + edm::ESWatcher cablingWatcher_; + std::unique_ptr cabling_; + + const std::vector tkerrorlist_; + const std::vector usererrorlist_; + + const bool usePhase1_; +}; + +SiPixelDigiErrorsFromSoAAlpaka::SiPixelDigiErrorsFromSoAAlpaka(const edm::ParameterSet& iConfig) + : cablingToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), + digiErrorsSoAGetToken_{consumes(iConfig.getParameter("digiErrorSoASrc"))}, + fmtErrorsGetToken_{consumes(iConfig.getParameter("fmtErrorsSoASrc"))}, + errorPutToken_{produces>()}, + tkErrorPutToken_{produces()}, + userErrorPutToken_{produces("UserErrorModules")}, + disabledChannelPutToken_{produces>()}, + 
tkerrorlist_(iConfig.getParameter>("ErrorList")), + usererrorlist_(iConfig.getParameter>("UserErrorList")), + usePhase1_(iConfig.getParameter("UsePhase1")) {} + +void SiPixelDigiErrorsFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("digiErrorSoASrc", edm::InputTag("siPixelDigis")); + desc.add("fmtErrorsSoASrc", edm::InputTag("siPixelDigis")); + // the configuration parameters here are named following those in SiPixelRawToDigi + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); + desc.add("UsePhase1", false)->setComment("## Use phase1"); + desc.add>("ErrorList", std::vector{29}) + ->setComment("## ErrorList: list of error codes used by tracking to invalidate modules"); + desc.add>("UserErrorList", std::vector{40}) + ->setComment("## UserErrorList: list of error codes used by Pixel experts for investigation"); + descriptions.addWithDefaultLabel(desc); +} + +void SiPixelDigiErrorsFromSoAAlpaka::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { + // pack errors into collection + + // initialize cabling map or update if necessary + if (cablingWatcher_.check(iSetup)) { + // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) + const SiPixelFedCablingMap* cablingMap = &iSetup.getData(cablingToken_); + cabling_ = cablingMap->cablingTree(); + LogDebug("map version:") << cabling_->version(); + } + + const auto& digiErrors = iEvent.get(digiErrorsSoAGetToken_); + const auto& formatterErrors = iEvent.get(fmtErrorsGetToken_); + + edm::DetSetVector errorcollection{}; + DetIdCollection tkerror_detidcollection{}; + DetIdCollection usererror_detidcollection{}; + edmNew::DetSetVector disabled_channelcollection{}; + + PixelDataFormatter formatter(cabling_.get(), usePhase1_); // for phase 1 & 0 + auto errors = formatterErrors; // make a copy + PixelDataFormatter::DetErrors nodeterrors; + + // if (digiErrors.view().size() > 0) { // 
TODO: need to know if this size will be useful or not and how to use it + uint32_t size = digiErrors.view().metadata().size(); + for (auto i = 0U; i < size; i++) { + SiPixelErrorCompact err = digiErrors.view()[i].pixelErrors(); + if (err.errorType != 0) { + SiPixelRawDataError error(err.word, err.errorType, err.fedId + FEDNumbering::MINSiPixeluTCAFEDID); + errors[err.rawId].push_back(error); + } + } + // } + + formatter.unpackFEDErrors(errors, + tkerrorlist_, + usererrorlist_, + errorcollection, + tkerror_detidcollection, + usererror_detidcollection, + disabled_channelcollection, + nodeterrors); + + const uint32_t dummydetid = 0xffffffff; + edm::DetSet& errorDetSet = errorcollection.find_or_insert(dummydetid); + errorDetSet.data = nodeterrors; + + iEvent.emplace(errorPutToken_, std::move(errorcollection)); + iEvent.emplace(tkErrorPutToken_, std::move(tkerror_detidcollection)); + iEvent.emplace(userErrorPutToken_, std::move(usererror_detidcollection)); + iEvent.emplace(disabledChannelPutToken_, std::move(disabled_channelcollection)); +} + +DEFINE_FWK_MODULE(SiPixelDigiErrorsFromSoAAlpaka); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc index 5b23f2dbda104..67b1b519d4089 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc @@ -26,16 +26,16 @@ class SiPixelDigisSoAFromCUDA : public edm::stream::EDProducer> digiGetToken_; - edm::EDPutTokenT digiPutToken_; + edm::EDPutTokenT digiPutToken_; - cms::cuda::PortableHostCollection> digis_h_; + cms::cuda::PortableHostCollection digis_h_; int nDigis_; }; SiPixelDigisSoAFromCUDA::SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig) : digiGetToken_(consumes>(iConfig.getParameter("src"))), - digiPutToken_(produces()) {} + digiPutToken_(produces()) {} void SiPixelDigisSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& 
descriptions) { edm::ParameterSetDescription desc; @@ -52,8 +52,7 @@ void SiPixelDigisSoAFromCUDA::acquire(const edm::Event& iEvent, const auto& digis_d = ctx.get(iEvent, digiGetToken_); nDigis_ = digis_d.nDigis(); - nDigis_ = digis_d.nDigis(); - digis_h_ = cms::cuda::PortableHostCollection>(digis_d.view().metadata().size(), ctx.stream()); + digis_h_ = cms::cuda::PortableHostCollection(digis_d.view().metadata().size(), ctx.stream()); cudaCheck(cudaMemcpyAsync(digis_h_.buffer().get(), digis_d.const_buffer().get(), digis_d.bufferSize(), diff --git a/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py index b5484afd2fafa..f5139f1cb418b 100644 --- a/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py +++ b/EventFilter/SiPixelRawToDigi/python/siPixelDigis_cff.py @@ -23,6 +23,12 @@ from EventFilter.SiPixelRawToDigi.siPixelDigiErrorsFromSoA_cfi import siPixelDigiErrorsFromSoA as _siPixelDigiErrorsFromSoA siPixelDigiErrors = _siPixelDigiErrorsFromSoA.clone() +# Alpaka modifier +from Configuration.ProcessModifiers.alpaka_cff import alpaka +from EventFilter.SiPixelRawToDigi.siPixelDigiErrorsFromSoAAlpaka_cfi import siPixelDigiErrorsFromSoAAlpaka as _siPixelDigiErrorsFromSoAAlpaka + +alpaka.toReplaceWith(siPixelDigiErrors, _siPixelDigiErrorsFromSoAAlpaka.clone()) + # use the Phase 1 settings from Configuration.Eras.Modifier_phase1Pixel_cff import phase1Pixel phase1Pixel.toModify(siPixelDigiErrors, diff --git a/HeterogeneousCore/AlpakaInterface/interface/workdivision.h b/HeterogeneousCore/AlpakaInterface/interface/workdivision.h index e02f4e92f813e..ad950999517f4 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/workdivision.h +++ b/HeterogeneousCore/AlpakaInterface/interface/workdivision.h @@ -806,6 +806,210 @@ namespace cms::alpakatools { return alpaka::getIdx(acc) == Vec>::zeros(); } + /********************************************* + * RANGE COMPUTATION + ********************************************/ + 
+ /* + * Computes the range of the elements indexes, local to the block. + * Warning: the max index is not truncated by the max number of elements of interest. + */ + template + ALPAKA_FN_ACC std::pair element_index_range_in_block(const TAcc& acc, + const Idx elementIdxShift, + const unsigned int dimIndex = 0u) { + // Take into account the thread index in block. + const Idx threadIdxLocal(alpaka::getIdx(acc)[dimIndex]); + const Idx threadDimension(alpaka::getWorkDiv(acc)[dimIndex]); + + // Compute the elements indexes in block. + // Obviously relevant for CPU only. + // For GPU, threadDimension == 1, and elementIdx == firstElementIdx == threadIdx + elementIdxShift. + const Idx firstElementIdxLocal = threadIdxLocal * threadDimension; + const Idx firstElementIdx = firstElementIdxLocal + elementIdxShift; // Add the shift! + const Idx endElementIdxUncut = firstElementIdx + threadDimension; + + // Return element indexes, shifted by elementIdxShift. + return {firstElementIdx, endElementIdxUncut}; + } + + /* + * Computes the range of the elements indexes, local to the block. + * Truncated by the max number of elements of interest. + */ + template + ALPAKA_FN_ACC std::pair element_index_range_in_block_truncated(const TAcc& acc, + const Idx maxNumberOfElements, + const Idx elementIdxShift, + const unsigned int dimIndex = 0u) { + // Check dimension + //static_assert(alpaka::Dim::value == Dim1::value, + // "Accelerator and maxNumberOfElements need to have same dimension."); + auto [firstElementIdxLocal, endElementIdxLocal] = element_index_range_in_block(acc, elementIdxShift, dimIndex); + + // Truncate + endElementIdxLocal = std::min(endElementIdxLocal, maxNumberOfElements); + + // Return element indexes, shifted by elementIdxShift, and truncated by maxNumberOfElements. + return {firstElementIdxLocal, endElementIdxLocal}; + } + + /* + * Computes the range of the elements indexes in grid. + * Warning: the max index is not truncated by the max number of elements of interest. 
+ */ + template + ALPAKA_FN_ACC std::pair element_index_range_in_grid(const TAcc& acc, + Idx elementIdxShift, + const unsigned int dimIndex = 0u) { + // Take into account the block index in grid. + const Idx blockIdxInGrid(alpaka::getIdx(acc)[dimIndex]); + const Idx blockDimension(alpaka::getWorkDiv(acc)[dimIndex]); + + // Shift to get global indices in grid (instead of local to the block) + elementIdxShift += blockIdxInGrid * blockDimension; + + // Return element indexes, shifted by elementIdxShift. + return element_index_range_in_block(acc, elementIdxShift, dimIndex); + } + + /* + * Loop on all (CPU) elements. + * Elements loop makes sense in CPU case only. In GPU case, elementIdx = firstElementIdx = threadIdx + shift. + * Indexes are local to the BLOCK. + */ + template + ALPAKA_FN_ACC void for_each_element_in_block(const TAcc& acc, + const Idx maxNumberOfElements, + const Idx elementIdxShift, + const Func func, + const unsigned int dimIndex = 0) { + const auto& [firstElementIdx, endElementIdx] = + element_index_range_in_block_truncated(acc, maxNumberOfElements, elementIdxShift, dimIndex); + + for (Idx elementIdx = firstElementIdx; elementIdx < endElementIdx; ++elementIdx) { + func(elementIdx); + } + } + + /* + * Overload for elementIdxShift = 0 + */ + template + ALPAKA_FN_ACC void for_each_element_in_block(const TAcc& acc, + const Idx maxNumberOfElements, + const Func func, + const unsigned int dimIndex = 0) { + const Idx elementIdxShift = 0; + for_each_element_in_block(acc, maxNumberOfElements, elementIdxShift, func, dimIndex); + } + + /************************************************************** + * LOOP ON ALL ELEMENTS WITH ONE LOOP + **************************************************************/ + + /* + * Case where the input index i has reached the end of threadDimension: strides the input index. + * Otherwise: do nothing. 
+ * NB 1: This helper function is used as a trick to only have one loop (like in legacy), instead of 2 loops + * (like in all the other Alpaka helpers, 'for_each_element_in_block_strided' for example, + * because of the additional loop over elements in Alpaka model). + * This allows to keep the 'continue' and 'break' statements as-is from legacy code, + * and hence avoids a lot of legacy code reshuffling. + * NB 2: Modifies i, firstElementIdx and endElementIdx. + */ + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool next_valid_element_index_strided( + Idx& i, Idx& firstElementIdx, Idx& endElementIdx, const Idx stride, const Idx maxNumberOfElements) { + bool isNextStrideElementValid = true; + if (i == endElementIdx) { + firstElementIdx += stride; + endElementIdx += stride; + i = firstElementIdx; + if (i >= maxNumberOfElements) { + isNextStrideElementValid = false; + } + } + return isNextStrideElementValid; + } + + template + ALPAKA_FN_ACC void for_each_element_in_block_strided(const TAcc& acc, + const Idx maxNumberOfElements, + const Idx elementIdxShift, + const Func func, + const unsigned int dimIndex = 0) { + // Get thread / element indices in block. + const auto& [firstElementIdxNoStride, endElementIdxNoStride] = + element_index_range_in_block(acc, elementIdxShift, dimIndex); + + // Stride = block size. + const Idx blockDimension(alpaka::getWorkDiv(acc)[dimIndex]); + + // Strided access. + for (Idx threadIdx = firstElementIdxNoStride, endElementIdx = endElementIdxNoStride; + threadIdx < maxNumberOfElements; + threadIdx += blockDimension, endElementIdx += blockDimension) { + // (CPU) Loop on all elements. 
+ if (endElementIdx > maxNumberOfElements) { + endElementIdx = maxNumberOfElements; + } + for (Idx i = threadIdx; i < endElementIdx; ++i) { + func(i); + } + } + } + + /* + * Overload for elementIdxShift = 0 + */ + template + ALPAKA_FN_ACC void for_each_element_in_block_strided(const TAcc& acc, + const Idx maxNumberOfElements, + const Func func, + const unsigned int dimIndex = 0) { + const Idx elementIdxShift = 0; + for_each_element_in_block_strided(acc, maxNumberOfElements, elementIdxShift, func, dimIndex); + } + + template + ALPAKA_FN_ACC void for_each_element_in_grid_strided(const TAcc& acc, + const Idx maxNumberOfElements, + const Idx elementIdxShift, + const Func func, + const unsigned int dimIndex = 0) { + // Get thread / element indices in block. + const auto& [firstElementIdxNoStride, endElementIdxNoStride] = + element_index_range_in_grid(acc, elementIdxShift, dimIndex); + + // Stride = grid size. + const Idx gridDimension(alpaka::getWorkDiv(acc)[dimIndex]); + + // Strided access. + for (Idx threadIdx = firstElementIdxNoStride, endElementIdx = endElementIdxNoStride; + threadIdx < maxNumberOfElements; + threadIdx += gridDimension, endElementIdx += gridDimension) { + // (CPU) Loop on all elements. 
+ if (endElementIdx > maxNumberOfElements) { + endElementIdx = maxNumberOfElements; + } + for (Idx i = threadIdx; i < endElementIdx; ++i) { + func(i); + } + } + } + + /* + * Overload for elementIdxShift = 0 + */ + template + ALPAKA_FN_ACC void for_each_element_in_grid_strided(const TAcc& acc, + const Idx maxNumberOfElements, + const Func func, + const unsigned int dimIndex = 0) { + const Idx elementIdxShift = 0; + for_each_element_in_grid_strided(acc, maxNumberOfElements, elementIdxShift, func, dimIndex); + } + } // namespace cms::alpakatools #endif // HeterogeneousCore_AlpakaInterface_interface_workdivision_h diff --git a/RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h b/RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h new file mode 100644 index 0000000000000..971e74f4cd683 --- /dev/null +++ b/RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h @@ -0,0 +1,27 @@ +#ifndef RecoLocalTracker_Records_PixelCPEFastParamsRecord_h +#define RecoLocalTracker_Records_PixelCPEFastParamsRecord_h + +#include "FWCore/Framework/interface/EventSetupRecordImplementation.h" +#include "FWCore/Framework/interface/DependentRecordImplementation.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/Records/interface/IdealGeometryRecord.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "CondFormats/DataRecord/interface/SiPixelLorentzAngleRcd.h" +#include "CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" +#include "CalibTracker/Records/interface/SiPixelTemplateDBObjectESProducerRcd.h" +#include "CalibTracker/Records/interface/SiPixel2DTemplateDBObjectESProducerRcd.h" + +#include "FWCore/Utilities/interface/mplVector.h" + +class PixelCPEFastParamsRecord + : public edm::eventsetup::DependentRecordImplementation > {}; + +#endif // RecoLocalTracker_Records_PixelCPEFastParamsRecord_h diff --git 
a/RecoLocalTracker/Records/src/PixelCPEFastParamsRecord.cc b/RecoLocalTracker/Records/src/PixelCPEFastParamsRecord.cc new file mode 100644 index 0000000000000..1410d7c1e66bf --- /dev/null +++ b/RecoLocalTracker/Records/src/PixelCPEFastParamsRecord.cc @@ -0,0 +1,5 @@ +#include "RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include "FWCore/Utilities/interface/typelookup.h" + +EVENTSETUP_RECORD_REG(PixelCPEFastParamsRecord); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h similarity index 82% rename from RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h rename to RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h index f9ebb16ea2c7c..c224483bda40a 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h +++ b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h @@ -1,5 +1,9 @@ -#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelClusterThresholds_h -#define RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelClusterThresholds_h +#ifndef RecoLocalTracker_SiPixelClusterizer_interface_SiPixelClusterThresholds_h +#define RecoLocalTracker_SiPixelClusterizer_interface_SiPixelClusterThresholds_h + +/* This struct is an implementation detail of this package. + * It's in the interface directory because it needs to be shared by the legacy, CUDA, and Alpaka plugins. 
+ */ struct SiPixelClusterThresholds { inline constexpr int32_t getThresholdForLayerOnCondition(bool isLayer1) const noexcept { @@ -51,4 +55,4 @@ struct SiPixelClusterThresholds { phase2KinkADC(phase2KinkADC) {} }; -#endif // RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelClusterThresholds_h +#endif // RecoLocalTracker_SiPixelClusterizer_interface_SiPixelClusterThresholds_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml index 1bc0c60a0d298..83bdae62636e0 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/BuildFile.xml @@ -1,16 +1,20 @@ - - + + + - + + + + @@ -18,3 +22,14 @@ + + + + + + + + + + + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc index 0bf734b6cd589..820b6b237c7e5 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoA.cc @@ -14,13 +14,14 @@ #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "Geometry/Records/interface/TrackerTopologyRcd.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" // local include(s) #include "PixelClusterizerBase.h" -#include "SiPixelClusterThresholds.h" +// #define GPU_DEBUG template class SiPixelDigisClustersFromSoAT : public edm::global::EDProducer<> { public: @@ -34,7 +35,7 @@ class SiPixelDigisClustersFromSoAT : public edm::global::EDProducer<> { const edm::ESGetToken topoToken_; - edm::EDGetTokenT digiGetToken_; + edm::EDGetTokenT 
digiGetToken_; edm::EDPutTokenT> digiPutToken_; edm::EDPutTokenT clusterPutToken_; @@ -48,7 +49,7 @@ class SiPixelDigisClustersFromSoAT : public edm::global::EDProducer<> { template SiPixelDigisClustersFromSoAT::SiPixelDigisClustersFromSoAT(const edm::ParameterSet& iConfig) : topoToken_(esConsumes()), - digiGetToken_(consumes(iConfig.getParameter("src"))), + digiGetToken_(consumes(iConfig.getParameter("src"))), clusterPutToken_(produces()), clusterThresholds_(iConfig.getParameter("clusterThreshold_layer1"), iConfig.getParameter("clusterThreshold_otherLayers")), @@ -122,7 +123,7 @@ void SiPixelDigisClustersFromSoAT::produce(edm::StreamID, for (int32_t ic = 0; ic < nclus + 1; ++ic) { auto const& acluster = aclusters[ic]; // in any case we cannot go out of sync with gpu... - if (!std::is_base_of::value and acluster.charge < clusterThreshold) + if (acluster.charge < clusterThreshold) edm::LogWarning("SiPixelDigisClustersFromSoA") << "cluster below charge Threshold " << "Layer/DetId/clusId " << layer << '/' << detId << '/' << ic << " size/charge " << acluster.isize << '/' << acluster.charge; @@ -148,6 +149,10 @@ void SiPixelDigisClustersFromSoAT::produce(edm::StreamID, spc.abort(); }; +#ifdef GPU_DEBUG + std::cout << "Dumping all digis. 
nDigis = " << nDigis << std::endl; +#endif + for (uint32_t i = 0; i < nDigis; i++) { // check for uninitialized digis if (digis.rawIdArr(i) == 0) @@ -161,6 +166,9 @@ void SiPixelDigisClustersFromSoAT::produce(edm::StreamID, assert(digis.rawIdArr(i) > 109999); #endif if (detId != digis.rawIdArr(i)) { +#ifdef GPU_DEBUG + std::cout << ">> Closed module --" << detId << "; nclus = " << nclus << std::endl; +#endif // new module fillClusters(detId); #ifdef EDM_ML_DEBUG @@ -178,6 +186,12 @@ void SiPixelDigisClustersFromSoAT::produce(edm::StreamID, } } PixelDigi dig(digis.pdigi(i)); + +#ifdef GPU_DEBUG + std::cout << i << ";" << digis.rawIdArr(i) << ";" << digis.clus(i) << ";" << digis.pdigi(i) << ";" << digis.adc(i) + << ";" << dig.row() << ";" << dig.column() << std::endl; +#endif + if (storeDigis_) (*detDigis).data.emplace_back(dig); // fill clusters diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc new file mode 100644 index 0000000000000..ad05ad3ff60c9 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelDigisClustersFromSoAAlpaka.cc @@ -0,0 +1,240 @@ +#include + +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/DetId/interface/DetId.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisHost.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include 
"FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" + +// local include(s) +#include "PixelClusterizerBase.h" + +// #define EDM_ML_DEBUG +// #define GPU_DEBUG +template +class SiPixelDigisClustersFromSoAAlpaka : public edm::global::EDProducer<> { +public: + explicit SiPixelDigisClustersFromSoAAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelDigisClustersFromSoAAlpaka() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + edm::ESGetToken const topoToken_; + edm::EDGetTokenT const digisHostToken_; + const SiPixelClusterThresholds clusterThresholds_; // Cluster threshold in electrons + const bool produceDigis_; + const bool storeDigis_; + + edm::EDPutTokenT> digisPutToken_; + edm::EDPutTokenT clustersPutToken_; +}; + +template +SiPixelDigisClustersFromSoAAlpaka::SiPixelDigisClustersFromSoAAlpaka(const edm::ParameterSet& iConfig) + : topoToken_(esConsumes()), + digisHostToken_(consumes(iConfig.getParameter("src"))), + clusterThresholds_(iConfig.getParameter("clusterThreshold_layer1"), + iConfig.getParameter("clusterThreshold_otherLayers")), + produceDigis_(iConfig.getParameter("produceDigis")), + storeDigis_(produceDigis_ && iConfig.getParameter("storeDigis")), + clustersPutToken_(produces()) { + if (produceDigis_) + digisPutToken_ = produces>(); +} + +template +void SiPixelDigisClustersFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("src", edm::InputTag("siPixelDigisSoA")); + desc.add("clusterThreshold_layer1", pixelClustering::clusterThresholdLayerOne); + 
desc.add("clusterThreshold_otherLayers", pixelClustering::clusterThresholdOtherLayers); + desc.add("produceDigis", true); + desc.add("storeDigis", true); + + descriptions.addWithDefaultLabel(desc); +} + +template +void SiPixelDigisClustersFromSoAAlpaka::produce(edm::StreamID, + edm::Event& iEvent, + const edm::EventSetup& iSetup) const { + const auto& digisHost = iEvent.get(digisHostToken_); + const auto& digisView = digisHost.const_view(); + const uint32_t nDigis = digisHost.nDigis(); + + const auto& ttopo = iSetup.getData(topoToken_); + constexpr auto maxModules = TrackerTraits::numberOfModules; + + std::unique_ptr> outputDigis; + if (produceDigis_) + outputDigis = std::make_unique>(); + if (storeDigis_) + outputDigis->reserve(maxModules); + auto outputClusters = std::make_unique(); + outputClusters->reserve(maxModules, nDigis / 2); + + edm::DetSet* detDigis = nullptr; + uint32_t detId = 0; + + for (uint32_t i = 0; i < nDigis; i++) { + // check for uninitialized digis + // this is set in RawToDigi_kernel in SiPixelRawToClusterGPUKernel.cu + if (digisView[i].rawIdArr() == 0) + continue; + + // check for noisy/dead pixels (electrons set to 0) + if (digisView[i].adc() == 0) + continue; + + detId = digisView[i].rawIdArr(); + if (storeDigis_) { + detDigis = &outputDigis->find_or_insert(detId); + + if ((*detDigis).empty()) + (*detDigis).data.reserve(64); // avoid the first relocations + } + + break; + } + + int32_t nclus = -1; + PixelClusterizerBase::AccretionCluster aclusters[TrackerTraits::maxNumClustersPerModules]; +#ifdef EDM_ML_DEBUG + auto totClustersFilled = 0; +#endif + + auto fillClusters = [&](uint32_t detId) { + if (nclus < 0) + return; // this in reality should never happen + edmNew::DetSetVector::FastFiller spc(*outputClusters, detId); + auto layer = (DetId(detId).subdetId() == 1) ? 
ttopo.pxbLayer(detId) : 0; + auto clusterThreshold = clusterThresholds_.getThresholdForLayerOnCondition(layer == 1); + for (int32_t ic = 0; ic < nclus + 1; ++ic) { + auto const& acluster = aclusters[ic]; + // in any case we cannot go out of sync with gpu... + if (acluster.charge < clusterThreshold) + edm::LogWarning("SiPixelDigisClustersFromSoAAlpaka") + << "cluster below charge Threshold " + << "Layer/DetId/clusId " << layer << '/' << detId << '/' << ic << " size/charge " << acluster.isize << '/' + << acluster.charge << "\n"; + // sort by row (x) + spc.emplace_back(acluster.isize, acluster.adc, acluster.x, acluster.y, acluster.xmin, acluster.ymin, ic); + aclusters[ic].clear(); +#ifdef EDM_ML_DEBUG + ++totClustersFilled; + const auto& cluster{spc.back()}; + // LogDebug("SiPixelDigisClustersFromSoAAlpaka") + std::cout << "putting in this cluster " << ic << " " << cluster.charge() << " " << cluster.pixelADC().size() + << "\n"; +#endif + std::push_heap(spc.begin(), spc.end(), [](SiPixelCluster const& cl1, SiPixelCluster const& cl2) { + return cl1.minPixelRow() < cl2.minPixelRow(); + }); + } + nclus = -1; + // sort by row (x) + std::sort_heap(spc.begin(), spc.end(), [](SiPixelCluster const& cl1, SiPixelCluster const& cl2) { + return cl1.minPixelRow() < cl2.minPixelRow(); + }); + if (spc.empty()) + spc.abort(); + }; + +#ifdef GPU_DEBUG + std::cout << "Dumping all digis. 
nDigis = " << nDigis << std::endl; +#endif + for (uint32_t i = 0; i < nDigis; i++) { +#ifdef GPU_DEBUG + PixelDigi dig2{digisView[i].pdigi()}; + std::cout << i << ";" << digisView[i].rawIdArr() << ";" << digisView[i].clus() << ";" << digisView[i].pdigi() << ";" + << digisView[i].adc() << ";" << dig2.row() << ";" << dig2.column() << std::endl; +#endif + + // check for uninitialized digis + if (digisView[i].rawIdArr() == 0) + continue; + // check for noisy/dead pixels (electrons set to 0) + if (digisView[i].adc() == 0) + continue; + if (digisView[i].clus() >= -pixelClustering::invalidClusterId) + continue; // not in cluster; TODO add an assert for the size + if (digisView[i].clus() == pixelClustering::invalidModuleId) + continue; // from clusters killed by charge cut +#ifdef EDM_ML_DEBUG + assert(digisView[i].rawIdArr() > 109999); +#endif + if (detId != digisView[i].rawIdArr()) { +#ifdef GPU_DEBUG + std::cout << ">> Closed module --" << detId << "; nclus = " << nclus << std::endl; +#endif + // new module + fillClusters(detId); +#ifdef EDM_ML_DEBUG + assert(nclus == -1); +#endif + detId = digisView[i].rawIdArr(); + if (storeDigis_) { + detDigis = &outputDigis->find_or_insert(detId); + if ((*detDigis).empty()) + (*detDigis).data.reserve(64); // avoid the first relocations + else { + edm::LogWarning("SiPixelDigisClustersFromSoAAlpaka") + << "Problem det present twice in input! 
" << (*detDigis).detId(); + } + } + } + PixelDigi dig{digisView[i].pdigi()}; + + if (storeDigis_) + (*detDigis).data.emplace_back(dig); + // fill clusters +#ifdef EDM_ML_DEBUG + assert(digisView[i].clus() >= 0); + assert(digisView[i].clus() < static_cast(TrackerTraits::maxNumClustersPerModules)); +#endif + nclus = std::max(digisView[i].clus(), nclus); + auto row = dig.row(); + auto col = dig.column(); + SiPixelCluster::PixelPos pix(row, col); + aclusters[digisView[i].clus()].add(pix, digisView[i].adc()); + } + + // fill final clusters + if (detId > 0) + fillClusters(detId); + +#ifdef EDM_ML_DEBUG + LogDebug("SiPixelDigisClustersFromSoAAlpaka") << "filled " << totClustersFilled << " clusters"; +#endif + + if (produceDigis_) + iEvent.put(digisPutToken_, std::move(outputDigis)); + + iEvent.put(clustersPutToken_, std::move(outputClusters)); +} + +#include "FWCore/Framework/interface/MakerMacros.h" + +using SiPixelDigisClustersFromSoAAlpakaPhase1 = SiPixelDigisClustersFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoAAlpakaPhase1); + +using SiPixelDigisClustersFromSoAAlpakaPhase2 = SiPixelDigisClustersFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoAAlpakaPhase2); + +using SiPixelDigisClustersFromSoAAlpakaHIonPhase1 = SiPixelDigisClustersFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelDigisClustersFromSoAAlpakaHIonPhase1); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc index e68c8074d8535..e270d31515842 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelPhase2DigiToClusterCUDA.cc @@ -9,37 +9,20 @@ #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include 
"CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" -#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" #include "DataFormats/FEDRawData/interface/FEDNumbering.h" -#include "DataFormats/FEDRawData/interface/FEDRawData.h" -#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" #include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" #include "EventFilter/SiPixelRawToDigi/interface/PixelUnpackingRegions.h" #include "FWCore/Framework/interface/ConsumesCollector.h" -#include "FWCore/Framework/interface/ESHandle.h" -#include "FWCore/Framework/interface/ESTransientHandle.h" -#include "FWCore/Framework/interface/ESWatcher.h" #include "FWCore/Framework/interface/Event.h" #include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/MakerMacros.h" #include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" -#include "FWCore/ServiceRegistry/interface/Service.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" -#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" -#include "RecoTracker/Record/interface/CkfComponentsRecord.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" // local includes -#include "SiPixelClusterThresholds.h" #include "SiPixelRawToClusterGPUKernel.h" class 
SiPixelPhase2DigiToClusterCUDA : public edm::stream::EDProducer { @@ -176,4 +159,5 @@ void SiPixelPhase2DigiToClusterCUDA::produce(edm::Event& iEvent, const edm::Even } // define as framework plugin +#include "FWCore/Framework/interface/MakerMacros.h" DEFINE_FWK_MODULE(SiPixelPhase2DigiToClusterCUDA); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index e426661eb3c33..0a763793d35fd 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -10,8 +10,8 @@ #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTGPURcd.h" -#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" #include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h" +#include "CalibTracker/SiPixelESProducers/interface/SiPixelROCsStatusAndMappingWrapper.h" #include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" @@ -36,10 +36,10 @@ #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" #include "RecoTracker/Record/interface/CkfComponentsRecord.h" // local includes -#include "SiPixelClusterThresholds.h" #include "SiPixelRawToClusterGPUKernel.h" template diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu 
b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 7b92dfc267e79..56718b4bdae14 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -26,11 +26,12 @@ #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" + // local includes #include "SiPixelRawToClusterGPUKernel.h" +#include "gpuCalibPixel.h" +#include "gpuClusterChargeCut.h" +#include "gpuClustering.h" // #define GPU_DEBUG @@ -288,7 +289,7 @@ namespace pixelgpudetails { const uint32_t wordCounter, const uint32_t *word, const uint8_t *fedIds, - SiPixelDigisCUDASOAView digisView, + SiPixelDigisSoA::View digisView, cms::cuda::SimpleVector *err, bool useQualityInfo, bool includeErrors) { diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 802ad2eb42c7e..06b30da68c8cd 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -2,25 +2,24 @@ #define RecoLocalTracker_SiPixelClusterizer_plugins_SiPixelRawToClusterGPUKernel_h #include + #include +#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" +#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "DataFormats/SiPixelDetId/interface/PixelChannelIdentifier.h" #include 
"DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" #include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" -#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" -#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "FWCore/Utilities/interface/typedefs.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" -#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_noncached_unique_ptr.h" -#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" // #define GPU_DEBUG -// local include(s) -#include "SiPixelClusterThresholds.h" - struct SiPixelROCsStatusAndMapping; class SiPixelGainForHLTonGPU; @@ -131,6 +130,14 @@ namespace pixelgpudetails { digis_d.setNModulesDigis(nModules_Clusters_h[0], nDigis); assert(nModules_Clusters_h[2] <= nModules_Clusters_h[1]); clusters_d.setNClusters(nModules_Clusters_h[1], nModules_Clusters_h[2]); + +#ifdef GPU_DEBUG + std::cout << "SiPixelClusterizerCUDA results:" << std::endl + << " > no. of digis: " << nDigis << std::endl + << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl + << " > no. 
of clusters: " << nModules_Clusters_h[1] << std::endl + << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl; +#endif // need to explicitly deallocate while the associated CUDA // stream is still alive // diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h new file mode 100644 index 0000000000000..ff885b5bad07f --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/CalibPixel.h @@ -0,0 +1,136 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_CalibPixel_h +#define RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_CalibPixel_h + +#include +#include +#include +#include + +#include + +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTUtilities.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" + +// #define GPU_DEBUG + +namespace calibPixel { + using namespace cms::alpakatools; + + constexpr uint16_t InvId = std::numeric_limits::max() - 1; + // must be > MaxNumModules + + struct CalibDigis { + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + SiPixelClusterThresholds clusterThresholds, + SiPixelDigisSoAView view, + SiPixelClustersSoAView clus_view, + const SiPixelGainCalibrationForHLTSoAConstView gains, + int numElements) const { + const float VCaltoElectronGain = clusterThresholds.vCaltoElectronGain; + const float VCaltoElectronGain_L1 = 
clusterThresholds.vCaltoElectronGain_L1;
+      const float VCaltoElectronOffset = clusterThresholds.vCaltoElectronOffset;
+      const float VCaltoElectronOffset_L1 = clusterThresholds.vCaltoElectronOffset_L1;
+
+      // zero for next kernels...
+      if (cms::alpakatools::once_per_grid(acc)) {
+        clus_view[0].clusModuleStart() = clus_view[0].moduleStart() = 0;
+      }
+
+      // reset the per-module cluster counters
+      cms::alpakatools::for_each_element_in_grid_strided(
+          acc, phase1PixelTopology::numberOfModules, [&](uint32_t i) { clus_view[i].clusInModule() = 0; });
+      // calibrate every digi whose module id is not already InvId
+      cms::alpakatools::for_each_element_in_grid_strided(acc, numElements, [&](uint32_t i) {
+        auto dvgi = view[i];
+        if (dvgi.moduleId() != InvId) {
+          bool isDeadColumn = false, isNoisyColumn = false;
+          int row = dvgi.xx();
+          int col = dvgi.yy();
+          auto ret = SiPixelGainUtilities::getPedAndGain(gains, dvgi.moduleId(), col, row, isDeadColumn, isNoisyColumn);
+          float pedestal = ret.first;
+          float gain = ret.second;
+          if (isDeadColumn | isNoisyColumn) {
+            // report the module first: once moduleId() is overwritten the message would always show InvId
+            printf("bad pixel at %d in %d\n", i, dvgi.moduleId());
+            dvgi.moduleId() = InvId;
+            dvgi.adc() = 0;
+          } else {
+            float vcal = dvgi.adc() * gain - pedestal * gain;
+
+            float conversionFactor = dvgi.moduleId() < 96 ? VCaltoElectronGain_L1 : VCaltoElectronGain;
+            float offset = dvgi.moduleId() < 96 ?
VCaltoElectronOffset_L1 : VCaltoElectronOffset; +#ifdef GPU_DEBUG + auto old_adc = dvgi.adc(); +#endif + dvgi.adc() = std::max(100, int(vcal * conversionFactor + offset)); +#ifdef GPU_DEBUG + if (cms::alpakatools::once_per_grid(acc)) { + printf( + "module %d pixel %d -> old_adc = %d; vcal = %.2f; conversionFactor = %.2f; offset = %.2f; new_adc = " + "%d \n", + dvgi.moduleId(), + i, + old_adc, + vcal, + conversionFactor, + offset, + dvgi.adc()); + } +#endif + } + } + }); + } + }; + struct CalibDigisPhase2 { + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + SiPixelClusterThresholds clusterThresholds, + SiPixelDigisSoAView view, + SiPixelClustersSoAView clus_view, + int numElements) const { + const float ElectronPerADCGain = clusterThresholds.electronPerADCGain; + const int8_t Phase2ReadoutMode = clusterThresholds.phase2ReadoutMode; + const uint16_t Phase2DigiBaseline = clusterThresholds.phase2DigiBaseline; + const uint8_t Phase2KinkADC = clusterThresholds.phase2KinkADC; + + // zero for next kernels... + if (cms::alpakatools::once_per_grid(acc)) { + clus_view[0].clusModuleStart() = clus_view[0].moduleStart() = 0; + } + + cms::alpakatools::for_each_element_in_grid_strided( + acc, phase2PixelTopology::numberOfModules, [&](uint32_t i) { clus_view[i].clusInModule() = 0; }); + cms::alpakatools::for_each_element_in_grid_strided(acc, numElements, [&](uint32_t i) { + auto dvgi = view[i]; + if (pixelClustering::invalidModuleId != dvgi.moduleId()) { + const int mode = (Phase2ReadoutMode < -1 ? -1 : Phase2ReadoutMode); + int adc_int = dvgi.adc(); + if (mode < 0) + adc_int = int(adc_int * ElectronPerADCGain); + else { + if (adc_int < Phase2KinkADC) + adc_int = int((adc_int + 0.5) * ElectronPerADCGain); + else { + const int8_t dspp = (Phase2ReadoutMode < 10 ? Phase2ReadoutMode : 10); + const int8_t ds = int8_t(dspp <= 1 ? 
1 : (dspp - 1) * (dspp - 1)); + adc_int -= Phase2KinkADC; + adc_int *= ds; + adc_int += Phase2KinkADC; + adc_int = ((adc_int + 0.5 * ds) * ElectronPerADCGain); + } + adc_int += int(Phase2DigiBaseline); + } + dvgi.adc() = std::min(adc_int, int(std::numeric_limits::max())); + } + }); + } + }; +} // namespace calibPixel + +#endif // RecoLocalTracker_SiPixelClusterizer_plugins_alpaka_CalibPixel_h diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h new file mode 100644 index 0000000000000..c149707e41d9a --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/ClusterChargeCut.h @@ -0,0 +1,207 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_alpaka_ClusterChargeCut_h +#define RecoLocalTracker_SiPixelClusterizer_alpaka_ClusterChargeCut_h + +#include +#include + +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersSoA.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisSoA.h" +#include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" + +// #define GPU_DEBUG + +namespace pixelClustering { + + template + struct ClusterChargeCut { + template + ALPAKA_FN_ACC void operator()( + const TAcc& acc, + SiPixelDigisSoAView digi_view, + SiPixelClustersSoAView clus_view, + SiPixelClusterThresholds + clusterThresholds, // charge cut on cluster in electrons (for layer 1 and for other layers) + const uint32_t numElements) const { + constexpr int startBPIX2 = TrackerTraits::layerStart[1]; + constexpr int32_t maxNumClustersPerModules = TrackerTraits::maxNumClustersPerModules; + [[maybe_unused]] constexpr int nMaxModules = TrackerTraits::numberOfModules; + + const uint32_t blockIdx(alpaka::getIdx(acc)[0u]); + auto firstModule = blockIdx; + auto endModule = clus_view[0].moduleStart(); + if 
(blockIdx >= endModule)
+        return;
+
+      auto& charge = alpaka::declareSharedVar(acc);
+      auto& ok = alpaka::declareSharedVar(acc);
+      auto& newclusId = alpaka::declareSharedVar(acc);
+
+      const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]);
+
+      // each block walks over the modules module, module + gridDimension, ...
+      for (auto module = firstModule; module < endModule; module += gridDimension) {
+        auto firstPixel = clus_view[1 + module].moduleStart();
+        auto thisModuleId = digi_view[firstPixel].moduleId();
+
+        ALPAKA_ASSERT_OFFLOAD(nMaxModules < maxNumModules);
+        ALPAKA_ASSERT_OFFLOAD(startBPIX2 < nMaxModules);
+
+        uint32_t nclus = clus_view[thisModuleId].clusInModule();
+        // empty module: skip it but keep processing the remaining modules assigned to this block
+        // (a return here would drop them whenever the grid has fewer blocks than modules)
+        if (nclus == 0)
+          continue;
+
+        if (cms::alpakatools::once_per_block(acc) && nclus > maxNumClustersPerModules)
+          printf("Warning too many clusters in module %d in block %d: %d > %d\n",
+                 thisModuleId,
+                 module,
+                 nclus,
+                 maxNumClustersPerModules);
+
+        // Stride = block size.
+        const uint32_t blockDimension(alpaka::getWorkDiv(acc)[0u]);
+
+        // Get thread / CPU element indices in block.
+        const auto& [firstElementIdxNoStride, endElementIdxNoStride] =
+            cms::alpakatools::element_index_range_in_block(acc, firstPixel);
+
+        if (nclus > maxNumClustersPerModules) {
+          uint32_t firstElementIdx = firstElementIdxNoStride;
+          uint32_t endElementIdx = endElementIdxNoStride;
+          // remove excess FIXME find a way to cut charge first....
+          for (uint32_t i = firstElementIdx; i < numElements; ++i) {
+            if (not cms::alpakatools::next_valid_element_index_strided(
+                    i, firstElementIdx, endElementIdx, blockDimension, numElements))
+              break;
+            if (digi_view[i].moduleId() == invalidModuleId)
+              continue;  // not valid
+            if (digi_view[i].moduleId() != thisModuleId)
+              break;  // end of module
+            if (digi_view[i].clus() >= maxNumClustersPerModules) {
+              digi_view[i].moduleId() = invalidModuleId;
+              digi_view[i].clus() = invalidModuleId;
+            }
+          }
+          nclus = maxNumClustersPerModules;
+        }
+
+#ifdef GPU_DEBUG
+        if (thisModuleId % 100 == 1)
+          if (cms::alpakatools::once_per_block(acc))
+            printf("start cluster charge cut for module %d in block %d\n", thisModuleId, module);
+#endif
+
+        ALPAKA_ASSERT_OFFLOAD(nclus <= maxNumClustersPerModules);
+        cms::alpakatools::for_each_element_in_block_strided(acc, nclus, [&](uint32_t i) { charge[i] = 0; });
+        alpaka::syncBlockThreads(acc);
+
+        // accumulate the adc of every valid digi of this module into the charge of its cluster
+        uint32_t firstElementIdx = firstElementIdxNoStride;
+        uint32_t endElementIdx = endElementIdxNoStride;
+        for (uint32_t i = firstElementIdx; i < numElements; ++i) {
+          if (not cms::alpakatools::next_valid_element_index_strided(
+                  i, firstElementIdx, endElementIdx, blockDimension, numElements))
+            break;
+          if (digi_view[i].moduleId() == invalidModuleId)
+            continue;  // not valid
+          if (digi_view[i].moduleId() != thisModuleId)
+            break;  // end of module
+          alpaka::atomicAdd(acc,
+                            &charge[digi_view[i].clus()],
+                            static_cast(digi_view[i].adc()),
+                            alpaka::hierarchy::Threads{});
+        }
+        alpaka::syncBlockThreads(acc);
+
+        auto chargeCut = clusterThresholds.getThresholdForLayerOnCondition(thisModuleId < startBPIX2);
+        bool allGood = true;
+
+        cms::alpakatools::for_each_element_in_block_strided(acc, nclus, [&](uint32_t i) {
+          newclusId[i] = ok[i] = (charge[i] > chargeCut) ? 1 : 0;
+          if (ok[i] == 0)
+            allGood = allGood && false;
+
+          // #ifdef GPU_DEBUG
+          // printf("module %d -> chargeCut = %d; cluster %d; charge = %d; ok = %s\n",thisModuleId, chargeCut,i,charge[i],ok[i] > 0 ? " -> good" : "-> cut");
+          // #endif
+        });
+        alpaka::syncBlockThreads(acc);
+
+        // if all clusters above threshold do nothing
+        // if (allGood)
+        //   continue;
+
+        // renumber
+        auto& ws = alpaka::declareSharedVar(acc);
+        constexpr uint32_t maxThreads = 1024;
+        auto minClust = std::min(nclus, maxThreads);
+
+        cms::alpakatools::blockPrefixScan(acc, newclusId, minClust, ws);
+
+        if constexpr (maxNumClustersPerModules > maxThreads)  //only if needed
+        {
+          for (uint32_t offset = maxThreads; offset < nclus; offset += maxThreads) {
+            cms::alpakatools::blockPrefixScan(acc, newclusId + offset, nclus - offset, ws);
+
+            cms::alpakatools::for_each_element_in_block_strided(acc, nclus - offset, [&](uint32_t i) {
+              uint32_t prevBlockEnd = ((i + offset / maxThreads) * maxThreads) - 1;
+              newclusId[i] += newclusId[prevBlockEnd];
+            });
+            alpaka::syncBlockThreads(acc);
+          }
+        }
+
+        ALPAKA_ASSERT_OFFLOAD(nclus >= newclusId[nclus - 1]);
+
+        // every cluster survived the cut: nothing to renumber for this module,
+        // go on with the next module handled by this block instead of leaving the kernel
+        if (nclus == newclusId[nclus - 1])
+          continue;
+
+        clus_view[thisModuleId].clusInModule() = newclusId[nclus - 1];
+        alpaka::syncBlockThreads(acc);
+
+#ifdef GPU_DEBUG
+        if (thisModuleId % 100 == 1)
+          if (cms::alpakatools::once_per_block(acc))
+            printf("module %d -> chargeCut = %d; nclus (pre cut) = %d; nclus (after cut) = %d\n",
+                   thisModuleId,
+                   chargeCut,
+                   nclus,
+                   clus_view[thisModuleId].clusInModule());
+#endif
+        // mark bad cluster again
+        cms::alpakatools::for_each_element_in_block_strided(acc, nclus, [&](uint32_t i) {
+          if (0 == ok[i])
+            newclusId[i] = invalidModuleId + 1;
+        });
+
+        alpaka::syncBlockThreads(acc);
+
+        // reassign id
+        firstElementIdx = firstElementIdxNoStride;
+        endElementIdx = endElementIdxNoStride;
+        for (uint32_t i = firstElementIdx; i < numElements; ++i) {
+          if (not cms::alpakatools::next_valid_element_index_strided(
+                  i,
firstElementIdx, endElementIdx, blockDimension, numElements)) + break; + if (digi_view[i].moduleId() == invalidModuleId) + continue; // not valid + if (digi_view[i].moduleId() != thisModuleId) + break; // end of module + if (0 == ok[digi_view[i].clus()]) + digi_view[i].moduleId() = digi_view[i].clus() = invalidModuleId; + else + digi_view[i].clus() = newclusId[digi_view[i].clus()] - 1; + // digi_view[i].clus() = newclusId[digi_view[i].clus()] - 1; + // if (digi_view[i].clus() == invalidModuleId) + // digi_view[i].moduleId() = invalidModuleId; + } + + alpaka::syncBlockThreads(acc); + + //done + } + } + }; + +} // namespace pixelClustering + +#endif // diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h new file mode 100644 index 0000000000000..616ccbd3eb8c7 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/PixelClustering.h @@ -0,0 +1,454 @@ +#ifndef RecoLocalTracker_SiPixelClusterizer_alpaka_PixelClustering_h +#define RecoLocalTracker_SiPixelClusterizer_alpaka_PixelClustering_h + +#include +#include +#include +#include +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" + +// #define GPU_DEBUG + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + namespace pixelClustering { + +#ifdef GPU_DEBUG + template >> + ALPAKA_STATIC_ACC_MEM_GLOBAL uint32_t gMaxHit = 0; +#endif + + namespace pixelStatus { + // Phase-1 pixel modules + constexpr uint32_t pixelSizeX = pixelTopology::Phase1::numRowsInModule; + constexpr uint32_t pixelSizeY = pixelTopology::Phase1::numColsInModule; + + // Use 0x00, 0x01, 0x03 so each can be OR'ed on top of the previous ones + 
enum Status : uint32_t { kEmpty = 0x00, kFound = 0x01, kDuplicate = 0x03 }; + + constexpr uint32_t bits = 2; + constexpr uint32_t mask = (0x01 << bits) - 1; + constexpr uint32_t valuesPerWord = sizeof(uint32_t) * 8 / bits; + constexpr uint32_t size = pixelSizeX * pixelSizeY / valuesPerWord; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr uint32_t getIndex(uint16_t x, uint16_t y) { + return (pixelSizeX * y + x) / valuesPerWord; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr uint32_t getShift(uint16_t x, uint16_t y) { + return (x % valuesPerWord) * 2; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr Status getStatus(uint32_t const* __restrict__ status, + uint16_t x, + uint16_t y) { + uint32_t index = getIndex(x, y); + uint32_t shift = getShift(x, y); + return Status{(status[index] >> shift) & mask}; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr bool isDuplicate(uint32_t const* __restrict__ status, + uint16_t x, + uint16_t y) { + return getStatus(status, x, y) == kDuplicate; + } + + /* FIXME + * In the more general case (e.g. a multithreaded CPU backend) there is a potential race condition + * between the read of status[index] at line NNN and the atomicCas at line NNN. + * We should investigate: + * - if `status` should be read through a `volatile` pointer (CUDA/ROCm) + * - if `status` should be read with an atomic load (CPU) + */ + template >> + ALPAKA_FN_ACC ALPAKA_FN_INLINE constexpr void promote(TAcc const& acc, + uint32_t* __restrict__ status, + const uint16_t x, + const uint16_t y) { + uint32_t index = getIndex(x, y); + uint32_t shift = getShift(x, y); + uint32_t old_word = status[index]; + uint32_t expected = old_word; + do { + expected = old_word; + Status old_status{(old_word >> shift) & mask}; + if (kDuplicate == old_status) { + // nothing to do + return; + } + Status new_status = (kEmpty == old_status) ? 
kFound : kDuplicate; + uint32_t new_word = old_word | (static_cast(new_status) << shift); + old_word = alpaka::atomicCas(acc, &status[index], expected, new_word, alpaka::hierarchy::Blocks{}); + } while (expected != old_word); + } + + } // namespace pixelStatus + + template + struct CountModules { + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + SiPixelDigisSoAView digi_view, + SiPixelClustersSoAView clus_view, + const unsigned int numElements) const { + [[maybe_unused]] constexpr int nMaxModules = TrackerTraits::numberOfModules; + +#ifdef GPU_DEBUG + if (cms::alpakatools::once_per_grid(acc)) { + printf("Starting to count modules to set module starts:"); + } +#endif + cms::alpakatools::for_each_element_in_grid_strided(acc, numElements, [&](uint32_t i) { + digi_view[i].clus() = i; + if (::pixelClustering::invalidModuleId != digi_view[i].moduleId()) { + int j = i - 1; + while (j >= 0 and digi_view[j].moduleId() == ::pixelClustering::invalidModuleId) + --j; + if (j < 0 or digi_view[j].moduleId() != digi_view[i].moduleId()) { + // boundary... + auto loc = alpaka::atomicInc( + acc, clus_view.moduleStart(), std::decay_t(nMaxModules), alpaka::hierarchy::Blocks{}); +#ifdef GPU_DEBUG + printf("> New module (no. %d) found at digi %d \n", loc, i); +#endif + clus_view[loc + 1].moduleStart() = i; + } + } + }); + } + }; + + template + struct FindClus { + template + ALPAKA_FN_ACC void operator()(const TAcc& acc, + SiPixelDigisSoAView digi_view, + SiPixelClustersSoAView clus_view, + const unsigned int numElements) const { + constexpr bool isPhase2 = std::is_base_of::value; + constexpr const uint32_t pixelStatusSize = isPhase2 ? 
1 : pixelStatus::size; + + // packed words array used to store the pixelStatus of each pixel + auto& status = alpaka::declareSharedVar(acc); + + // find the index of the first pixel not belonging to this module (or invalid) + auto& msize = alpaka::declareSharedVar(acc); + + const uint32_t blockIdx(alpaka::getIdx(acc)[0u]); + if (blockIdx >= clus_view[0].moduleStart()) + return; + + auto firstModule = blockIdx; + auto endModule = clus_view[0].moduleStart(); + + const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]); + + for (auto module = firstModule; module < endModule; module += gridDimension) { + auto firstPixel = clus_view[1 + module].moduleStart(); + auto thisModuleId = digi_view[firstPixel].moduleId(); + ALPAKA_ASSERT_OFFLOAD(thisModuleId < TrackerTraits::numberOfModules); +#ifdef GPU_DEBUG + if (thisModuleId % 100 == 1) + if (cms::alpakatools::once_per_block(acc)) + printf("start clusterizer for module %d in block %d\n", thisModuleId, module); +#endif + + msize = numElements; + alpaka::syncBlockThreads(acc); + + // Stride = block size. + const uint32_t blockDimension(alpaka::getWorkDiv(acc)[0u]); + + // Get thread / CPU element indices in block. 
+          const auto& [firstElementIdxNoStride, endElementIdxNoStride] =
+              cms::alpakatools::element_index_range_in_block(acc, firstPixel);
+          uint32_t firstElementIdx = firstElementIdxNoStride;
+          uint32_t endElementIdx = endElementIdxNoStride;
+
+          // skip threads not associated to an existing pixel
+          for (uint32_t i = firstElementIdx; i < numElements; ++i) {
+            if (not cms::alpakatools::next_valid_element_index_strided(
+                    i, firstElementIdx, endElementIdx, blockDimension, numElements))
+              break;
+            auto id = digi_view[i].moduleId();
+            if (id == ::pixelClustering::invalidModuleId)  // skip invalid pixels
+              continue;
+            if (id != thisModuleId) {  // find the first pixel in a different module
+              alpaka::atomicMin(acc, &msize, i, alpaka::hierarchy::Threads{});
+              break;
+            }
+          }
+          //init hist (ymax=416 < 512 : 9bits)
+          constexpr uint32_t maxPixInModule = TrackerTraits::maxPixInModule;
+          constexpr auto nbins = TrackerTraits::clusterBinning;
+          constexpr auto nbits = TrackerTraits::clusterBits;
+          using Hist = cms::alpakatools::HistoContainer;
+          auto& hist = alpaka::declareSharedVar(acc);
+          auto& ws = alpaka::declareSharedVar(acc);
+          cms::alpakatools::for_each_element_in_block_strided(
+              acc, Hist::totbins(), [&](uint32_t j) { hist.off[j] = 0; });
+          alpaka::syncBlockThreads(acc);
+          ALPAKA_ASSERT_OFFLOAD((msize == numElements) or
+                                ((msize < numElements) and (digi_view[msize].moduleId() != thisModuleId)));
+          // limit to maxPixInModule (FIXME if recurrent (and not limited to simulation with low threshold) one will need to implement something cleverer)
+          // msize is shared by the threads of one block, so the clamp must be applied once in every
+          // block: a once-per-grid guard would leave all the other blocks with an unclamped msize
+          if (cms::alpakatools::once_per_block(acc)) {
+            if (msize - firstPixel > maxPixInModule) {
+              printf("too many pixels in module %d: %d > %d\n", thisModuleId, msize - firstPixel, maxPixInModule);
+              msize = maxPixInModule + firstPixel;
+            }
+          }
+          alpaka::syncBlockThreads(acc);
+          ALPAKA_ASSERT_OFFLOAD(msize - firstPixel <= maxPixInModule);
+
+#ifdef GPU_DEBUG
+          auto& totGood = alpaka::declareSharedVar(acc);
+          totGood = 0;
+
alpaka::syncBlockThreads(acc); +#endif + // remove duplicate pixels + if constexpr (not isPhase2) { //FIXME remove THIS + if (msize > 1) { + cms::alpakatools::for_each_element_in_block_strided( + acc, pixelStatus::size, [&](uint32_t i) { status[i] = 0; }); + alpaka::syncBlockThreads(acc); + + cms::alpakatools::for_each_element_in_block_strided(acc, msize - 1, firstElementIdx, [&](uint32_t i) { + // skip invalid pixels + if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId) + return; + pixelStatus::promote(acc, status, digi_view[i].xx(), digi_view[i].yy()); + }); + alpaka::syncBlockThreads(acc); + cms::alpakatools::for_each_element_in_block_strided(acc, msize - 1, firstElementIdx, [&](uint32_t i) { + // skip invalid pixels + if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId) + return; + if (pixelStatus::isDuplicate(status, digi_view[i].xx(), digi_view[i].yy())) { + digi_view[i].moduleId() = ::pixelClustering::invalidModuleId; + digi_view[i].rawIdArr() = 0; + } + }); + alpaka::syncBlockThreads(acc); + } + } + // fill histo + cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) { + if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) { // skip invalid pixels + hist.count(acc, digi_view[i].yy()); +#ifdef GPU_DEBUG + alpaka::atomicAdd(acc, &totGood, 1u, alpaka::hierarchy::Blocks{}); +#endif + } + }); + alpaka::syncBlockThreads(acc); + cms::alpakatools::for_each_element_in_block(acc, 32u, [&](uint32_t i) { + ws[i] = 0; // used by prefix scan... 
+ }); + alpaka::syncBlockThreads(acc); + hist.finalize(acc, ws); + alpaka::syncBlockThreads(acc); +#ifdef GPU_DEBUG + ALPAKA_ASSERT_OFFLOAD(hist.size() == totGood); + if (thisModuleId % 100 == 1) + if (cms::alpakatools::once_per_block(acc)) + printf("histo size %d\n", hist.size()); +#endif + cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) { + if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) { // skip invalid pixels + hist.fill(acc, digi_view[i].yy(), i - firstPixel); + } + }); + // Assume that we can cover the whole module with up to 16 blockDimension-wide iterations + // This maxiter value was tuned for GPU, with 256 or 512 threads per block. + // Hence, also works for CPU case, with 256 or 512 elements per thread. + // Real constrainst is maxiter = hist.size() / blockDimension, + // with blockDimension = threadPerBlock * elementsPerThread. + // Hence, maxiter can be tuned accordingly to the workdiv. + constexpr unsigned int maxiter = 16; + ALPAKA_ASSERT_OFFLOAD((hist.size() / blockDimension) <= maxiter); + + // NB: can be tuned. + constexpr uint32_t threadDimension = cms::alpakatools::requires_single_thread_per_block_v ? 1 : 256; + +#ifndef NDEBUG + [[maybe_unused]] const uint32_t runTimeThreadDimension( + alpaka::getWorkDiv(acc)[0u]); + ALPAKA_ASSERT_OFFLOAD(runTimeThreadDimension <= threadDimension); +#endif + + // nearest neighbour + // allocate space for duplicate pixels: a pixel can appear more than once with different charge in the same event + constexpr int maxNeighbours = 10; + uint16_t nn[maxiter][threadDimension][maxNeighbours]; + uint8_t nnn[maxiter][threadDimension]; // number of nn + for (uint32_t elementIdx = 0; elementIdx < threadDimension; ++elementIdx) { + for (uint32_t k = 0; k < maxiter; ++k) { + nnn[k][elementIdx] = 0; + } + } + + alpaka::syncBlockThreads(acc); // for hit filling! 
+ +#ifdef GPU_DEBUG + // look for anomalous high occupancy + auto& n40 = alpaka::declareSharedVar(acc); + auto& n60 = alpaka::declareSharedVar(acc); + n40 = n60 = 0; + alpaka::syncBlockThreads(acc); + cms::alpakatools::for_each_element_in_block_strided(acc, Hist::nbins(), [&](uint32_t j) { + if (hist.size(j) > 60) + alpaka::atomicAdd(acc, &n60, 1u, alpaka::hierarchy::Blocks{}); + if (hist.size(j) > 40) + alpaka::atomicAdd(acc, &n40, 1u, alpaka::hierarchy::Blocks{}); + }); + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + if (n60 > 0) + printf("columns with more than 60 px %d in %d\n", n60, thisModuleId); + else if (n40 > 0) + printf("columns with more than 40 px %d in %d\n", n40, thisModuleId); + } + alpaka::syncBlockThreads(acc); +#endif + // fill NN + uint32_t k = 0u; + cms::alpakatools::for_each_element_in_block_strided(acc, hist.size(), [&](uint32_t j) { + const uint32_t jEquivalentClass = j % threadDimension; + k = j / blockDimension; + ALPAKA_ASSERT_OFFLOAD(k < maxiter); + auto p = hist.begin() + j; + auto i = *p + firstPixel; + ALPAKA_ASSERT_OFFLOAD(digi_view[i].moduleId() != ::pixelClustering::invalidModuleId); + ALPAKA_ASSERT_OFFLOAD(digi_view[i].moduleId() == thisModuleId); // same module + int be = Hist::bin(digi_view[i].yy() + 1); + auto e = hist.end(be); + ++p; + ALPAKA_ASSERT_OFFLOAD(0 == nnn[k][jEquivalentClass]); + for (; p < e; ++p) { + auto m = (*p) + firstPixel; + ALPAKA_ASSERT_OFFLOAD(m != i); + ALPAKA_ASSERT_OFFLOAD(int(digi_view[m].yy()) - int(digi_view[i].yy()) >= 0); + ALPAKA_ASSERT_OFFLOAD(int(digi_view[m].yy()) - int(digi_view[i].yy()) <= 1); + if (std::abs(int(digi_view[m].xx()) - int(digi_view[i].xx())) <= 1) { + auto l = nnn[k][jEquivalentClass]++; + ALPAKA_ASSERT_OFFLOAD(l < maxNeighbours); + nn[k][jEquivalentClass][l] = *p; + } + } + }); + // for each pixel, look at all the pixels until the end of the module; + // when two valid pixels within +/- 1 in x or y are found, set their id to the minimum; + // 
after the loop, all the pixel in each cluster should have the id equeal to the lowest + // pixel in the cluster ( clus[i] == i ). + bool more = true; + int nloops = 0; + while (alpaka::syncBlockThreadsPredicate(acc, more)) { + if (1 == nloops % 2) { + cms::alpakatools::for_each_element_in_block_strided(acc, hist.size(), [&](uint32_t j) { + auto p = hist.begin() + j; + auto i = *p + firstPixel; + auto m = digi_view[i].clus(); + while (m != digi_view[m].clus()) + m = digi_view[m].clus(); + digi_view[i].clus() = m; + }); + } else { + more = false; + uint32_t k = 0u; + cms::alpakatools::for_each_element_in_block_strided(acc, hist.size(), [&](uint32_t j) { + k = j / blockDimension; + const uint32_t jEquivalentClass = j % threadDimension; + auto p = hist.begin() + j; + auto i = *p + firstPixel; + for (int kk = 0; kk < nnn[k][jEquivalentClass]; ++kk) { + auto l = nn[k][jEquivalentClass][kk]; + auto m = l + firstPixel; + ALPAKA_ASSERT_OFFLOAD(m != i); + auto old = + alpaka::atomicMin(acc, &digi_view[m].clus(), digi_view[i].clus(), alpaka::hierarchy::Blocks{}); + if (old != digi_view[i].clus()) { + // end the loop only if no changes were applied + more = true; + } + alpaka::atomicMin(acc, &digi_view[i].clus(), old, alpaka::hierarchy::Blocks{}); + } // nnloop + }); // pixel loop + } + ++nloops; + } // end while +#ifdef GPU_DEBUG + { + auto& n0 = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) + n0 = nloops; + alpaka::syncBlockThreads(acc); +#ifndef NDEBUG + [[maybe_unused]] auto ok = n0 == nloops; + ALPAKA_ASSERT_OFFLOAD(alpaka::syncBlockThreadsPredicate(acc, ok)); +#endif + if (thisModuleId % 100 == 1) + if (cms::alpakatools::once_per_block(acc)) + printf("# loops %d\n", nloops); + } +#endif + auto& foundClusters = alpaka::declareSharedVar(acc); + foundClusters = 0; + alpaka::syncBlockThreads(acc); + + // find the number of different clusters, identified by a pixels with clus[i] == i; + // mark these pixels with a negative id. 
+ cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) { + if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) { // skip invalid pixels + if (digi_view[i].clus() == static_cast(i)) { + auto old = alpaka::atomicInc(acc, &foundClusters, 0xffffffff, alpaka::hierarchy::Threads{}); + digi_view[i].clus() = -(old + 1); + } + } + }); + alpaka::syncBlockThreads(acc); + + // propagate the negative id to all the pixels in the cluster. + cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) { + if (digi_view[i].moduleId() != ::pixelClustering::invalidModuleId) { // skip invalid pixels + if (digi_view[i].clus() >= 0) { + // mark each pixel in a cluster with the same id as the first one + digi_view[i].clus() = digi_view[digi_view[i].clus()].clus(); + } + } + }); + alpaka::syncBlockThreads(acc); + + // adjust the cluster id to be a positive value starting from 0 + cms::alpakatools::for_each_element_in_block_strided(acc, msize, firstPixel, [&](uint32_t i) { + if (digi_view[i].moduleId() == ::pixelClustering::invalidModuleId) { // skip invalid pixels + digi_view[i].clus() = ::pixelClustering::invalidClusterId; + } else { + digi_view[i].clus() = -digi_view[i].clus() - 1; + } + }); + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + clus_view[thisModuleId].clusInModule() = foundClusters; + clus_view[module].moduleId() = thisModuleId; +#ifdef GPU_DEBUG + if (foundClusters > gMaxHit) { + gMaxHit = foundClusters; + if (foundClusters > 8) + printf("max hit %d in %d\n", foundClusters, thisModuleId); + } + // if (thisModuleId % 100 == 1) + printf("%d clusters in module %d\n", foundClusters, thisModuleId); +#endif + } + } // module loop + } + }; + } // namespace pixelClustering +} // namespace ALPAKA_ACCELERATOR_NAMESPACE +#endif // plugin_SiPixelClusterizer_alpaka_PixelClustering.h diff --git 
a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc new file mode 100644 index 0000000000000..5d0b355d1eebc --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelPhase2DigiToCluster.cc @@ -0,0 +1,158 @@ +// C++ includes +#include +#include +#include +#include +#include + +#include "DataFormats/Common/interface/DetSetVector.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelDigi/interface/PixelDigi.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/ESGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" + +#include "SiPixelRawToClusterKernel.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class SiPixelPhase2DigiToCluster : public stream::SynchronizingEDProducer<> { + public: + explicit SiPixelPhase2DigiToCluster(const edm::ParameterSet& iConfig); + ~SiPixelPhase2DigiToCluster() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + using Algo = 
pixelDetails::SiPixelRawToClusterKernel; + + private: + void acquire(device::Event const& iEvent, device::EventSetup const& iSetup) override; + void produce(device::Event& iEvent, device::EventSetup const& iSetup) override; + + const edm::ESGetToken geomToken_; + const edm::EDGetTokenT> pixelDigiToken_; + + device::EDPutToken digiPutToken_; + device::EDPutToken digiErrorPutToken_; + device::EDPutToken clusterPutToken_; + + Algo Algo_; + + const bool includeErrors_; + const SiPixelClusterThresholds clusterThresholds_; + uint32_t nDigis_ = 0; + + SiPixelDigisSoACollection digis_d; + }; + + SiPixelPhase2DigiToCluster::SiPixelPhase2DigiToCluster(const edm::ParameterSet& iConfig) + : geomToken_(esConsumes()), + pixelDigiToken_(consumes>(iConfig.getParameter("InputDigis"))), + digiPutToken_(produces()), + clusterPutToken_(produces()), + includeErrors_(iConfig.getParameter("IncludeErrors")), + clusterThresholds_{iConfig.getParameter("clusterThreshold_layer1"), + iConfig.getParameter("clusterThreshold_otherLayers"), + static_cast(iConfig.getParameter("ElectronPerADCGain")), + static_cast(iConfig.getParameter("Phase2ReadoutMode")), + static_cast(iConfig.getParameter("Phase2DigiBaseline")), + static_cast(iConfig.getParameter("Phase2KinkADC"))} { + if (includeErrors_) { + digiErrorPutToken_ = produces(); + } + } + + void SiPixelPhase2DigiToCluster::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("IncludeErrors", true); + desc.add("clusterThreshold_layer1", + pixelClustering::clusterThresholdPhase2LayerOne); //FIXME (fix the CUDA) + desc.add("clusterThreshold_otherLayers", pixelClustering::clusterThresholdPhase2OtherLayers); + desc.add("ElectronPerADCGain", 1500.); + desc.add("Phase2ReadoutMode", 3); + desc.add("Phase2DigiBaseline", 1000); + desc.add("Phase2KinkADC", 8); + desc.add("InputDigis", edm::InputTag("simSiPixelDigis:Pixel")); + descriptions.addWithDefaultLabel(desc); + } + + void 
SiPixelPhase2DigiToCluster::acquire(device::Event const& iEvent, device::EventSetup const& iSetup) {
+    // Flattens the input digis into a host SoA, copies that SoA into digis_d and hands it to
+    // Algo_.makePhase2ClustersAsync(). The per-event digi count is cached in nDigis_ for produce().
+    auto const& input = iEvent.get(pixelDigiToken_);
+
+    const TrackerGeometry* geom_ = &iSetup.getData(geomToken_);
+
+    // First pass: count the digis over all detector sets so the SoA can be sized.
+    uint32_t nDigis = 0;
+
+    for (const auto& det : input) {
+      nDigis += det.size();
+    }
+
+    // Publish this event's count *before* testing it. Testing the member first (as was done
+    // previously) compared against the value left by the previous event - initially 0 - so the
+    // function returned early on every event and the clustering was never launched.
+    nDigis_ = nDigis;
+    if (nDigis_ == 0)
+      return;
+
+    SiPixelDigisHost digis_h(nDigis, iEvent.queue());
+
+    // Second pass: nDigis is reused as the running write index into the host SoA.
+    nDigis = 0;
+    for (const auto& det : input) {
+      unsigned int detid = det.detId();
+      DetId detIdObject(detid);
+      const GeomDetUnit* genericDet = geom_->idToDetUnit(detIdObject);
+      auto const gind = genericDet->index();
+      for (auto const& px : det) {
+        digis_h.view()[nDigis].moduleId() = uint16_t(gind);
+
+        digis_h.view()[nDigis].xx() = uint16_t(px.row());
+        digis_h.view()[nDigis].yy() = uint16_t(px.column());
+        digis_h.view()[nDigis].adc() = uint16_t(px.adc());
+
+        digis_h.view()[nDigis].pdigi() = uint32_t(px.packedData());
+
+        digis_h.view()[nDigis].rawIdArr() = uint32_t(detid);
+
+        nDigis++;
+      }
+    }
+
+    // After the fill loop nDigis is back to the total counted above.
+    digis_d = SiPixelDigisSoACollection(nDigis, iEvent.queue());
+    alpaka::memcpy(iEvent.queue(), digis_d.buffer(), digis_h.buffer());
+
+    Algo_.makePhase2ClustersAsync(iEvent.queue(), clusterThresholds_, digis_d.view(), nDigis);
+  }
+
+  void SiPixelPhase2DigiToCluster::produce(device::Event& iEvent, device::EventSetup const& iSetup) {
+    // Puts the digi, cluster and (optionally) digi-error products for the event handled in acquire().
+    if (nDigis_ == 0) {
+      // Empty event: acquire() returned before launching anything, so emit placeholder products.
+      // NOTE(review): acquire() did not assign digis_d on this path, so the object moved here is
+      // whatever was left from construction or from a previous event's std::move - confirm that
+      // downstream consumers accept it.
+      // NOTE(review): the cluster collection is sized with the Phase1 module count inside a Phase2
+      // producer - confirm this is intended.
+      SiPixelClustersSoACollection clusters_d{pixelTopology::Phase1::numberOfModules, iEvent.queue()};
+      iEvent.emplace(digiPutToken_, std::move(digis_d));
+      iEvent.emplace(clusterPutToken_, std::move(clusters_d));
+      if (includeErrors_) {
+        iEvent.emplace(digiErrorPutToken_, SiPixelDigiErrorsSoACollection());
+      }
+      return;
+    }
+
+    digis_d.setNModulesDigis(Algo_.nModules(), nDigis_);
+
+    iEvent.emplace(digiPutToken_, std::move(digis_d));
+    iEvent.emplace(clusterPutToken_, Algo_.getClusters());
+    if (includeErrors_) {
+      iEvent.emplace(digiErrorPutToken_, Algo_.getErrors());
+    }
+  }
+
+}  // namespace
ALPAKA_ACCELERATOR_NAMESPACE + +// define as framework plugin +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(SiPixelPhase2DigiToCluster); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc new file mode 100644 index 0000000000000..f3e13bade8e10 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToCluster.cc @@ -0,0 +1,289 @@ +#include +#include +#include +#include +#include + +#include "CalibTracker/Records/interface/SiPixelGainCalibrationForHLTSoARcd.h" +#include "CalibTracker/Records/interface/SiPixelMappingSoARecord.h" +#include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h" +#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingUtilities.h" +#include "DataFormats/FEDRawData/interface/FEDNumbering.h" +#include "DataFormats/FEDRawData/interface/FEDRawData.h" +#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" +#include "EventFilter/SiPixelRawToDigi/interface/PixelUnpackingRegions.h" +#include "FWCore/Framework/interface/ESWatcher.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include 
"FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/ESGetToken.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" + +#include "SiPixelRawToClusterKernel.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + template + class SiPixelRawToCluster : public stream::SynchronizingEDProducer<> { + public: + explicit SiPixelRawToCluster(const edm::ParameterSet& iConfig); + ~SiPixelRawToCluster() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + using Algo = pixelDetails::SiPixelRawToClusterKernel; + + private: + void acquire(device::Event const& iEvent, device::EventSetup const& iSetup) override; + void produce(device::Event& iEvent, device::EventSetup const& iSetup) override; + + edm::EDGetTokenT rawGetToken_; + edm::EDPutTokenT fmtErrorToken_; + device::EDPutToken digiPutToken_; + device::EDPutToken digiErrorPutToken_; + device::EDPutToken clusterPutToken_; + + edm::ESWatcher recordWatcher_; + const device::ESGetToken mapToken_; + const device::ESGetToken gainsToken_; + const edm::ESGetToken cablingMapToken_; + + std::unique_ptr cabling_; + std::vector fedIds_; + const SiPixelFedCablingMap* cablingMap_ = nullptr; + std::unique_ptr regions_; + + Algo Algo_; + PixelDataFormatter::Errors errors_; + + const bool includeErrors_; + const bool useQuality_; + uint32_t nDigis_; + const SiPixelClusterThresholds 
clusterThresholds_; + }; + + template + SiPixelRawToCluster::SiPixelRawToCluster(const edm::ParameterSet& iConfig) + : rawGetToken_(consumes(iConfig.getParameter("InputLabel"))), + digiPutToken_(produces()), + clusterPutToken_(produces()), + mapToken_(esConsumes()), + gainsToken_(esConsumes()), + cablingMapToken_(esConsumes( + edm::ESInputTag("", iConfig.getParameter("CablingMapLabel")))), + includeErrors_(iConfig.getParameter("IncludeErrors")), + useQuality_(iConfig.getParameter("UseQualityInfo")), + clusterThresholds_{iConfig.getParameter("clusterThreshold_layer1"), + iConfig.getParameter("clusterThreshold_otherLayers"), + static_cast(iConfig.getParameter("VCaltoElectronGain")), + static_cast(iConfig.getParameter("VCaltoElectronGain_L1")), + static_cast(iConfig.getParameter("VCaltoElectronOffset")), + static_cast(iConfig.getParameter("VCaltoElectronOffset_L1"))} { + if (includeErrors_) { + digiErrorPutToken_ = produces(); + fmtErrorToken_ = produces(); + } + + // regions + if (!iConfig.getParameter("Regions").getParameterNames().empty()) { + regions_ = std::make_unique(iConfig, consumesCollector()); + } + } + + template + void SiPixelRawToCluster::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("IncludeErrors", true); + desc.add("UseQualityInfo", false); + // Note: this parameter is obsolete: it is ignored and will have no effect. + // It is kept to avoid breaking older configurations, and will not be printed in the generated cfi.py file. 
+ desc.addOptionalNode(edm::ParameterDescription("MaxFEDWords", 0, true), false) + ->setComment("This parameter is obsolete and will be ignored."); + desc.add("clusterThreshold_layer1", pixelClustering::clusterThresholdLayerOne); + desc.add("clusterThreshold_otherLayers", pixelClustering::clusterThresholdOtherLayers); + desc.add("VCaltoElectronGain", 47.f); + desc.add("VCaltoElectronGain_L1", 50.f); + desc.add("VCaltoElectronOffset", -60.f); + desc.add("VCaltoElectronOffset_L1", -670.f); + + desc.add("InputLabel", edm::InputTag("rawDataCollector")); + { + edm::ParameterSetDescription psd0; + psd0.addOptional>("inputs"); + psd0.addOptional>("deltaPhi"); + psd0.addOptional>("maxZ"); + psd0.addOptional("beamSpot"); + desc.add("Regions", psd0) + ->setComment("## Empty Regions PSet means complete unpacking"); + } + desc.add("CablingMapLabel", "")->setComment("CablingMap label"); //Tav + descriptions.addWithDefaultLabel(desc); + } + + template + void SiPixelRawToCluster::acquire(device::Event const& iEvent, device::EventSetup const& iSetup) { + [[maybe_unused]] auto const& hMap = iSetup.getData(mapToken_); + auto const& dGains = iSetup.getData(gainsToken_); + auto gains = SiPixelGainCalibrationForHLTDevice(1, iEvent.queue()); + auto modulesToUnpackRegional = + cms::alpakatools::make_device_buffer(iEvent.queue(), ::pixelgpudetails::MAX_SIZE); + const unsigned char* modulesToUnpack; + // initialize cabling map or update if necessary + if (recordWatcher_.check(iSetup)) { + // cabling map, which maps online address (fed->link->ROC->local pixel) to offline (DetId->global pixel) + cablingMap_ = &iSetup.getData(cablingMapToken_); + fedIds_ = cablingMap_->fedIds(); + cabling_ = cablingMap_->cablingTree(); + LogDebug("map version:") << cablingMap_->version(); + } + if (regions_) { + regions_->run(iEvent, iSetup); + LogDebug("SiPixelRawToCluster") << "region2unpack #feds: " << regions_->nFEDs(); + LogDebug("SiPixelRawToCluster") << "region2unpack #modules (BPIX,EPIX,total): " << 
regions_->nBarrelModules() + << " " << regions_->nForwardModules() << " " << regions_->nModules(); + + modulesToUnpackRegional = SiPixelMappingUtilities::getModToUnpRegionalAsync( + *(regions_->modulesToUnpack()), cabling_.get(), fedIds_, iEvent.queue()); + modulesToUnpack = modulesToUnpackRegional.data(); + } else { + modulesToUnpack = hMap->modToUnpDefault(); + } + + const auto& buffers = iEvent.get(rawGetToken_); + + errors_.clear(); + + // GPU specific: Data extraction for RawToDigi GPU + unsigned int wordCounter = 0; + unsigned int fedCounter = 0; + bool errorsInEvent = false; + std::vector index(fedIds_.size(), 0); + std::vector start(fedIds_.size(), nullptr); + std::vector words(fedIds_.size(), 0); + // In CPU algorithm this loop is part of PixelDataFormatter::interpretRawData() + ErrorChecker errorcheck; + for (uint32_t i = 0; i < fedIds_.size(); ++i) { + const int fedId = fedIds_[i]; + if (regions_ && !regions_->mayUnpackFED(fedId)) + continue; + + // for GPU + // first 150 index stores the fedId and next 150 will store the + // start index of word in that fed + assert(fedId >= FEDNumbering::MINSiPixeluTCAFEDID); + fedCounter++; + + // get event data for this fed + const FEDRawData& rawData = buffers.FEDData(fedId); + + // GPU specific + int nWords = rawData.size() / sizeof(cms_uint64_t); + if (nWords == 0) { + continue; + } + // check CRC bit + const cms_uint64_t* trailer = reinterpret_cast(rawData.data()) + (nWords - 1); + if (not errorcheck.checkCRC(errorsInEvent, fedId, trailer, errors_)) { + continue; + } + // check headers + const cms_uint64_t* header = reinterpret_cast(rawData.data()); + header--; + bool moreHeaders = true; + while (moreHeaders) { + header++; + bool headerStatus = errorcheck.checkHeader(errorsInEvent, fedId, header, errors_); + moreHeaders = headerStatus; + } + + // check trailers + bool moreTrailers = true; + trailer++; + while (moreTrailers) { + trailer--; + bool trailerStatus = errorcheck.checkTrailer(errorsInEvent, fedId, 
nWords, trailer, errors_); + moreTrailers = trailerStatus; + } + + const cms_uint32_t* bw = (const cms_uint32_t*)(header + 1); + const cms_uint32_t* ew = (const cms_uint32_t*)(trailer); + + assert(0 == (ew - bw) % 2); + index[i] = wordCounter; + start[i] = bw; + words[i] = (ew - bw); + wordCounter += (ew - bw); + + } // end of for loop + nDigis_ = wordCounter; + if (nDigis_ == 0) + return; + + // copy the FED data to a single cpu buffer + pixelDetails::WordFedAppender wordFedAppender(nDigis_); + for (uint32_t i = 0; i < fedIds_.size(); ++i) { + wordFedAppender.initializeWordFed(fedIds_[i], index[i], start[i], words[i]); + } + Algo_.makePhase1ClustersAsync(iEvent.queue(), + clusterThresholds_, + hMap.const_view(), + modulesToUnpack, + dGains.const_view(), + wordFedAppender, + wordCounter, + fedCounter, + useQuality_, + includeErrors_, + edm::MessageDrop::instance()->debugEnabled); + } + + template + void SiPixelRawToCluster::produce(device::Event& iEvent, device::EventSetup const& iSetup) { + if (nDigis_ == 0) { + // Cannot use the default constructor here, as it would not allocate memory. + // In the case of no digis, clusters_d are not being instantiated, but are + // still used downstream to initialize TrackingRecHitSoADevice. If there + // are no valid pointers to clusters' Collection columns, instantiation + // of TrackingRecHits fail. 
Example: workflow 11604.0 + + iEvent.emplace(digiPutToken_, nDigis_, iEvent.queue()); + iEvent.emplace(clusterPutToken_, pixelTopology::Phase1::numberOfModules, iEvent.queue()); + if (includeErrors_) { + iEvent.emplace(digiErrorPutToken_); + iEvent.emplace(fmtErrorToken_); + } + return; + } + + iEvent.emplace(digiPutToken_, Algo_.getDigis()); + iEvent.emplace(clusterPutToken_, Algo_.getClusters()); + if (includeErrors_) { + iEvent.emplace(digiErrorPutToken_, Algo_.getErrors()); + iEvent.emplace(fmtErrorToken_, std::move(errors_)); + } + } + + using SiPixelRawToClusterPhase1 = SiPixelRawToCluster; + using SiPixelRawToClusterHIonPhase1 = SiPixelRawToCluster; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +// define as framework plugin +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(SiPixelRawToClusterPhase1); +DEFINE_FWK_ALPAKA_MODULE(SiPixelRawToClusterHIonPhase1); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc new file mode 100644 index 0000000000000..3e7caf8b2b3a4 --- /dev/null +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.dev.cc @@ -0,0 +1,799 @@ +// C++ includes +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// CMSSW includes +#include "HeterogeneousCore/AlpakaInterface/interface/prefixScan.h" +#include "HeterogeneousCore/AlpakaInterface/interface/SimpleVector.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h" +#include 
"CondFormats/SiPixelObjects/interface/SiPixelMappingLayout.h" +#include "DataFormats/SiPixelDigi/interface/SiPixelDigiConstants.h" + +// local includes +#include "CalibPixel.h" +#include "ClusterChargeCut.h" +#include "PixelClustering.h" +#include "SiPixelRawToClusterKernel.h" + +// #define GPU_DEBUG + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace pixelDetails { + + //////////////////// + + ALPAKA_FN_ACC uint32_t getLink(uint32_t ww) { + return ((ww >> ::sipixelconstants::LINK_shift) & ::sipixelconstants::LINK_mask); + } + + ALPAKA_FN_ACC uint32_t getRoc(uint32_t ww) { + return ((ww >> ::sipixelconstants::ROC_shift) & ::sipixelconstants::ROC_mask); + } + + ALPAKA_FN_ACC uint32_t getADC(uint32_t ww) { + return ((ww >> ::sipixelconstants::ADC_shift) & ::sipixelconstants::ADC_mask); + } + + ALPAKA_FN_ACC bool isBarrel(uint32_t rawId) { return (1 == ((rawId >> 25) & 0x7)); } + + ALPAKA_FN_ACC ::pixelDetails::DetIdGPU getRawId(const SiPixelMappingSoAConstView &cablingMap, + uint8_t fed, + uint32_t link, + uint32_t roc) { + using namespace ::pixelDetails; + uint32_t index = fed * MAX_LINK * MAX_ROC + (link - 1) * MAX_ROC + roc; + ::pixelDetails::DetIdGPU detId = { + cablingMap.rawId()[index], cablingMap.rocInDet()[index], cablingMap.moduleId()[index]}; + return detId; + } + + //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html + //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071 + // Convert local pixel to pixelDetails::global pixel + ALPAKA_FN_ACC ::pixelDetails::Pixel frameConversion( + bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, ::pixelDetails::Pixel local) { + int slopeRow = 0, slopeCol = 0; + int rowOffset = 0, colOffset = 0; + + if (bpix) { + if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8 
- rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
+          } else {
+            slopeRow = -1;
+            slopeCol = 1;
+            rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
+            colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
+          }  // if roc
+        } else {  // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1
+          if (rocIdInDetUnit < 8) {
+            slopeRow = -1;
+            slopeCol = 1;
+            rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
+            colOffset = rocIdInDetUnit * ::pixelDetails::numColsInRoc;
+          } else {
+            slopeRow = 1;
+            slopeCol = -1;
+            rowOffset = 0;
+            colOffset = (16 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
+          }
+        }
+
+      } else {             // fpix
+        if (side == -1) {  // pannel 1
+          if (rocIdInDetUnit < 8) {
+            slopeRow = 1;
+            slopeCol = -1;
+            rowOffset = 0;
+            colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
+          } else {
+            slopeRow = -1;
+            slopeCol = 1;
+            rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
+            colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
+          }
+        } else {  // pannel 2
+          if (rocIdInDetUnit < 8) {
+            slopeRow = 1;
+            slopeCol = -1;
+            rowOffset = 0;
+            colOffset = (8 - rocIdInDetUnit) * ::pixelDetails::numColsInRoc - 1;
+          } else {
+            slopeRow = -1;
+            slopeCol = 1;
+            rowOffset = 2 * ::pixelDetails::numRowsInRoc - 1;
+            colOffset = (rocIdInDetUnit - 8) * ::pixelDetails::numColsInRoc;
+          }
+
+        }  // side
+      }
+
+      uint32_t gRow = rowOffset + slopeRow * local.row;
+      uint32_t gCol = colOffset + slopeCol * local.col;
+      ::pixelDetails::Pixel global = {gRow, gCol};
+      return global;
+    }
+
+    // Translate a conversion status code into the error type stored with the digi error:
+    // status 1 -> 35, 2 -> 36, 3 -> 37, 4 -> 38; any other status yields 0.
+    // When debug is set, a one-line diagnostic is printed for every status, including unknown ones.
+    ALPAKA_FN_ACC uint8_t conversionError(uint8_t fedId, uint8_t status, bool debug = false) {
+      uint8_t errorType = 0;
+
+      switch (status) {
+        case 1: {
+          if (debug)
+            // the closing parenthesis belongs before the newline, as in the sibling messages below
+            printf("Error in Fed: %i, invalid channel Id (errorType = 35)\n", fedId);
+          errorType = 35;
+          break;
+        }
+        case 2: {
+          if (debug)
+            printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId);
+          errorType = 36;
+          break;
+        }
+        case 3: {
+          if (debug)
+            printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId);
+          errorType = 37;
+          break;
+        }
+        case 4: {
+          if (debug)
+            printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId);
+          errorType = 38;
+          break;
+        }
+        default:
+          if (debug)
+            printf("Cabling check returned unexpected result, status = %i\n", status);
+      };
+
+      return errorType;
+    }
+
+    // True when (rocRow, rocCol) lies inside an 80 x 52 ROC.
+    ALPAKA_FN_ACC bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) {
+      uint32_t numRowsInRoc = 80;
+      uint32_t numColsInRoc = 52;
+
+      /// row and column in ROC representation
+      // bitwise '&' on the two bools: both comparisons are always evaluated (no short-circuit)
+      return ((rocRow < numRowsInRoc) & (rocCol < numColsInRoc));
+    }
+
+    // True when dcol < 26 and 2 <= pxid < 162 (all three comparisons are always evaluated).
+    ALPAKA_FN_ACC bool dcolIsValid(uint32_t dcol, uint32_t pxid) { return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); }
+
+    // Inspect the error-type field of a raw word (extracted with ROC_shift / ERROR_mask).
+    // Values below 25 are not errors and return 0; otherwise each known type is handled by its
+    // own case below, with an optional debug message.
+    ALPAKA_FN_ACC uint8_t checkROC(uint32_t errorWord,
+                                   uint8_t fedId,
+                                   uint32_t link,
+                                   const SiPixelMappingSoAConstView &cablingMap,
+                                   bool debug = false) {
+      uint8_t errorType = (errorWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ERROR_mask;
+      if (errorType < 25)
+        return 0;
+      bool errorFound = false;
+
+      switch (errorType) {
+        case (25): {
+          errorFound = true;
+          // cabling-map slot of ROC 1 on this (fed, link)
+          uint32_t index =
+              fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + 1;
+          if (index > 1 && index <= cablingMap.size()) {
+            // the error is dropped when the map entry does not describe (link, roc == 1)
+            if (!(link == cablingMap.link()[index] && 1 == cablingMap.roc()[index]))
+              errorFound = false;
+          }
+          if (debug and errorFound)
+            printf("Invalid ROC = 25 found (errorType = 25)\n");
+          break;
+        }
+        case (26): {
+          if (debug)
+            printf("Gap word found (errorType = 26)\n");
+          errorFound = true;
+          break;
+        }
+        case (27): {
+          if (debug)
+            printf("Dummy word found (errorType = 27)\n");
+          errorFound = true;
+          break;
+        }
+        case (28): {
+          if (debug)
+            printf("Error fifo nearly full (errorType = 28)\n");
+          errorFound = true;
+          break;
+        }
+        case (29): {
+          if (debug)
+            printf("Timeout on a channel (errorType = 29)\n");
+          if ((errorWord >> ::pixelDetails::OMIT_ERR_shift) & ::pixelDetails::OMIT_ERR_mask) {
+            if (debug)
+
printf("...first errorType=29 error, this gets masked out\n"); + } + errorFound = true; + break; + } + case (30): { + if (debug) + printf("TBM error trailer (errorType = 30)\n"); + int StateMatch_bits = 4; + int StateMatch_shift = 8; + uint32_t StateMatch_mask = ~(~uint32_t(0) << StateMatch_bits); + int StateMatch = (errorWord >> StateMatch_shift) & StateMatch_mask; + if (StateMatch != 1 && StateMatch != 8) { + if (debug) + printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); + } + if (StateMatch == 1) + errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 + errorFound = true; + break; + } + case (31): { + if (debug) + printf("Event number error (errorType = 31)\n"); + errorFound = true; + break; + } + default: + errorFound = false; + }; + + return errorFound ? errorType : 0; + } + + ALPAKA_FN_ACC uint32_t getErrRawID(uint8_t fedId, + uint32_t errWord, + uint32_t errorType, + const SiPixelMappingSoAConstView &cablingMap, + bool debug = false) { + uint32_t rID = 0xffffffff; + + switch (errorType) { + case 25: + case 30: + case 31: + case 36: + case 40: { + uint32_t roc = 1; + uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; + if (rID_temp != 9999) + rID = rID_temp; + break; + } + case 29: { + int chanNmbr = 0; + const int DB0_shift = 0; + const int DB1_shift = DB0_shift + 1; + const int DB2_shift = DB1_shift + 1; + const int DB3_shift = DB2_shift + 1; + const int DB4_shift = DB3_shift + 1; + const uint32_t DataBit_mask = ~(~uint32_t(0) << 1); + + int CH1 = (errWord >> DB0_shift) & DataBit_mask; + int CH2 = (errWord >> DB1_shift) & DataBit_mask; + int CH3 = (errWord >> DB2_shift) & DataBit_mask; + int CH4 = (errWord >> DB3_shift) & DataBit_mask; + int CH5 = (errWord >> DB4_shift) & DataBit_mask; + int BLOCK_bits = 3; + int BLOCK_shift = 8; + uint32_t BLOCK_mask = ~(~uint32_t(0) << BLOCK_bits); + int BLOCK = (errWord >> BLOCK_shift) & 
BLOCK_mask; + int localCH = 1 * CH1 + 2 * CH2 + 3 * CH3 + 4 * CH4 + 5 * CH5; + if (BLOCK % 2 == 0) + chanNmbr = (BLOCK / 2) * 9 + localCH; + else + chanNmbr = ((BLOCK - 1) / 2) * 9 + 4 + localCH; + if ((chanNmbr < 1) || (chanNmbr > 36)) + break; // signifies unexpected result + + uint32_t roc = 1; + uint32_t link = chanNmbr; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; + if (rID_temp != 9999) + rID = rID_temp; + break; + } + case 37: + case 38: { + uint32_t roc = (errWord >> ::pixelDetails::ROC_shift) & ::pixelDetails::ROC_mask; + uint32_t link = (errWord >> ::pixelDetails::LINK_shift) & ::pixelDetails::LINK_mask; + uint32_t rID_temp = getRawId(cablingMap, fedId, link, roc).RawId; + if (rID_temp != 9999) + rID = rID_temp; + break; + } + default: + break; + }; + + return rID; + } + + // Kernel to perform Raw to Digi conversion + struct RawToDigi_kernel { + template + ALPAKA_FN_ACC void operator()(const TAcc &acc, + const SiPixelMappingSoAConstView &cablingMap, + const unsigned char *modToUnp, + const uint32_t wordCounter, + const uint32_t *word, + const uint8_t *fedIds, + SiPixelDigisSoAView digisView, + SiPixelDigiErrorsSoAView err, + bool useQualityInfo, + bool includeErrors, + bool debug) const { + cms::alpakatools::for_each_element_in_grid_strided(acc, wordCounter, [&](uint32_t iloop) { + auto gIndex = iloop; + auto dvgi = digisView[gIndex]; + dvgi.xx() = 0; + dvgi.yy() = 0; + dvgi.adc() = 0; + bool skipROC = false; + + if (gIndex == 0) + err[gIndex].size() = 0; + + err[gIndex].pixelErrors() = SiPixelErrorCompact{0, 0, 0, 0}; + + uint8_t fedId = fedIds[gIndex / 2]; // +1200; + + // initialize (too many coninue below) + dvgi.pdigi() = 0; + dvgi.rawIdArr() = 0; + constexpr uint16_t invalidModuleId = std::numeric_limits::max() - 1; + dvgi.moduleId() = invalidModuleId; + + uint32_t ww = word[gIndex]; // Array containing 32 bit raw data + if (ww == 0) { + // 0 is an indicator of a noise/dead channel, skip these pixels during clusterization + 
return; + } + + uint32_t link = getLink(ww); // Extract link + uint32_t roc = getRoc(ww); // Extract Roc in link + ::pixelDetails::DetIdGPU detId = getRawId(cablingMap, fedId, link, roc); + + uint8_t errorType = checkROC(ww, fedId, link, cablingMap, debug); + skipROC = (roc < ::pixelDetails::maxROCIndex) ? false : (errorType != 0); + if (includeErrors and skipROC) { + uint32_t rID = getErrRawID(fedId, ww, errorType, cablingMap, debug); + err[gIndex].pixelErrors() = SiPixelErrorCompact{rID, ww, errorType, fedId}; + alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Threads{}); + return; + } + + uint32_t rawId = detId.RawId; + uint32_t rocIdInDetUnit = detId.rocInDet; + bool barrel = isBarrel(rawId); + + uint32_t index = + fedId * ::pixelDetails::MAX_LINK * ::pixelDetails::MAX_ROC + (link - 1) * ::pixelDetails::MAX_ROC + roc; + if (useQualityInfo) { + skipROC = cablingMap.badRocs()[index]; + if (skipROC) + return; + } + skipROC = modToUnp[index]; + if (skipROC) + return; + + uint32_t layer = 0; //, ladder =0; + int side = 0, panel = 0, module = 0; //disk = 0, blade = 0 + + if (barrel) { + layer = (rawId >> ::pixelDetails::layerStartBit) & ::pixelDetails::layerMask; + module = (rawId >> ::pixelDetails::moduleStartBit) & ::pixelDetails::moduleMask; + side = (module < 5) ? -1 : 1; + } else { + // endcap ids + layer = 0; + panel = (rawId >> ::pixelDetails::panelStartBit) & ::pixelDetails::panelMask; + //disk = (rawId >> diskStartBit_) & diskMask_; + side = (panel == 1) ? 
-1 : 1; + //blade = (rawId >> bladeStartBit_) & bladeMask_; + } + + // ***special case of layer to 1 be handled here + ::pixelDetails::Pixel localPix; + if (layer == 1) { + uint32_t col = (ww >> ::pixelDetails::COL_shift) & ::pixelDetails::COL_mask; + uint32_t row = (ww >> ::pixelDetails::ROW_shift) & ::pixelDetails::ROW_mask; + localPix.row = row; + localPix.col = col; + if (includeErrors) { + if (not rocRowColIsValid(row, col)) { + uint8_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays + err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId}; + alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Threads{}); + if (debug) + printf("BPIX1 Error status: %i\n", error); + return; + } + } + } else { + // ***conversion rules for dcol and pxid + uint32_t dcol = (ww >> ::pixelDetails::DCOL_shift) & ::pixelDetails::DCOL_mask; + uint32_t pxid = (ww >> ::pixelDetails::PXID_shift) & ::pixelDetails::PXID_mask; + uint32_t row = ::pixelDetails::numRowsInRoc - pxid / 2; + uint32_t col = dcol * 2 + pxid % 2; + localPix.row = row; + localPix.col = col; + if (includeErrors and not dcolIsValid(dcol, pxid)) { + uint8_t error = conversionError(fedId, 3, debug); + err[gIndex].pixelErrors() = SiPixelErrorCompact{rawId, ww, error, fedId}; + alpaka::atomicInc(acc, &err.size(), 0xffffffff, alpaka::hierarchy::Threads{}); + if (debug) + printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); + return; + } + } + + ::pixelDetails::Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix); + dvgi.xx() = globalPix.row; // origin shifting by 1 0-159 + dvgi.yy() = globalPix.col; // origin shifting by 1 0-415 + dvgi.adc() = getADC(ww); + dvgi.pdigi() = ::pixelDetails::pack(globalPix.row, globalPix.col, dvgi.adc()); + dvgi.moduleId() = detId.moduleId; + dvgi.rawIdArr() = rawId; + }); // end of stride on grid + + } // end of Raw to Digi kernel operator() + }; // end of Raw to Digi struct + 
+    // Kernel functor that turns the per-module cluster counts (clusInModule) into the
+    // clusModuleStart offsets. Each count is capped at TrackerTraits::maxHitsInModule, the capped
+    // values are scanned in chunks of up to 1024 entries with blockPrefixScan, the chunk totals are
+    // then added to the following chunks, and finally every offset is clamped to
+    // TrackerTraits::maxNumberOfHits. The debug asserts expect a launch with a single block.
+    // NOTE(review): blockPrefixScan is presumably an inclusive in-place prefix sum - confirm
+    // against HeterogeneousCore/AlpakaInterface/interface/prefixScan.h.
+    template
+    struct FillHitsModuleStart {
+      template
+      ALPAKA_FN_ACC void operator()(const TAcc &acc, SiPixelClustersSoAView clus_view) const {
+        // NOTE(review): this bound is below the 3072+ module indices used on the isPhase2 path
+        // further down - confirm the assert is meant to hold for Phase 2 traits as well.
+        ALPAKA_ASSERT_OFFLOAD(TrackerTraits::numberOfModules < 2048);  // easy to extend at least till 32*1024
+
+        constexpr int nMaxModules = TrackerTraits::numberOfModules;
+        constexpr uint32_t maxHitsInModule = TrackerTraits::maxHitsInModule;
+
+#ifndef NDEBUG
+        [[maybe_unused]] const uint32_t blockIdxLocal(alpaka::getIdx(acc)[0u]);
+        ALPAKA_ASSERT_OFFLOAD(0 == blockIdxLocal);
+        [[maybe_unused]] const uint32_t gridDimension(alpaka::getWorkDiv(acc)[0u]);
+        ALPAKA_ASSERT_OFFLOAD(1 == gridDimension);
+#endif
+
+        // limit to maxHitsInModule;
+        // entry i + 1 receives the capped count of module i, leaving entry 0 untouched
+        cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules, [&](uint32_t i) {
+          clus_view[i + 1].clusModuleStart() = std::min(maxHitsInModule, clus_view[i].clusInModule());
+        });
+
+        constexpr bool isPhase2 = std::is_base_of::value;
+        // size of the second scan chunk: a full 1024 for Phase 2, the remainder otherwise
+        constexpr auto leftModules = isPhase2 ? 1024 : nMaxModules - 1024;
+
+        auto &&ws = alpaka::declareSharedVar(acc);
+
+        // scan entries [1, 1024]
+        cms::alpakatools::blockPrefixScan(
+            acc, clus_view.clusModuleStart() + 1, clus_view.clusModuleStart() + 1, 1024, ws);
+
+        // scan the next chunk, starting at entry 1025
+        cms::alpakatools::blockPrefixScan(
+            acc, clus_view.clusModuleStart() + 1024 + 1, clus_view.clusModuleStart() + 1024 + 1, leftModules, ws);
+
+        if constexpr (isPhase2) {
+          cms::alpakatools::blockPrefixScan(
+              acc, clus_view.clusModuleStart() + 2048 + 1, clus_view.clusModuleStart() + 2048 + 1, 1024, ws);
+          cms::alpakatools::blockPrefixScan(acc,
+                                            clus_view.clusModuleStart() + 3072 + 1,
+                                            clus_view.clusModuleStart() + 3072 + 1,
+                                            nMaxModules - 3072,
+                                            ws);
+        }
+
+        // Stitch the independently scanned chunks together: add the last value of the preceding
+        // chunk to every entry of the next one.
+        constexpr auto lastModule = isPhase2 ? 2049u : nMaxModules + 1;
+        cms::alpakatools::for_each_element_in_block_strided(acc, lastModule, 1025u, [&](uint32_t i) {
+          clus_view[i].clusModuleStart() += clus_view[1024].clusModuleStart();
+        });
+        alpaka::syncBlockThreads(acc);
+
+        if constexpr (isPhase2) {
+          cms::alpakatools::for_each_element_in_block_strided(acc, 3073u, 2049u, [&](uint32_t i) {
+            clus_view[i].clusModuleStart() += clus_view[2048].clusModuleStart();
+          });
+          alpaka::syncBlockThreads(acc);
+
+          cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules + 1, 3073u, [&](uint32_t i) {
+            clus_view[i].clusModuleStart() += clus_view[3072].clusModuleStart();
+          });
+          alpaka::syncBlockThreads(acc);
+        }
+#ifdef GPU_DEBUG
+        // NOTE(review): these checks read moduleStart() while the code above fills
+        // clusModuleStart() - confirm which column the debug asserts are meant to inspect.
+        ALPAKA_ASSERT_OFFLOAD(0 == clus_view[0].moduleStart());
+        auto c0 = std::min(maxHitsInModule, clus_view[1].clusModuleStart());
+        ALPAKA_ASSERT_OFFLOAD(c0 == clus_view[1].moduleStart());
+        ALPAKA_ASSERT_OFFLOAD(clus_view[1024].moduleStart() >= clus_view[1023].moduleStart());
+        ALPAKA_ASSERT_OFFLOAD(clus_view[1025].moduleStart() >= clus_view[1024].moduleStart());
+        ALPAKA_ASSERT_OFFLOAD(clus_view[nMaxModules].moduleStart() >= clus_view[1025].moduleStart());
+
+        cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules + 1, [&](uint32_t i) {
+          // monotonicity check against the previous entry (the index used to be `i - i`, i.e.
+          // always entry 0, which made the check far weaker than intended)
+          if (0 != i)
+            ALPAKA_ASSERT_OFFLOAD(clus_view[i].moduleStart() >= clus_view[i - 1].moduleStart());
+          // Check BPX2 (1), FP1 (4)
+          constexpr auto bpix2 = TrackerTraits::layerStart[1];
+          constexpr auto fpix1 = TrackerTraits::layerStart[4];
+          if (i == bpix2 || i == fpix1)
+            printf("moduleStart %d %d\n", i, clus_view[i].moduleStart());
+        });
+#endif
+        // avoid overflow
+        constexpr auto MAX_HITS = TrackerTraits::maxNumberOfHits;
+        cms::alpakatools::for_each_element_in_block_strided(acc, nMaxModules + 1, [&](uint32_t i) {
+          if (clus_view[i].clusModuleStart() > MAX_HITS)
+            clus_view[i].clusModuleStart() = MAX_HITS;
+        });
+
+      }  // end of FillHitsModuleStart kernel operator()
+    };   // end of FillHitsModuleStart struct
+
+    // Interface to
outside
+  /// Enqueue the full Phase-1 chain on `queue`: raw-to-digi unpacking, gain calibration,
+  /// module counting, cluster finding, cluster charge cut and the module-start prefix sum.
+  /// Results are left in the members digis_d, digiErrors_d and clusters_d; three counters are
+  /// copied into nModules_Clusters_h ([0] from moduleStart()[0], [1] from
+  /// clusModuleStart()[numberOfModules], [2] from clusModuleStart()[layerStart[1]]).
+  /// Apart from the GPU_DEBUG sections, this function does not wait on the queue.
+  ///
+  /// @param queue              queue on which all copies and kernels are enqueued
+  /// @param clusterThresholds  thresholds forwarded to the calibration and charge-cut kernels
+  /// @param cablingMap         cabling map view forwarded to RawToDigi_kernel
+  /// @param modToUnp           forwarded unchanged to RawToDigi_kernel
+  /// @param gains              gain calibration view forwarded to CalibDigis
+  /// @param wordFed            host-side raw words and FED ids to copy to the device
+  /// @param wordCounter        number of raw words; asserted to be even when non-zero
+  /// @param fedCounter         not referenced in this function body
+  /// @param useQualityInfo     forwarded to RawToDigi_kernel
+  /// @param includeErrors      if true, digiErrors_d is allocated; also forwarded to the kernel
+  /// @param debug              forwarded to RawToDigi_kernel
+  template
+  void SiPixelRawToClusterKernel::makePhase1ClustersAsync(
+      Queue &queue,
+      const SiPixelClusterThresholds clusterThresholds,
+      const SiPixelMappingSoAConstView &cablingMap,
+      const unsigned char *modToUnp,
+      const SiPixelGainCalibrationForHLTSoAConstView &gains,
+      const WordFedAppender &wordFed,
+      const uint32_t wordCounter,
+      const uint32_t fedCounter,
+      bool useQualityInfo,
+      bool includeErrors,
+      bool debug) {
+    nDigis = wordCounter;
+
+#ifdef GPU_DEBUG
+    std::cout << "decoding " << wordCounter << " digis." << std::endl;
+#endif
+    constexpr int numberOfModules = TrackerTraits::numberOfModules;
+    digis_d = SiPixelDigisSoACollection(wordCounter, queue);
+    // NOTE(review): digiErrors_d is assigned only when includeErrors is true, but
+    // digiErrors_d->view() is evaluated unconditionally in the RawToDigi_kernel launch below -
+    // confirm the optional always holds a value on that path.
+    if (includeErrors) {
+      digiErrors_d = SiPixelDigiErrorsSoACollection(wordCounter, queue);
+    }
+    clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
+    // protect in case of empty event....
+    if (wordCounter) {
+      const int threadsPerBlockOrElementsPerThread =
+          cms::alpakatools::requires_single_thread_per_block_v ? 32 : 512;
+      // fill it all
+      const uint32_t blocks = cms::alpakatools::divide_up_by(wordCounter, threadsPerBlockOrElementsPerThread);
+      const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlockOrElementsPerThread);
+      assert(0 == wordCounter % 2);
+      // wordCounter is the total number of words in each event to be transferred to the device
+      auto word_d = cms::alpakatools::make_device_buffer(queue, wordCounter);
+      // NB: IMPORTANT: fedId_d: In legacy, wordCounter elements are allocated.
+      // However, only the first half of elements end up eventually used:
+      // hence, here, only wordCounter/2 elements are allocated.
+      auto fedId_d = cms::alpakatools::make_device_buffer(queue, wordCounter / 2);
+      alpaka::memcpy(queue, word_d, wordFed.word(), wordCounter);
+      alpaka::memcpy(queue, fedId_d, wordFed.fedId(), wordCounter / 2);
+      // Launch rawToDigi kernel
+      alpaka::exec(queue,
+                   workDiv,
+                   RawToDigi_kernel{},
+                   cablingMap,
+                   modToUnp,
+                   wordCounter,
+                   word_d.data(),
+                   fedId_d.data(),
+                   digis_d->view(),
+                   digiErrors_d->view(),
+                   useQualityInfo,
+                   includeErrors,
+                   debug);
+
+#ifdef GPU_DEBUG
+      alpaka::wait(queue);
+      std::cout << "RawToDigi_kernel was run smoothly!" << std::endl;
+#endif
+    }
+    // End of Raw2Digi and passing data for clustering
+
+    {
+      // clusterizer
+      using namespace pixelClustering;
+      // calibrations
+      using namespace calibPixel;
+      const int threadsPerBlockOrElementsPerThread = []() {
+        if constexpr (std::is_same_v) {
+          // NB: IMPORTANT: This could be tuned to benefit from innermost loop.
+          return 32;
+        } else {
+          return 256;
+        }
+      }();
+      // this block runs even when wordCounter is 0; the work size is then numberOfModules
+      const auto blocks = cms::alpakatools::divide_up_by(std::max(wordCounter, numberOfModules),
+                                                         threadsPerBlockOrElementsPerThread);
+      const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlockOrElementsPerThread);
+
+      alpaka::exec(
+          queue, workDiv, CalibDigis{}, clusterThresholds, digis_d->view(), clusters_d->view(), gains, wordCounter);
+
+#ifdef GPU_DEBUG
+      alpaka::wait(queue);
+      std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
+                << " threadsPerBlockOrElementsPerThread\n";
+#endif
+
+      alpaka::exec(
+          queue, workDiv, CountModules{}, digis_d->view(), clusters_d->view(), wordCounter);
+
+      // copy the first element of moduleStart to the host buffer (index 0)
+      auto moduleStartFirstElement =
+          cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
+      alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
+      constexpr auto threadsPerBlockFindClus = 512;
+      const auto workDivMaxNumModules =
+          cms::alpakatools::make_workdiv(numberOfModules, threadsPerBlockFindClus);
+      // NB: With present FindClus() / chargeCut() algorithm,
+      // threadPerBlock (GPU) or elementsPerThread (CPU) = 256 show optimal performance.
+      // Though, it does not have to be the same number for CPU/GPU cases.
+      // NOTE(review): the comment above quotes 256, but threadsPerBlockFindClus is 512 - confirm.
+
+#ifdef GPU_DEBUG
+      std::cout << " FindClus kernel launch with " << numberOfModules << " blocks of " << threadsPerBlockFindClus
+                << " threadsPerBlockOrElementsPerThread\n";
+#endif
+
+      alpaka::exec(
+          queue, workDivMaxNumModules, FindClus{}, digis_d->view(), clusters_d->view(), wordCounter);
+
+#ifdef GPU_DEBUG
+      alpaka::wait(queue);
+#endif
+
+      // apply charge cut
+      alpaka::exec(queue,
+                   workDivMaxNumModules,
+                   ::pixelClustering::ClusterChargeCut{},
+                   digis_d->view(),
+                   clusters_d->view(),
+                   clusterThresholds,
+                   wordCounter);
+      // count the module start indices already here (instead of
+      // rechits) so that the number of clusters/hits can be made
+      // available in the rechit producer without additional points of
+      // synchronization/ExternalWork
+
+      // MUST be ONE block
+      const auto workDivOneBlock = cms::alpakatools::make_workdiv(1u, 1024u);
+      alpaka::exec(queue, workDivOneBlock, FillHitsModuleStart{}, clusters_d->view());
+
+      // last element holds the number of all clusters
+      const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(
+          alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
+      constexpr int startBPIX2 = TrackerTraits::layerStart[1];
+
+      // element startBPIX2 holds the number of clusters until BPIX2
+      const auto bpix2ClusterStart = cms::alpakatools::make_device_view(
+          alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
+      auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
+      alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
+
+      auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
+      alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
+
+#ifdef GPU_DEBUG
+      alpaka::wait(queue);
+      std::cout << "SiPixelClusterizerAlpaka results:" << std::endl
+                << " > no. of digis: " << nDigis << std::endl
+                << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
+                << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
+                << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
+#endif
+
+    }  // end clusterizer scope
+  }
+
+  /// Enqueue the Phase-2 chain on `queue`, starting from already available digis: calibration
+  /// (CalibDigisPhase2), module counting, cluster finding, cluster charge cut and the
+  /// module-start prefix sum. Fills clusters_d and the three counters in nModules_Clusters_h
+  /// in the same way as the Phase-1 entry point.
+  ///
+  /// @param queue              queue on which all copies and kernels are enqueued
+  /// @param clusterThresholds  thresholds forwarded to the calibration and charge-cut kernels
+  /// @param digis_view         digis SoA view passed to every kernel in the chain
+  /// @param numDigis           number of digis; stored in nDigis
+  template
+  void SiPixelRawToClusterKernel::makePhase2ClustersAsync(
+      Queue &queue,
+      const SiPixelClusterThresholds clusterThresholds,
+      SiPixelDigisSoAView &digis_view,
+      const uint32_t numDigis) {
+    using namespace pixelClustering;
+    using pixelTopology::Phase2;
+    nDigis = numDigis;
+    constexpr int numberOfModules = pixelTopology::Phase2::numberOfModules;
+    clusters_d = SiPixelClustersSoACollection(numberOfModules, queue);
+    const auto threadsPerBlockOrElementsPerThread = 512;
+    const auto blocks =
+        cms::alpakatools::divide_up_by(std::max(numDigis, numberOfModules), threadsPerBlockOrElementsPerThread);
+    const auto workDiv = cms::alpakatools::make_workdiv(blocks, threadsPerBlockOrElementsPerThread);
+
+    alpaka::exec(
+        queue, workDiv, calibPixel::CalibDigisPhase2{}, clusterThresholds, digis_view, clusters_d->view(), numDigis);
+
+#ifdef GPU_DEBUG
+    alpaka::wait(queue);
+    std::cout << "CountModules kernel launch with " << blocks << " blocks of " << threadsPerBlockOrElementsPerThread
+              << " threadsPerBlockOrElementsPerThread\n";
+#endif
+    alpaka::exec(
+        queue, workDiv, CountModules{}, digis_view, clusters_d->view(), numDigis);
+
+    // copy the first element of moduleStart to the host buffer (index 0)
+    auto moduleStartFirstElement =
+        cms::alpakatools::make_device_view(alpaka::getDev(queue), clusters_d->view().moduleStart(), 1u);
+    alpaka::memcpy(queue, nModules_Clusters_h, moduleStartFirstElement);
+
+    /// should be larger than maxPixInModule/16 aka (maxPixInModule/maxiter in the kernel)
+
+    // maxPixInModule / 16, rounded up to a multiple of 128
+    const auto threadsPerBlockFindClus = ((TrackerTraits::maxPixInModule / 16 + 128 - 1) / 128) * 128;
+    const auto workDivMaxNumModules =
+        cms::alpakatools::make_workdiv(numberOfModules, threadsPerBlockFindClus);
+
+#ifdef GPU_DEBUG
+    alpaka::wait(queue);
+    std::cout << "FindClus kernel launch with " << numberOfModules << " blocks of " << threadsPerBlockFindClus
+              << " threadsPerBlockOrElementsPerThread\n";
+#endif
+    alpaka::exec(
+        queue, workDivMaxNumModules, FindClus{}, digis_view, clusters_d->view(), numDigis);
+#ifdef GPU_DEBUG
+    alpaka::wait(queue);
+#endif
+
+    // apply charge cut
+    alpaka::exec(queue,
+                 workDivMaxNumModules,
+                 ::pixelClustering::ClusterChargeCut{},
+                 digis_view,
+                 clusters_d->view(),
+                 clusterThresholds,
+                 numDigis);
+
+    // count the module start indices already here (instead of
+    // rechits) so that the number of clusters/hits can be made
+    // available in the rechit producer without additional points of
+    // synchronization/ExternalWork
+
+    // MUST be ONE block
+    const auto workDivOneBlock = cms::alpakatools::make_workdiv(1u, 1024u);
+    alpaka::exec(queue, workDivOneBlock, FillHitsModuleStart{}, clusters_d->view());
+
+    // last element holds the number of all clusters
+    const auto clusModuleStartLastElement = cms::alpakatools::make_device_view(
+        alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + numberOfModules, 1u);
+    constexpr int startBPIX2 = pixelTopology::Phase2::layerStart[1];
+    // element startBPIX2 holds the number of clusters until BPIX2
+    const auto bpix2ClusterStart = cms::alpakatools::make_device_view(
+        alpaka::getDev(queue), clusters_d->const_view().clusModuleStart() + startBPIX2, 1u);
+    auto nModules_Clusters_h_1 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 1, 1u);
+    alpaka::memcpy(queue, nModules_Clusters_h_1, clusModuleStartLastElement);
+
+    auto nModules_Clusters_h_2 = cms::alpakatools::make_host_view(nModules_Clusters_h.data() + 2, 1u);
+    alpaka::memcpy(queue, nModules_Clusters_h_2, bpix2ClusterStart);
+
+#ifdef GPU_DEBUG
+    alpaka::wait(queue);
+    std::cout << "SiPixelPhase2DigiToCluster: results \n"
+              << " > no. of digis: " << numDigis << std::endl
+              << " > no. of active modules: " << nModules_Clusters_h[0] << std::endl
+              << " > no. of clusters: " << nModules_Clusters_h[1] << std::endl
+              << " > bpix2 offset: " << nModules_Clusters_h[2] << std::endl;
+#endif
+  }  //
+
+  // explicit instantiations of the kernel wrapper
+  template class SiPixelRawToClusterKernel;
+  template class SiPixelRawToClusterKernel;
+  template class SiPixelRawToClusterKernel;
+
+  }  // namespace pixelDetails
+
+}  // namespace ALPAKA_ACCELERATOR_NAMESPACE
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h
new file mode 100644
index 0000000000000..b7b9071506652
--- /dev/null
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/alpaka/SiPixelRawToClusterKernel.h
@@ -0,0 +1,199 @@
+#ifndef RecoLocalTracker_SiPixelClusterizer_SiPixelRawToClusterKernel_h
+#define RecoLocalTracker_SiPixelClusterizer_SiPixelRawToClusterKernel_h
+
+#include
+#include
+#include
+
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
+
+#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h"
+#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h"
+#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h"
+#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h"
+#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigiErrorsSoACollection.h"
+#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigiErrorsDevice.h"
+#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h"
+
+#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLTLayout.h"
+#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelGainCalibrationForHLTDevice.h"
+#include "CondFormats/SiPixelObjects/interface/alpaka/SiPixelMappingDevice.h"
+
+#include
"DataFormats/SiPixelRawData/interface/SiPixelErrorCompact.h" +#include "DataFormats/SiPixelRawData/interface/SiPixelFormatterErrors.h" +#include "DataFormats/SiPixelDetId/interface/PixelChannelIdentifier.h" + +namespace pixelDetails { + + constexpr auto MAX_LINK = pixelgpudetails::MAX_LINK; + constexpr auto MAX_SIZE = pixelgpudetails::MAX_SIZE; + constexpr auto MAX_ROC = pixelgpudetails::MAX_ROC; + // Phase 1 geometry constants + constexpr uint32_t layerStartBit = 20; + constexpr uint32_t ladderStartBit = 12; + constexpr uint32_t moduleStartBit = 2; + + constexpr uint32_t panelStartBit = 10; + constexpr uint32_t diskStartBit = 18; + constexpr uint32_t bladeStartBit = 12; + + constexpr uint32_t layerMask = 0xF; + constexpr uint32_t ladderMask = 0xFF; + constexpr uint32_t moduleMask = 0x3FF; + constexpr uint32_t panelMask = 0x3; + constexpr uint32_t diskMask = 0xF; + constexpr uint32_t bladeMask = 0x3F; + + constexpr uint32_t LINK_bits = 6; + constexpr uint32_t ROC_bits = 5; + constexpr uint32_t DCOL_bits = 5; + constexpr uint32_t PXID_bits = 8; + constexpr uint32_t ADC_bits = 8; + + // special for layer 1 + constexpr uint32_t LINK_bits_l1 = 6; + constexpr uint32_t ROC_bits_l1 = 5; + constexpr uint32_t COL_bits_l1 = 6; + constexpr uint32_t ROW_bits_l1 = 7; + constexpr uint32_t OMIT_ERR_bits = 1; + + constexpr uint32_t maxROCIndex = 8; + constexpr uint32_t numRowsInRoc = 80; + constexpr uint32_t numColsInRoc = 52; + + constexpr uint32_t MAX_WORD = 2000; + + constexpr uint32_t ADC_shift = 0; + constexpr uint32_t PXID_shift = ADC_shift + ADC_bits; + constexpr uint32_t DCOL_shift = PXID_shift + PXID_bits; + constexpr uint32_t ROC_shift = DCOL_shift + DCOL_bits; + constexpr uint32_t LINK_shift = ROC_shift + ROC_bits_l1; + // special for layer 1 ROC + constexpr uint32_t ROW_shift = ADC_shift + ADC_bits; + constexpr uint32_t COL_shift = ROW_shift + ROW_bits_l1; + constexpr uint32_t OMIT_ERR_shift = 20; + + constexpr uint32_t LINK_mask = ~(~uint32_t(0) << LINK_bits_l1); + 
constexpr uint32_t ROC_mask = ~(~uint32_t(0) << ROC_bits_l1);
+  constexpr uint32_t COL_mask = ~(~uint32_t(0) << COL_bits_l1);
+  constexpr uint32_t ROW_mask = ~(~uint32_t(0) << ROW_bits_l1);
+  constexpr uint32_t DCOL_mask = ~(~uint32_t(0) << DCOL_bits);
+  constexpr uint32_t PXID_mask = ~(~uint32_t(0) << PXID_bits);
+  constexpr uint32_t ADC_mask = ~(~uint32_t(0) << ADC_bits);
+  constexpr uint32_t ERROR_mask = ~(~uint32_t(0) << ROC_bits_l1);
+  constexpr uint32_t OMIT_ERR_mask = ~(~uint32_t(0) << OMIT_ERR_bits);
+
+  /// Plain aggregate of three identifiers: raw detector id, ROC index within the detector, module id.
+  struct DetIdGPU {
+    uint32_t RawId;
+    uint32_t rocInDet;
+    uint32_t moduleId;
+  };
+
+  /// Plain (row, column) pair.
+  struct Pixel {
+    uint32_t row;
+    uint32_t col;
+  };
+
+  /// Returns PixelChannelIdentifier::thePacking.
+  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr pixelchannelidentifierimpl::Packing packing() {
+    return PixelChannelIdentifier::thePacking;
+  }
+
+  /// Packs row, column and ADC into one 32-bit word using the shifts of packing().
+  /// The ADC value is clamped to thePacking.max_adc first.
+  /// `flag` is accepted for interface compatibility but does not enter the returned word.
+  ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE constexpr uint32_t pack(uint32_t row,
+                                                              uint32_t col,
+                                                              uint32_t adc,
+                                                              uint32_t flag = 0) {
+    constexpr pixelchannelidentifierimpl::Packing thePacking = packing();
+    adc = std::min(adc, uint32_t(thePacking.max_adc));
+
+    return (row << thePacking.row_shift) | (col << thePacking.column_shift) | (adc << thePacking.adc_shift);
+  }
+
+  /// Combines row and column into a channel number: row shifted left by column_width, OR-ed with col.
+  constexpr uint32_t pixelToChannel(int row, int col) {
+    constexpr pixelchannelidentifierimpl::Packing thePacking = packing();
+    return (row << thePacking.column_width) | col;
+  }
+
+}  // namespace pixelDetails
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE {
+  namespace pixelDetails {
+
+    /// Host-side staging area for the raw words of an event and the matching FED ids.
+    class WordFedAppender {
+    public:
+      WordFedAppender();
+      ~WordFedAppender() = default;
+
+      /// Allocates both host buffers with `words` elements each.
+      WordFedAppender(uint32_t words)
+          : word_{cms::alpakatools::make_host_buffer(words)},
+            fedId_{cms::alpakatools::make_host_buffer(words)} {};
+
+      /// Copies `length` 32-bit words from `src` into word_ at offset wordCounterGPU, and sets
+      /// length / 2 bytes of fedId_, starting at offset wordCounterGPU / 2, to (fedId - 1200).
+      /// No bounds checking is done against the allocated buffer sizes.
+      void initializeWordFed(int fedId, unsigned int wordCounterGPU, const uint32_t* src, unsigned int length) {
+        std::memcpy(word_.data() + wordCounterGPU, src, sizeof(uint32_t) * length);
+        std::memset(fedId_.data() + wordCounterGPU / 2, fedId - 1200, length / 2);
+      }
+      // both accessors return the buffer object by value
+      auto word() const { return word_; }
+      auto fedId() const { return fedId_; }
+
+    private:
+      cms::alpakatools::host_buffer word_;
+      cms::alpakatools::host_buffer fedId_;
+    };
+
+    /// Owner of the digi, digi-error and cluster collections produced by the clustering chain,
+    /// plus a 3-element host buffer of counters. Non-copyable and non-movable.
+    template
+    class SiPixelRawToClusterKernel {
+    public:
+      SiPixelRawToClusterKernel() : nModules_Clusters_h{cms::alpakatools::make_host_buffer(3u)} {}
+
+      ~SiPixelRawToClusterKernel() = default;
+
+      SiPixelRawToClusterKernel(const SiPixelRawToClusterKernel&) = delete;
+      SiPixelRawToClusterKernel(SiPixelRawToClusterKernel&&) = delete;
+      SiPixelRawToClusterKernel& operator=(const SiPixelRawToClusterKernel&) = delete;
+      SiPixelRawToClusterKernel& operator=(SiPixelRawToClusterKernel&&) = delete;
+
+      void makePhase1ClustersAsync(Queue& queue,
+                                   const SiPixelClusterThresholds clusterThresholds,
+                                   const SiPixelMappingSoAConstView& cablingMap,
+                                   const unsigned char* modToUnp,
+                                   const SiPixelGainCalibrationForHLTSoAConstView& gains,
+                                   const WordFedAppender& wordFed,
+                                   const uint32_t wordCounter,
+                                   const uint32_t fedCounter,
+                                   bool useQualityInfo,
+                                   bool includeErrors,
+                                   bool debug);
+
+      void makePhase2ClustersAsync(Queue& queue,
+                                   const SiPixelClusterThresholds clusterThresholds,
+                                   SiPixelDigisSoAView& digis_view,
+                                   const uint32_t numDigis);
+
+      /// Stores nModules_Clusters_h[0] and nDigis in the digi collection, then moves it out.
+      /// The optional is dereferenced without checking that it holds a value.
+      SiPixelDigisSoACollection getDigis() {
+        digis_d->setNModulesDigis(nModules_Clusters_h[0], nDigis);
+        return std::move(*digis_d);
+      }
+
+      /// Stores nModules_Clusters_h[1] and [2] in the cluster collection, then moves it out.
+      /// The optional is dereferenced without checking that it holds a value.
+      SiPixelClustersSoACollection getClusters() {
+        clusters_d->setNClusters(nModules_Clusters_h[1], nModules_Clusters_h[2]);
+        return std::move(*clusters_d);
+      }
+
+      /// Moves the digi-error collection out; the optional is dereferenced without a check.
+      SiPixelDigiErrorsSoACollection getErrors() { return std::move(*digiErrors_d); }
+
+      auto nModules() { return nModules_Clusters_h[0]; }
+
+    private:
+      uint32_t nDigis = 0;
+
+      // Data to be put in the event
+      cms::alpakatools::host_buffer nModules_Clusters_h;
+      std::optional digis_d;
+      std::optional clusters_d;
+      std::optional digiErrors_d;
+    };
+
+  }  // namespace pixelDetails
+}  // namespace ALPAKA_ACCELERATOR_NAMESPACE
+
+#endif  // RecoLocalTracker_SiPixelClusterizer_SiPixelRawToClusterKernel_h
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h index 180b356db2c88..869beb74564b8 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h @@ -8,11 +8,9 @@ #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" #include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" - -// local include(s) -#include "SiPixelClusterThresholds.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" namespace gpuCalibPixel { @@ -52,6 +50,7 @@ namespace gpuCalibPixel { int row = x[i]; int col = y[i]; + auto ret = ped->getPedAndGain(id[i], col, row, isDeadColumn, isNoisyColumn); float pedestal = ret.first; float gain = ret.second; diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h index a96cd0bcc5c15..1ff62ed1c6c57 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h @@ -8,9 +8,7 @@ #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" #include "HeterogeneousCore/CUDAUtilities/interface/cuda_assert.h" #include "HeterogeneousCore/CUDAUtilities/interface/prefixScan.h" - -// local include(s) -#include "SiPixelClusterThresholds.h" +#include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" namespace gpuClustering { diff --git a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py index 4460dd6ab0240..8d78599d07d9c 100644 
--- a/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py +++ b/RecoLocalTracker/SiPixelClusterizer/python/siPixelClustersPreSplitting_cff.py @@ -1,6 +1,7 @@ import FWCore.ParameterSet.Config as cms from Configuration.Eras.Modifier_run3_common_cff import run3_common from Configuration.ProcessModifiers.gpu_cff import gpu +from Configuration.ProcessModifiers.alpaka_cff import alpaka # conditions used *only* by the modules running on GPU from CalibTracker.SiPixelESProducers.siPixelROCsStatusAndMappingWrapperESProducer_cfi import siPixelROCsStatusAndMappingWrapperESProducer @@ -17,6 +18,7 @@ # reconstruct the pixel digis and clusters on the gpu from RecoLocalTracker.SiPixelClusterizer.siPixelRawToClusterCUDAPhase1_cfi import siPixelRawToClusterCUDAPhase1 as _siPixelRawToClusterCUDA from RecoLocalTracker.SiPixelClusterizer.siPixelRawToClusterCUDAHIonPhase1_cfi import siPixelRawToClusterCUDAHIonPhase1 as _siPixelRawToClusterCUDAHIonPhase1 + siPixelClustersPreSplittingCUDA = _siPixelRawToClusterCUDA.clone() # HIon Modifiers @@ -34,7 +36,6 @@ VCaltoElectronOffset = 0, VCaltoElectronOffset_L1 = 0) - from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAPhase1_cfi import siPixelDigisClustersFromSoAPhase1 as _siPixelDigisClustersFromSoAPhase1 from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAPhase2_cfi import siPixelDigisClustersFromSoAPhase2 as _siPixelDigisClustersFromSoAPhase2 @@ -93,3 +94,94 @@ siPixelDigisClustersPreSplitting, # SwitchProducer wrapping the legacy pixel cluster producer or an alias for the pixel clusters information converted from SoA siPixelClustersPreSplitting)) + +###################################################################### + +### Alpaka Pixel Clusters Reco + +#from CalibTracker.SiPixelESProducers.siPixelCablingSoAESProducer_cfi import siPixelCablingSoAESProducer +#from CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTSoAESProducer_cfi import 
siPixelGainCalibrationForHLTSoAESProducer + +def _addProcessCalibTrackerAlpakaES(process): + process.load("CalibTracker.SiPixelESProducers.siPixelCablingSoAESProducer_cfi") + process.load("CalibTracker.SiPixelESProducers.siPixelGainCalibrationForHLTSoAESProducer_cfi") + +modifyConfigurationCalibTrackerAlpakaES_ = alpaka.makeProcessModifier(_addProcessCalibTrackerAlpakaES) + +# reconstruct the pixel digis and clusters with alpaka on the device +from RecoLocalTracker.SiPixelClusterizer.siPixelRawToClusterPhase1_cfi import siPixelRawToClusterPhase1 as _siPixelRawToClusterAlpaka +siPixelClustersPreSplittingAlpaka = _siPixelRawToClusterAlpaka.clone() + +(alpaka & run3_common).toModify(siPixelClustersPreSplittingAlpaka, + # use the pixel channel calibrations scheme for Run 3 + clusterThreshold_layer1 = 4000, + VCaltoElectronGain = 1, # all gains=1, pedestals=0 + VCaltoElectronGain_L1 = 1, + VCaltoElectronOffset = 0, + VCaltoElectronOffset_L1 = 0) + +from RecoLocalTracker.SiPixelClusterizer.siPixelPhase2DigiToCluster_cfi import siPixelPhase2DigiToCluster as _siPixelPhase2DigiToCluster + +(alpaka & phase2_tracker).toReplaceWith(siPixelClustersPreSplittingAlpaka, _siPixelPhase2DigiToCluster.clone( + Phase2ReadoutMode = PixelDigitizerAlgorithmCommon.Phase2ReadoutMode.value(), # flag to decide Readout Mode : linear TDR (-1), dual slope with slope parameters (+1,+2,+3,+4 ...) 
with threshold subtraction + Phase2DigiBaseline = int(PixelDigitizerAlgorithmCommon.ThresholdInElectrons_Barrel.value()), # same for barrel and endcap + Phase2KinkADC = 8, + ElectronPerADCGain = PixelDigitizerAlgorithmCommon.ElectronPerAdc.value() +)) + +# reconstruct the pixel digis and clusters with alpaka on the cpu, for validation +siPixelClustersPreSplittingAlpakaSerial = siPixelClustersPreSplittingAlpaka.clone( + #alpaka = dict( backend = '*' ) + alpaka = None +) +siPixelClustersPreSplittingAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + siPixelClustersPreSplittingAlpaka._TypedParameterizable__type.removesuffix('@alpaka') + +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase1_cfi import siPixelDigisClustersFromSoAAlpakaPhase1 as _siPixelDigisClustersFromSoAAlpakaPhase1 +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase2_cfi import siPixelDigisClustersFromSoAAlpakaPhase2 as _siPixelDigisClustersFromSoAAlpakaPhase2 + +(alpaka & ~phase2_tracker).toReplaceWith(siPixelDigisClustersPreSplitting,_siPixelDigisClustersFromSoAAlpakaPhase1.clone( + src = "siPixelClustersPreSplittingAlpaka" +)) + +(alpaka & phase2_tracker).toReplaceWith(siPixelDigisClustersPreSplitting,_siPixelDigisClustersFromSoAAlpakaPhase2.clone( + clusterThreshold_layer1 = 4000, + clusterThreshold_otherLayers = 4000, + src = "siPixelClustersPreSplittingAlpaka", + storeDigis = False, + produceDigis = False +)) + +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase1_cfi import siPixelDigisClustersFromSoAAlpakaPhase1 as _siPixelDigisClustersFromSoAAlpakaPhase1 +from RecoLocalTracker.SiPixelClusterizer.siPixelDigisClustersFromSoAAlpakaPhase2_cfi import siPixelDigisClustersFromSoAAlpakaPhase2 as _siPixelDigisClustersFromSoAAlpakaPhase2 + +alpaka.toModify(siPixelClustersPreSplitting, + cpu = cms.EDAlias( + siPixelDigisClustersPreSplitting = cms.VPSet( + cms.PSet(type = 
cms.string("SiPixelClusteredmNewDetSetVector")) + ) + ) +) + +# Run 3 +alpaka.toReplaceWith(siPixelClustersPreSplittingTask, cms.Task( + # reconstruct the pixel clusters with alpaka + siPixelClustersPreSplittingAlpaka, + # reconstruct the pixel clusters with alpaka on the cpu (if requested by the validation) + siPixelClustersPreSplittingAlpakaSerial, + # convert from host SoA to legacy formats (digis and clusters) + siPixelDigisClustersPreSplitting, + # EDAlias for the clusters + siPixelClustersPreSplitting) +) + +# Phase 2 +(alpaka & phase2_tracker).toReplaceWith(siPixelClustersPreSplittingTask, cms.Task( + # reconstruct the pixel clusters with alpaka from copied digis + siPixelClustersPreSplittingAlpaka, + # reconstruct the pixel clusters with alpaka from copied digis on the cpu (if requested by the validation) + siPixelClustersPreSplittingAlpakaSerial, + # convert the pixel digis (except errors) and clusters to the legacy format + siPixelDigisClustersPreSplitting, + # SwitchProducer wrapping the legacy pixel cluster producer or an alias for the pixel clusters information converted from SoA + siPixelClustersPreSplitting) +) diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index c0291ed9f32f8..5b70ded261ddf 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -16,12 +16,13 @@ #include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" #endif // __CUDACC__ -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelClusterThresholds.h" #include "CUDADataFormats/SiPixelCluster/interface/gpuClusteringConstants.h" - #include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include 
"RecoLocalTracker/SiPixelClusterizer/interface/SiPixelClusterThresholds.h" + +// local includes, for testing only +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClusterChargeCut.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" int main(void) { #ifdef __CUDACC__ diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index 70a2970420c51..62787f4c989c1 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -1,15 +1,20 @@ + + + + + diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h new file mode 100644 index 0000000000000..9a2139ab2e355 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h @@ -0,0 +1,43 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_interface_PixelCPEFastParamsDevice_h +#define RecoLocalTracker_SiPixelRecHits_interface_PixelCPEFastParamsDevice_h + +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +template +class PixelCPEFastParamsDevice { +public: + using Buffer = cms::alpakatools::device_buffer>; + using ConstBuffer = cms::alpakatools::const_device_buffer>; + + template + PixelCPEFastParamsDevice(TQueue queue) + : buffer_(cms::alpakatools::make_device_buffer>(queue)) {} + + // non-copyable + PixelCPEFastParamsDevice(PixelCPEFastParamsDevice const&) = delete; + PixelCPEFastParamsDevice& operator=(PixelCPEFastParamsDevice const&) = delete; + + // movable + PixelCPEFastParamsDevice(PixelCPEFastParamsDevice&&) = default; + PixelCPEFastParamsDevice& operator=(PixelCPEFastParamsDevice&&) = default; + + // default destructor + ~PixelCPEFastParamsDevice() = default; + + // access the buffer + Buffer buffer() { return buffer_; } + ConstBuffer buffer() const { return buffer_; } + 
ConstBuffer const_buffer() const { return buffer_; } + + auto size() const { return alpaka::getExtentProduct(buffer_); } + + pixelCPEforDevice::ParamsOnDeviceT const* data() const { return buffer_.data(); } + +private: + Buffer buffer_; +}; + +#endif // RecoLocalTracker_SiPixelRecHits_interface_PixelCPEFastParamsDevice_h diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h new file mode 100644 index 0000000000000..7d57c46dd7a13 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h @@ -0,0 +1,66 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_interface_PixelCPEFastParamsHost_h +#define RecoLocalTracker_SiPixelRecHits_interface_PixelCPEFastParamsHost_h + +#include + +#include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" +#include "DataFormats/GeometrySurface/interface/SOARotation.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +template +class PixelCPEFastParamsHost : public PixelCPEGenericBase { +public: + using Buffer = cms::alpakatools::host_buffer>; + using ConstBuffer = cms::alpakatools::const_host_buffer>; + + PixelCPEFastParamsHost(edm::ParameterSet const& conf, + const MagneticField* mag, + const TrackerGeometry& geom, + const TrackerTopology& ttopo, + const SiPixelLorentzAngle* lorentzAngle, + const SiPixelGenErrorDBObject* genErrorDBObject, + const SiPixelLorentzAngle* lorentzAngleWidth); + + // non-copyable 
+ PixelCPEFastParamsHost(PixelCPEFastParamsHost const&) = delete; + PixelCPEFastParamsHost& operator=(PixelCPEFastParamsHost const&) = delete; + + // movable + PixelCPEFastParamsHost(PixelCPEFastParamsHost&&) = default; + PixelCPEFastParamsHost& operator=(PixelCPEFastParamsHost&&) = default; + + // default destructor + ~PixelCPEFastParamsHost() override = default; + + // access the buffer + Buffer buffer() { return buffer_; } + ConstBuffer buffer() const { return buffer_; } + ConstBuffer const_buffer() const { return buffer_; } + + auto size() const { return alpaka::getExtentProduct(buffer_); } + + pixelCPEforDevice::ParamsOnDeviceT const* data() const { return buffer_.data(); } + + static void fillPSetDescription(edm::ParameterSetDescription& desc); + +private: + LocalPoint localPosition(DetParam const& theDetParam, ClusterParam& theClusterParam) const override; + LocalError localError(DetParam const& theDetParam, ClusterParam& theClusterParam) const override; + + void errorFromTemplates(DetParam const& theDetParam, ClusterParamGeneric& theClusterParam, float qclus) const; + + std::vector thePixelGenError_; + + void fillParamsForDevice(); + + Buffer buffer_; +}; + +#endif // RecoLocalTracker_SiPixelRecHits_interface_PixelCPEFastParamsHost_h diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h index 1c7b9646d037f..2f18d86a39944 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEGenericBase.h @@ -1,9 +1,10 @@ #ifndef RecoLocalTracker_SiPixelRecHits_PixelCPEGenericBase_H #define RecoLocalTracker_SiPixelRecHits_PixelCPEGenericBase_H -#include "PixelCPEBase.h" #include +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" + class PixelCPEGenericBase : public PixelCPEBase { public: struct ClusterParamGeneric : ClusterParam { diff --git 
a/RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h b/RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h new file mode 100644 index 0000000000000..4e66d24604aec --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h @@ -0,0 +1,40 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_interface_alpaka_PixelCPEFastParamsCollection_h +#define RecoLocalTracker_SiPixelRecHits_interface_alpaka_PixelCPEFastParamsCollection_h + +#include +#include +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h" +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" + +// TODO: The class is created via inheritance of the PortableCollection. +// This is generally discouraged, and should be done via composition. 
+// See: https://github.com/cms-sw/cmssw/pull/40465#discussion_r1067364306 +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + template + using PixelCPEFastParams = std::conditional_t, + PixelCPEFastParamsHost, + PixelCPEFastParamsDevice>; + + using PixelCPEFastParamsPhase1 = PixelCPEFastParams; + using PixelCPEFastParamsPhase2 = PixelCPEFastParams; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace cms::alpakatools { + template + struct CopyToDevice> { + template + static auto copyAsync(TQueue& queue, PixelCPEFastParamsHost const& srcData) { + using TDevice = typename alpaka::trait::DevType::type; + PixelCPEFastParamsDevice dstData(queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + return dstData; + } + }; +} // namespace cms::alpakatools + +#endif // DataFormats_PixelCPEFastParamsoA_interface_alpaka_PixelCPEFastParamsCollection_h diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h new file mode 100644 index 0000000000000..ac99af3146904 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h @@ -0,0 +1,433 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_interface_pixelCPEforDevice_h +#define RecoLocalTracker_SiPixelRecHits_interface_pixelCPEforDevice_h + +#include +#include +#include +#include +#include + +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/GeometrySurface/interface/SOARotation.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +namespace pixelCPEforDevice { + + // From https://cmssdt.cern.ch/dxr/CMSSW/source/CondFormats/SiPixelTransient/src/SiPixelGenError.cc#485-486 + // qbin: int (0-4) describing the charge of the cluster + // [0: 1.5; + 
using Rotation = SOARotation; + + // SOA (on device) + + template + struct ClusParamsT { + uint32_t minRow[N]; + uint32_t maxRow[N]; + uint32_t minCol[N]; + uint32_t maxCol[N]; + + int32_t q_f_X[N]; + int32_t q_l_X[N]; + int32_t q_f_Y[N]; + int32_t q_l_Y[N]; + + int32_t charge[N]; + + float xpos[N]; + float ypos[N]; + + float xerr[N]; + float yerr[N]; + + int16_t xsize[N]; // (*8) clipped at 127 if negative is edge.... + int16_t ysize[N]; + + Status status[N]; + }; + + // all modules are identical! + struct CommonParams { + float theThicknessB; + float theThicknessE; + float thePitchX; + float thePitchY; + + uint16_t maxModuleStride; + uint8_t numberOfLaddersInBarrel; + }; + + struct DetParams { + bool isBarrel; + bool isPosZ; + uint16_t layer; + uint16_t index; + uint32_t rawId; + + float shiftX; + float shiftY; + float chargeWidthX; + float chargeWidthY; + uint16_t pixmx; // max pix charge + + uint16_t nRowsRoc; //we don't need 2^16 columns, is worth to use 15 + 1 for sign + uint16_t nColsRoc; + uint16_t nRows; + uint16_t nCols; + + uint32_t numPixsInModule; + + float x0, y0, z0; // the vertex in the local coord of the detector + + float apeXX, apeYY; // ape^2 + uint8_t sx2, sy1, sy2; + uint8_t sigmax[kNumErrorBins], sigmax1[kNumErrorBins], + sigmay[kNumErrorBins]; // in micron + float xfact[kGenErrorQBins], yfact[kGenErrorQBins]; + int minCh[kGenErrorQBins]; + + Frame frame; + }; + + template + struct LayerGeometryT { + uint32_t layerStart[TrackerTopology::numberOfLayers + 1]; + uint8_t layer[pixelTopology::layerIndexSize]; + uint16_t maxModuleStride; + }; + + constexpr int32_t MaxHitsInIter = pixelClustering::maxHitsInIter(); + using ClusParams = ClusParamsT; + + constexpr inline void computeAnglesFromDet( + DetParams const& __restrict__ detParams, float const x, float const y, float& cotalpha, float& cotbeta) { + // x,y local position on det + auto gvx = x - detParams.x0; + auto gvy = y - detParams.y0; + auto gvz = -1.f / detParams.z0; + // normalization not 
required as only ratio used... + // calculate angles + cotalpha = gvx * gvz; + cotbeta = gvy * gvz; + } + + constexpr inline float correction(int sizeM1, + int q_f, //!< Charge in the first pixel. + int q_l, //!< Charge in the last pixel. + uint16_t upper_edge_first_pix, //!< As the name says. + uint16_t lower_edge_last_pix, //!< As the name says. + float lorentz_shift, //!< L-shift at half thickness + float theThickness, //detector thickness + float cot_angle, //!< cot of alpha_ or beta_ + float pitch, //!< thePitchX or thePitchY + bool first_is_big, //!< true if the first is big + bool last_is_big) //!< true if the last is big + { + if (0 == sizeM1) // size 1 + return 0; + + float w_eff = 0; + bool simple = true; + if (1 == sizeM1) { // size 2 + //--- Width of the clusters minus the edge (first and last) pixels. + //--- In the note, they are denoted x_F and x_L (and y_F and y_L) + // assert(lower_edge_last_pix >= upper_edge_first_pix); + auto w_inner = pitch * float(lower_edge_last_pix - upper_edge_first_pix); // in cm + + //--- Predicted charge width from geometry + auto w_pred = theThickness * cot_angle // geometric correction (in cm) + - lorentz_shift; // (in cm) &&& check fpix! + + w_eff = std::abs(w_pred) - w_inner; + + //--- If the observed charge width is inconsistent with the expectations + //--- based on the track, do *not* use w_pred-w_inner. Instead, replace + //--- it with an *average* effective charge width, which is the average + //--- length of the edge pixels. + + // this can produce "large" regressions for very small numeric differences + simple = (w_eff < 0.0f) | (w_eff > pitch); + } + + if (simple) { + //--- Total length of the two edge pixels (first+last) + float sum_of_edge = 2.0f; + if (first_is_big) + sum_of_edge += 1.0f; + if (last_is_big) + sum_of_edge += 1.0f; + w_eff = pitch * 0.5f * sum_of_edge; // ave. 
length of edge pixels (first+last) (cm) + } + + //--- Finally, compute the position in this projection + float qdiff = q_l - q_f; + float qsum = q_l + q_f; + + //--- Temporary fix for clusters with both first and last pixel with charge = 0 + if (qsum == 0) + qsum = 1.0f; + + return 0.5f * (qdiff / qsum) * w_eff; + } + + template + constexpr inline void position(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + constexpr int maxSize = TrackerTraits::maxSizeCluster; + //--- Upper Right corner of Lower Left pixel -- in measurement frame + uint16_t llx = cp.minRow[ic] + 1; + uint16_t lly = cp.minCol[ic] + 1; + + //--- Lower Left corner of Upper Right pixel -- in measurement frame + uint16_t urx = cp.maxRow[ic]; + uint16_t ury = cp.maxCol[ic]; + + uint16_t llxl = llx, llyl = lly, urxl = urx, uryl = ury; + + llxl = TrackerTraits::localX(llx); + llyl = TrackerTraits::localY(lly); + urxl = TrackerTraits::localX(urx); + uryl = TrackerTraits::localY(ury); + + auto mx = llxl + urxl; + auto my = llyl + uryl; + + int xsize = int(urxl) + 2 - int(llxl); + int ysize = int(uryl) + 2 - int(llyl); + assert(xsize >= 0); // 0 if bixpix... 
+ assert(ysize >= 0); + + if (TrackerTraits::isBigPixX(cp.minRow[ic])) + ++xsize; + if (TrackerTraits::isBigPixX(cp.maxRow[ic])) + ++xsize; + if (TrackerTraits::isBigPixY(cp.minCol[ic])) + ++ysize; + if (TrackerTraits::isBigPixY(cp.maxCol[ic])) + ++ysize; + + int unbalanceX = 8.f * std::abs(float(cp.q_f_X[ic] - cp.q_l_X[ic])) / float(cp.q_f_X[ic] + cp.q_l_X[ic]); + int unbalanceY = 8.f * std::abs(float(cp.q_f_Y[ic] - cp.q_l_Y[ic])) / float(cp.q_f_Y[ic] + cp.q_l_Y[ic]); + + xsize = 8 * xsize - unbalanceX; + ysize = 8 * ysize - unbalanceY; + + cp.xsize[ic] = std::min(xsize, maxSize); + cp.ysize[ic] = std::min(ysize, maxSize); + + if (cp.minRow[ic] == 0 || cp.maxRow[ic] == uint32_t(detParams.nRows - 1)) + cp.xsize[ic] = -cp.xsize[ic]; + + if (cp.minCol[ic] == 0 || cp.maxCol[ic] == uint32_t(detParams.nCols - 1)) + cp.ysize[ic] = -cp.ysize[ic]; + + // apply the lorentz offset correction + float xoff = 0.5f * float(detParams.nRows) * comParams.thePitchX; + float yoff = 0.5f * float(detParams.nCols) * comParams.thePitchY; + + //correction for bigpixels for phase1 + xoff = xoff + TrackerTraits::bigPixXCorrection * comParams.thePitchX; + yoff = yoff + TrackerTraits::bigPixYCorrection * comParams.thePitchY; + + // apply the lorentz offset correction + auto xPos = detParams.shiftX + (comParams.thePitchX * 0.5f * float(mx)) - xoff; + auto yPos = detParams.shiftY + (comParams.thePitchY * 0.5f * float(my)) - yoff; + + float cotalpha = 0, cotbeta = 0; + + computeAnglesFromDet(detParams, xPos, yPos, cotalpha, cotbeta); + + auto thickness = detParams.isBarrel ? 
comParams.theThicknessB : comParams.theThicknessE; + + auto xcorr = correction(cp.maxRow[ic] - cp.minRow[ic], + cp.q_f_X[ic], + cp.q_l_X[ic], + llxl, + urxl, + detParams.chargeWidthX, // lorentz shift in cm + thickness, + cotalpha, + comParams.thePitchX, + TrackerTraits::isBigPixX(cp.minRow[ic]), + TrackerTraits::isBigPixX(cp.maxRow[ic])); + + auto ycorr = correction(cp.maxCol[ic] - cp.minCol[ic], + cp.q_f_Y[ic], + cp.q_l_Y[ic], + llyl, + uryl, + detParams.chargeWidthY, // lorentz shift in cm + thickness, + cotbeta, + comParams.thePitchY, + TrackerTraits::isBigPixY(cp.minCol[ic]), + TrackerTraits::isBigPixY(cp.maxCol[ic])); + + cp.xpos[ic] = xPos + xcorr; + cp.ypos[ic] = yPos + ycorr; + } + + template + constexpr inline void errorFromSize(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + // Edge cluster errors + cp.xerr[ic] = 0.0050; + cp.yerr[ic] = 0.0085; + + // FIXME these are errors form Run1 + float xerr_barrel_l1_def = TrackerTraits::xerr_barrel_l1_def; + float yerr_barrel_l1_def = TrackerTraits::yerr_barrel_l1_def; + float xerr_barrel_ln_def = TrackerTraits::xerr_barrel_ln_def; + float yerr_barrel_ln_def = TrackerTraits::yerr_barrel_ln_def; + float xerr_endcap_def = TrackerTraits::xerr_endcap_def; + float yerr_endcap_def = TrackerTraits::yerr_endcap_def; + + constexpr float xerr_barrel_l1[] = {0.00115, 0.00120, 0.00088}; //TODO MOVE THESE SOMEWHERE ELSE + constexpr float yerr_barrel_l1[] = { + 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; + constexpr float xerr_barrel_ln[] = {0.00115, 0.00120, 0.00088}; + constexpr float yerr_barrel_ln[] = { + 0.00375, 0.00230, 0.00250, 0.00250, 0.00230, 0.00230, 0.00210, 0.00210, 0.00240}; + constexpr float xerr_endcap[] = {0.0020, 0.0020}; + constexpr float yerr_endcap[] = {0.00210}; + + auto sx = cp.maxRow[ic] - cp.minRow[ic]; + auto sy = cp.maxCol[ic] - cp.minCol[ic]; + + // is edgy ? 
+ bool isEdgeX = cp.xsize[ic] < 1; + bool isEdgeY = cp.ysize[ic] < 1; + + // is one and big? + bool isBig1X = ((0 == sx) && TrackerTraits::isBigPixX(cp.minRow[ic])); + bool isBig1Y = ((0 == sy) && TrackerTraits::isBigPixY(cp.minCol[ic])); + + if (!isEdgeX && !isBig1X) { + if (not detParams.isBarrel) { + cp.xerr[ic] = sx < std::size(xerr_endcap) ? xerr_endcap[sx] : xerr_endcap_def; + } else if (detParams.layer == 1) { + cp.xerr[ic] = sx < std::size(xerr_barrel_l1) ? xerr_barrel_l1[sx] : xerr_barrel_l1_def; + } else { + cp.xerr[ic] = sx < std::size(xerr_barrel_ln) ? xerr_barrel_ln[sx] : xerr_barrel_ln_def; + } + } + + if (!isEdgeY && !isBig1Y) { + if (not detParams.isBarrel) { + cp.yerr[ic] = sy < std::size(yerr_endcap) ? yerr_endcap[sy] : yerr_endcap_def; + } else if (detParams.layer == 1) { + cp.yerr[ic] = sy < std::size(yerr_barrel_l1) ? yerr_barrel_l1[sy] : yerr_barrel_l1_def; + } else { + cp.yerr[ic] = sy < std::size(yerr_barrel_ln) ? yerr_barrel_ln[sy] : yerr_barrel_ln_def; + } + } + } + + template + constexpr inline void errorFromDB(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + // Edge cluster errors + cp.xerr[ic] = 0.0050f; + cp.yerr[ic] = 0.0085f; + + auto sx = cp.maxRow[ic] - cp.minRow[ic]; + auto sy = cp.maxCol[ic] - cp.minCol[ic]; + + // is edgy ? (size is set negative: see above) + bool isEdgeX = cp.xsize[ic] < 1; + bool isEdgeY = cp.ysize[ic] < 1; + // is one and big? 
+ bool isOneX = (0 == sx); + bool isOneY = (0 == sy); + bool isBigX = TrackerTraits::isBigPixX(cp.minRow[ic]); + bool isBigY = TrackerTraits::isBigPixY(cp.minCol[ic]); + + auto ch = cp.charge[ic]; + auto bin = 0; + for (; bin < kGenErrorQBins - 1; ++bin) + // find first bin which minimum charge exceeds cluster charge + if (ch < detParams.minCh[bin + 1]) + break; + + // in detParams qBins are reversed bin0 -> smallest charge, bin4-> largest charge + // whereas in CondFormats/SiPixelTransient/src/SiPixelGenError.cc it is the opposite + // so we reverse the bin here -> kGenErrorQBins - 1 - bin + cp.status[ic].qBin = kGenErrorQBins - 1 - bin; + cp.status[ic].isOneX = isOneX; + cp.status[ic].isBigX = (isOneX & isBigX) | isEdgeX; + cp.status[ic].isOneY = isOneY; + cp.status[ic].isBigY = (isOneY & isBigY) | isEdgeY; + + auto xoff = -float(TrackerTraits::xOffset) * comParams.thePitchX; + int low_value = 0; + int high_value = kNumErrorBins - 1; + int bin_value = float(kNumErrorBins) * (cp.xpos[ic] + xoff) / (2 * xoff); + // return estimated bin value truncated to [0, 15] + int jx = std::clamp(bin_value, low_value, high_value); + + auto toCM = [](uint8_t x) { return float(x) * 1.e-4f; }; + + if (not isEdgeX) { + cp.xerr[ic] = isOneX ? toCM(isBigX ? detParams.sx2 : detParams.sigmax1[jx]) + : detParams.xfact[bin] * toCM(detParams.sigmax[jx]); + } + + auto ey = cp.ysize[ic] > 8 ? detParams.sigmay[std::min(cp.ysize[ic] - 9, 15)] : detParams.sy1; + if (not isEdgeY) { + cp.yerr[ic] = isOneY ? toCM(isBigY ? 
detParams.sy2 : detParams.sy1) : detParams.yfact[bin] * toCM(ey); + } + } + + //for Phase2 -> fallback to error from size + template <> + constexpr inline void errorFromDB(CommonParams const& __restrict__ comParams, + DetParams const& __restrict__ detParams, + ClusParams& cp, + uint32_t ic) { + errorFromSize(comParams, detParams, cp, ic); + } + + template + struct ParamsOnDeviceT { + using LayerGeometry = LayerGeometryT; + using AverageGeometry = pixelTopology::AverageGeometryT; + + CommonParams m_commonParams; + // Will contain an array of DetParams instances + DetParams m_detParams[TrackerTopology::numberOfModules]; + LayerGeometry m_layerGeometry; + AverageGeometry m_averageGeometry; + + constexpr CommonParams const& __restrict__ commonParams() const { return m_commonParams; } + constexpr DetParams const& __restrict__ detParams(int i) const { return m_detParams[i]; } + constexpr LayerGeometry const& __restrict__ layerGeometry() const { return m_layerGeometry; } + constexpr AverageGeometry const& __restrict__ averageGeometry() const { return m_averageGeometry; } + + CommonParams& commonParams() { return m_commonParams; } + DetParams& detParams(int i) { return m_detParams[i]; } + LayerGeometry& layerGeometry() { return m_layerGeometry; } + AverageGeometry& averageGeometry() { return m_averageGeometry; } + + constexpr uint8_t layer(uint16_t id) const { return m_layerGeometry.layer[id / TrackerTopology::maxModuleStride]; }; + }; + +} // namespace pixelCPEforDevice + +#endif // RecoLocalTracker_SiPixelRecHits_interface_pixelCPEforDevice_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index 00c88eadd4b51..35a973120e9fd 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -1,12 +1,10 @@ - - - - + + + + - - @@ -14,5 +12,16 @@ + + + + + + + + + + + diff --git 
a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc new file mode 100644 index 0000000000000..9881aeab46bab --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSoAAlpaka.cc @@ -0,0 +1,187 @@ +#include +#include + +#include "DataFormats/Common/interface/DetSetVectorNew.h" +#include "DataFormats/Common/interface/Handle.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsHost.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +template +class SiPixelRecHitFromSoAAlpaka : public edm::global::EDProducer<> { + using HitModuleStartArray = typename TrackingRecHitSoA::HitModuleStartArray; + using hindex_type = typename TrackerTraits::hindex_type; + using HMSstorage = typename std::vector; + +public: + explicit SiPixelRecHitFromSoAAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelRecHitFromSoAAlpaka() override = default; + + 
static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + // Data has been implicitly copied from Device to Host by the framework + using HitsOnHost = TrackingRecHitHost; + +private: + void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + const edm::ESGetToken geomToken_; + const edm::EDGetTokenT hitsToken_; // Alpaka hits + const edm::EDGetTokenT clusterToken_; // legacy clusters + const edm::EDPutTokenT rechitsPutToken_; // legacy rechits + const edm::EDPutTokenT hostPutToken_; +}; + +template +SiPixelRecHitFromSoAAlpaka::SiPixelRecHitFromSoAAlpaka(const edm::ParameterSet& iConfig) + : geomToken_(esConsumes()), + hitsToken_(consumes(iConfig.getParameter("pixelRecHitSrc"))), + clusterToken_(consumes(iConfig.getParameter("src"))), + rechitsPutToken_(produces()), + hostPutToken_(produces()) {} + +template +void SiPixelRecHitFromSoAAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("pixelRecHitSrc", edm::InputTag("siPixelRecHitsPreSplittingAlpaka")); + desc.add("src", edm::InputTag("siPixelClustersPreSplitting")); + descriptions.addWithDefaultLabel(desc); +} + +template +void SiPixelRecHitFromSoAAlpaka::produce(edm::StreamID streamID, + edm::Event& iEvent, + const edm::EventSetup& iSetup) const { + auto const& hits = iEvent.get(hitsToken_); + auto nHits = hits.view().metadata().size(); + LogDebug("SiPixelRecHitFromSoAAlpaka") << "converting " << nHits << " Hits"; + + // allocate a buffer for the indices of the clusters + constexpr auto nMaxModules = TrackerTraits::numberOfModules; + + SiPixelRecHitCollection output; + output.reserve(nMaxModules, nHits); + + HMSstorage hmsp(nMaxModules + 1); + + if (0 == nHits) { + hmsp.clear(); + iEvent.emplace(rechitsPutToken_, std::move(output)); + iEvent.emplace(hostPutToken_, std::move(hmsp)); + return; + } + + // fill content of HMSstorage product, and put it into the Event + for 
(unsigned int idx = 0; idx < hmsp.size(); ++idx) { + hmsp[idx] = hits.view().hitsModuleStart()[idx]; + } + iEvent.emplace(hostPutToken_, std::move(hmsp)); + + auto xl = hits.view().xLocal(); + auto yl = hits.view().yLocal(); + auto xe = hits.view().xerrLocal(); + auto ye = hits.view().yerrLocal(); + + TrackerGeometry const& geom = iSetup.getData(geomToken_); + + auto const hclusters = iEvent.getHandle(clusterToken_); + + constexpr uint32_t maxHitsInModule = pixelClustering::maxHitsInModule(); + + int numberOfDetUnits = 0; + int numberOfClusters = 0; + for (auto const& dsv : *hclusters) { + numberOfDetUnits++; + unsigned int detid = dsv.detId(); + DetId detIdObject(detid); + const GeomDetUnit* genericDet = geom.idToDetUnit(detIdObject); + auto gind = genericDet->index(); + const PixelGeomDetUnit* pixDet = dynamic_cast(genericDet); + assert(pixDet); + SiPixelRecHitCollection::FastFiller recHitsOnDetUnit(output, detid); + auto fc = hits.view().hitsModuleStart()[gind]; + auto lc = hits.view().hitsModuleStart()[gind + 1]; + auto nhits = lc - fc; + + assert(lc > fc); + LogDebug("SiPixelRecHitFromSoAAlpaka") << "in det " << gind << ": conv " << nhits << " hits from " << dsv.size() + << " legacy clusters" << ' ' << fc << ',' << lc << "\n"; + if (nhits > maxHitsInModule) + edm::LogWarning("SiPixelRecHitFromSoAAlpaka") + .format("Too many clusters {} in module {}. 
Only the first {} hits will be converted", + nhits, + gind, + maxHitsInModule); + + nhits = std::min(nhits, maxHitsInModule); + + LogDebug("SiPixelRecHitFromSoAAlpaka") << "in det " << gind << "conv " << nhits << " hits from " << dsv.size() + << " legacy clusters" << ' ' << lc << ',' << fc; + + if (0 == nhits) + continue; + auto jnd = [&](int k) { return fc + k; }; + assert(nhits <= dsv.size()); + if (nhits != dsv.size()) { + edm::LogWarning("GPUHits2CPU") << "nhits!= nclus " << nhits << ' ' << dsv.size(); + } + for (auto const& clust : dsv) { + assert(clust.originalId() >= 0); + assert(clust.originalId() < dsv.size()); + if (clust.originalId() >= nhits) + continue; + auto ij = jnd(clust.originalId()); + LocalPoint lp(xl[ij], yl[ij]); + LocalError le(xe[ij], 0, ye[ij]); + SiPixelRecHitQuality::QualWordType rqw = 0; + + numberOfClusters++; + + /* cpu version.... (for reference) + std::tuple tuple = cpe_->getParameters( clust, *genericDet ); + LocalPoint lp( std::get<0>(tuple) ); + LocalError le( std::get<1>(tuple) ); + SiPixelRecHitQuality::QualWordType rqw( std::get<2>(tuple) ); + */ + + // Create a persistent edm::Ref to the cluster + edm::Ref, SiPixelCluster> cluster = edmNew::makeRefTo(hclusters, &clust); + // Make a RecHit and add it to the DetSet + recHitsOnDetUnit.emplace_back(lp, le, rqw, *genericDet, cluster); + // ============================= + + LogDebug("SiPixelRecHitFromSoAAlpaka") << "cluster " << numberOfClusters << " at " << lp << ' ' << le; + + } // <-- End loop on Clusters + + // LogDebug("SiPixelRecHitGPU") + LogDebug("SiPixelRecHitFromSoAAlpaka") << "found " << recHitsOnDetUnit.size() << " RecHits on " << detid; + + } // <-- End loop on DetUnits + + LogDebug("SiPixelRecHitFromSoAAlpaka") << "found " << numberOfDetUnits << " dets, " << numberOfClusters + << " clusters"; + + iEvent.emplace(rechitsPutToken_, std::move(output)); +} + +using SiPixelRecHitFromSoAAlpakaPhase1 = SiPixelRecHitFromSoAAlpaka; 
+DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaPhase1); + +using SiPixelRecHitFromSoAAlpakaPhase2 = SiPixelRecHitFromSoAAlpaka; +DEFINE_FWK_MODULE(SiPixelRecHitFromSoAAlpakaPhase2); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index 8dc6ae93018ea..21da864c1c348 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -198,7 +198,7 @@ void SiPixelRecHitSoAFromLegacyT::produce(edm::StreamID streamID, ndigi += clust.size(); } - cms::cuda::PortableHostCollection> digis_h(ndigi); + cms::cuda::PortableHostCollection digis_h(ndigi); clusterRef.clear(); clusters_h.view()[0].moduleId() = gind; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc new file mode 100644 index 0000000000000..73059a13dc636 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelCPEFastParamsESProducerAlpaka.cc @@ -0,0 +1,120 @@ +#include +#include +#include +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" + +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/Records/interface/TrackerTopologyRcd.h" +#include 
"Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "RecoLocalTracker/ClusterParameterEstimator/interface/PixelClusterParameterEstimator.h" + +#include "CondFormats/DataRecord/interface/SiPixelGenErrorDBObjectRcd.h" +#include "RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h" +#include "DataFormats/TrackerCommon/interface/TrackerTopology.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + template + class PixelCPEFastParamsESProducerAlpaka : public ESProducer { + public: + PixelCPEFastParamsESProducerAlpaka(edm::ParameterSet const& iConfig); + std::unique_ptr> produce(const PixelCPEFastParamsRecord& iRecord); + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + private: + edm::ESGetToken magfieldToken_; + edm::ESGetToken pDDToken_; + edm::ESGetToken hTTToken_; + edm::ESGetToken lorentzAngleToken_; + edm::ESGetToken lorentzAngleWidthToken_; + edm::ESGetToken genErrorDBObjectToken_; + + edm::ParameterSet pset_; + bool useErrorsFromTemplates_; + }; + + using namespace edm; + + template + PixelCPEFastParamsESProducerAlpaka::PixelCPEFastParamsESProducerAlpaka(const edm::ParameterSet& p) + : ESProducer(p), pset_(p) { + auto const& myname = p.getParameter("ComponentName"); + auto const& magname = p.getParameter("MagneticFieldRecord"); + useErrorsFromTemplates_ = p.getParameter("UseErrorsFromTemplates"); + + auto cc = setWhatProduced(this, myname); + magfieldToken_ = cc.consumes(magname); + pDDToken_ = cc.consumes(); + hTTToken_ = cc.consumes(); + lorentzAngleToken_ = cc.consumes(edm::ESInputTag("")); + lorentzAngleWidthToken_ = cc.consumes(edm::ESInputTag("", "forWidth")); + if (useErrorsFromTemplates_) { + genErrorDBObjectToken_ = cc.consumes(); + } + } + + template + std::unique_ptr> PixelCPEFastParamsESProducerAlpaka::produce( + const PixelCPEFastParamsRecord& iRecord) { + // add 
the new la width object + const SiPixelLorentzAngle* lorentzAngleWidthProduct = &iRecord.get(lorentzAngleWidthToken_); + + const SiPixelGenErrorDBObject* genErrorDBObjectProduct = nullptr; + + // Errors take only from new GenError + if (useErrorsFromTemplates_) { // do only when generrors are needed + genErrorDBObjectProduct = &iRecord.get(genErrorDBObjectToken_); + //} else { + //std::cout<<" pass an empty GenError pointer"<>(pset_, + &iRecord.get(magfieldToken_), + iRecord.get(pDDToken_), + iRecord.get(hTTToken_), + &iRecord.get(lorentzAngleToken_), + genErrorDBObjectProduct, + lorentzAngleWidthProduct); + } + + template + void PixelCPEFastParamsESProducerAlpaka::fillDescriptions( + edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + // from PixelCPEBase + PixelCPEBase::fillPSetDescription(desc); + + // from PixelCPEFast + PixelCPEFastParamsHost::fillPSetDescription(desc); + + // used by PixelCPEFast + desc.add("EdgeClusterErrorX", 50.0); + desc.add("EdgeClusterErrorY", 85.0); + desc.add("UseErrorsFromTemplates", true); + desc.add("TruncatePixelCharge", true); + + std::string name = "PixelCPEFastParams"; + name += TrackerTraits::nameModifier; + desc.add("ComponentName", name); + desc.add("MagneticFieldRecord", edm::ESInputTag()); + + descriptions.addWithDefaultLabel(desc); + } + + using PixelCPEFastParamsESProducerAlpakaPhase1 = PixelCPEFastParamsESProducerAlpaka; + using PixelCPEFastParamsESProducerAlpakaPhase2 = PixelCPEFastParamsESProducerAlpaka; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(PixelCPEFastParamsESProducerAlpakaPhase1); +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(PixelCPEFastParamsESProducerAlpakaPhase2); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernel.h b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernel.h new file mode 100644 index 0000000000000..2fc1404a03bb7 --- /dev/null +++ 
b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernel.h @@ -0,0 +1,45 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_PixelRecHitKernel_h +#define RecoLocalTracker_SiPixelRecHits_PixelRecHitKernel_h + +#include + +#include + +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace pixelgpudetails { + using namespace cms::alpakatools; + + template + class PixelRecHitKernel { + public: + PixelRecHitKernel() = default; + ~PixelRecHitKernel() = default; + + PixelRecHitKernel(const PixelRecHitKernel&) = delete; + PixelRecHitKernel(PixelRecHitKernel&&) = delete; + PixelRecHitKernel& operator=(const PixelRecHitKernel&) = delete; + PixelRecHitKernel& operator=(PixelRecHitKernel&&) = delete; + + using ParamsOnDevice = pixelCPEforDevice::ParamsOnDeviceT; + + TrackingRecHitsSoACollection makeHitsAsync(SiPixelDigisSoACollection const& digis_d, + SiPixelClustersSoACollection const& clusters_d, + BeamSpotPOD const* bs_d, + ParamsOnDevice const* cpeParams, + Queue queue) const; + }; + } // namespace pixelgpudetails +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // RecoLocalTracker_SiPixelRecHits_PixelRecHitKernel_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc 
b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc new file mode 100644 index 0000000000000..f0d61a646c0ce --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHitKernels.dev.cc @@ -0,0 +1,143 @@ +// C++ headers +#include +#include + +// Alpaka headers +#include + +// CMSSW headers +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#include "PixelRecHitKernel.h" +#include "PixelRecHits.h" + +//#define GPU_DEBUG + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + using namespace cms::alpakatools; + template + class setHitsLayerStart { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint32_t const* __restrict__ hitsModuleStart, + pixelCPEforDevice::ParamsOnDeviceT const* __restrict__ cpeParams, + uint32_t* __restrict__ hitsLayerStart) const { + assert(0 == hitsModuleStart[0]); + + for (int32_t i : cms::alpakatools::elements_with_stride(acc, TrackerTraits::numberOfLayers + 1)) { + hitsLayerStart[i] = hitsModuleStart[cpeParams->layerGeometry().layerStart[i]]; +#ifdef GPU_DEBUG + int old = i == 0 ? 
0 : hitsModuleStart[cpeParams->layerGeometry().layerStart[i - 1]]; + printf("LayerStart %d/%d at module %d: %d - %d\n", + i, + TrackerTraits::numberOfLayers, + cpeParams->layerGeometry().layerStart[i], + hitsLayerStart[i], + hitsLayerStart[i] - old); +#endif + } + } + }; + + namespace pixelgpudetails { + + template + TrackingRecHitsSoACollection PixelRecHitKernel::makeHitsAsync( + SiPixelDigisSoACollection const& digis_d, + SiPixelClustersSoACollection const& clusters_d, + BeamSpotPOD const* bs_d, + pixelCPEforDevice::ParamsOnDeviceT const* cpeParams, + Queue queue) const { + using namespace pixelRecHits; + auto nHits = clusters_d.nClusters(); + auto offsetBPIX2 = clusters_d.offsetBPIX2(); + + TrackingRecHitsSoACollection hits_d(nHits, offsetBPIX2, clusters_d->clusModuleStart(), queue); + + int activeModulesWithDigis = digis_d.nModules(); + + // protect from empty events + if (activeModulesWithDigis) { + int threadsPerBlock = 128; + int blocks = activeModulesWithDigis; + const auto workDiv1D = cms::alpakatools::make_workdiv(blocks, threadsPerBlock); + +#ifdef GPU_DEBUG + std::cout << "launching GetHits kernel on " << alpaka::core::demangled << " with " << blocks << " blocks" + << std::endl; +#endif + alpaka::exec(queue, + workDiv1D, + GetHits{}, + cpeParams, + bs_d, + digis_d.view(), + digis_d.nDigis(), + clusters_d.view(), + hits_d.view()); +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + + // assuming full warp of threads is better than a smaller number... 
+ if (nHits) { + const auto workDiv1D = cms::alpakatools::make_workdiv(1, 32); + alpaka::exec(queue, + workDiv1D, + setHitsLayerStart{}, + clusters_d->clusModuleStart(), + cpeParams, + hits_d.view().hitsLayerStart().data()); + constexpr auto nLayers = TrackerTraits::numberOfLayers; + + // Use a view since it's runtime sized and can't use the implicit definition + // see HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h:100 + typename TrackingRecHitSoA::PhiBinnerView hrv_d; + hrv_d.assoc = &(hits_d.view().phiBinner()); + hrv_d.offSize = -1; + hrv_d.offStorage = nullptr; + hrv_d.contentSize = nHits; + hrv_d.contentStorage = hits_d.view().phiBinnerStorage(); + + // fillManyFromVector(h_d.data(), nParts, v_d.data(), offsets_d.data(), offsets[10], 256, queue); + /* cms::alpakatools::fillManyFromVector(&(hits_d.view().phiBinner()), + nLayers, + hits_d.view().iphi(), + hits_d.view().hitsLayerStart().data(), + nHits, + (uint32_t)256, + queue); +*/ + cms::alpakatools::fillManyFromVector(&(hits_d.view().phiBinner()), + hrv_d, + nLayers, + hits_d.view().iphi(), + hits_d.view().hitsLayerStart().data(), + nHits, + (uint32_t)256, + queue); + +#ifdef GPU_DEBUG + alpaka::wait(queue); +#endif + } + } + +#ifdef GPU_DEBUG + alpaka::wait(queue); + std::cout << "PixelRecHitKernel -> DONE!" 
<< std::endl; +#endif + + return hits_d; + } + + template class PixelRecHitKernel; + template class PixelRecHitKernel; + template class PixelRecHitKernel; + + } // namespace pixelgpudetails +} // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h new file mode 100644 index 0000000000000..220a91b85ced3 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/PixelRecHits.h @@ -0,0 +1,240 @@ +#ifndef RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h +#define RecoLocalTracker_SiPixelRecHits_alpaka_PixelRecHits_h + +#include +#include +#include +#include + +#include + +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/Math/interface/approx_atan2.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsSoA.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" + +//#define GPU_DEBUG 1 +namespace ALPAKA_ACCELERATOR_NAMESPACE { + namespace pixelRecHits { + + template + class GetHits { + public: + template >> + ALPAKA_FN_ACC void operator()(const TAcc& acc, + pixelCPEforDevice::ParamsOnDeviceT const* __restrict__ cpeParams, + BeamSpotPOD const* __restrict__ bs, + SiPixelDigisSoAConstView digis, + uint32_t numElements, + SiPixelClustersSoAConstView clusters, + TrackingRecHitSoAView hits) const { + // FIXME + // the compiler seems NOT to optimize loads from views (even in a simple test case) + // The whole gimnastic here of copying or not is a pure 
heuristic exercise that seems to produce the fastest code with the above signature + // not using views (passing a gazzilion of array pointers) seems to produce the fastest code (but it is harder to mantain) + + ALPAKA_ASSERT_OFFLOAD(cpeParams); + + const uint32_t blockIdx(alpaka::getIdx(acc)[0u]); + + // copy average geometry corrected by beamspot . FIXME (move it somewhere else???) + if (0 == blockIdx) { + auto& agc = hits.averageGeometry(); + auto const& ag = cpeParams->averageGeometry(); + auto nLadders = TrackerTraits::numberOfLaddersInBarrel; + + cms::alpakatools::for_each_element_in_block_strided(acc, nLadders, [&](uint32_t il) { + agc.ladderZ[il] = ag.ladderZ[il] - bs->z; + agc.ladderX[il] = ag.ladderX[il] - bs->x; + agc.ladderY[il] = ag.ladderY[il] - bs->y; + agc.ladderR[il] = sqrt(agc.ladderX[il] * agc.ladderX[il] + agc.ladderY[il] * agc.ladderY[il]); + agc.ladderMinZ[il] = ag.ladderMinZ[il] - bs->z; + agc.ladderMaxZ[il] = ag.ladderMaxZ[il] - bs->z; + }); + + if (cms::alpakatools::once_per_block(acc)) { + agc.endCapZ[0] = ag.endCapZ[0] - bs->z; + agc.endCapZ[1] = ag.endCapZ[1] - bs->z; + } + } + + // to be moved in common namespace... + using pixelClustering::invalidModuleId; + constexpr int32_t MaxHitsInIter = pixelCPEforDevice::MaxHitsInIter; + + using ClusParams = pixelCPEforDevice::ClusParams; + + // as usual one block per module + auto& clusParams = alpaka::declareSharedVar(acc); + + auto me = clusters[blockIdx].moduleId(); + int nclus = clusters[me].clusInModule(); + + if (0 == nclus) + return; +#ifdef GPU_DEBUG + if (cms::alpakatools::once_per_block(acc)) { + auto k = clusters[1 + blockIdx].moduleStart(); + while (digis[k].moduleId() == invalidModuleId) + ++k; + ALPAKA_ASSERT_OFFLOAD(digis[k].moduleId() == me); + } + + if (me % 100 == 1) + if (cms::alpakatools::once_per_block(acc)) + printf( + "hitbuilder: %d clusters in module %d. 
will write at %d\n", nclus, me, clusters[me].clusModuleStart()); +#endif + + for (int startClus = 0, endClus = nclus; startClus < endClus; startClus += MaxHitsInIter) { + auto first = clusters[1 + blockIdx].moduleStart(); + + int nClusInIter = alpaka::math::min(acc, MaxHitsInIter, endClus - startClus); + int lastClus = startClus + nClusInIter; + assert(nClusInIter <= nclus); + assert(nClusInIter > 0); + assert(lastClus <= nclus); + + assert(nclus > MaxHitsInIter || (0 == startClus && nClusInIter == nclus && lastClus == nclus)); + + // init + cms::alpakatools::for_each_element_in_block_strided(acc, nClusInIter, [&](uint32_t ic) { + clusParams.minRow[ic] = std::numeric_limits::max(); + clusParams.maxRow[ic] = 0; + clusParams.minCol[ic] = std::numeric_limits::max(); + clusParams.maxCol[ic] = 0; + clusParams.charge[ic] = 0; + clusParams.q_f_X[ic] = 0; + clusParams.q_l_X[ic] = 0; + clusParams.q_f_Y[ic] = 0; + clusParams.q_l_Y[ic] = 0; + }); + + alpaka::syncBlockThreads(acc); + + // one thread per "digi" + const uint32_t blockDimension(alpaka::getWorkDiv(acc)[0u]); + const auto& [firstElementIdxNoStride, endElementIdxNoStride] = + cms::alpakatools::element_index_range_in_block(acc, first); + uint32_t rowsColsFirstElementIdx = firstElementIdxNoStride; + uint32_t rowsColsEndElementIdx = endElementIdxNoStride; + for (uint32_t i = rowsColsFirstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, rowsColsFirstElementIdx, rowsColsEndElementIdx, blockDimension, numElements)) + break; + auto id = digis[i].moduleId(); + if (id == invalidModuleId) + continue; // not valid + if (id != me) + break; // end of module + auto cl = digis[i].clus(); + if (cl < startClus || cl >= lastClus) + continue; + cl -= startClus; + ALPAKA_ASSERT_OFFLOAD(cl >= 0); + ALPAKA_ASSERT_OFFLOAD(cl < MaxHitsInIter); + auto x = digis[i].xx(); + auto y = digis[i].yy(); + alpaka::atomicMin(acc, &clusParams.minRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{}); 
+ alpaka::atomicMax(acc, &clusParams.maxRow[cl], (uint32_t)x, alpaka::hierarchy::Threads{}); + alpaka::atomicMin(acc, &clusParams.minCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{}); + alpaka::atomicMax(acc, &clusParams.maxCol[cl], (uint32_t)y, alpaka::hierarchy::Threads{}); + } + + alpaka::syncBlockThreads(acc); + + auto pixmx = cpeParams->detParams(me).pixmx; + uint32_t chargeFirstElementIdx = firstElementIdxNoStride; + uint32_t chargeEndElementIdx = endElementIdxNoStride; + for (uint32_t i = chargeFirstElementIdx; i < numElements; ++i) { + if (not cms::alpakatools::next_valid_element_index_strided( + i, chargeFirstElementIdx, chargeEndElementIdx, blockDimension, numElements)) + break; + auto id = digis[i].moduleId(); + if (id == invalidModuleId) + continue; // not valid + if (id != me) + break; // end of module + auto cl = digis[i].clus(); + if (cl < startClus || cl >= lastClus) + continue; + cl -= startClus; + ALPAKA_ASSERT_OFFLOAD(cl >= 0); + ALPAKA_ASSERT_OFFLOAD(cl < MaxHitsInIter); + auto x = digis[i].xx(); + auto y = digis[i].yy(); + auto ch = digis[i].adc(); + alpaka::atomicAdd(acc, &clusParams.charge[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + ch = alpaka::math::min(acc, ch, pixmx); + if (clusParams.minRow[cl] == x) + alpaka::atomicAdd(acc, &clusParams.q_f_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + if (clusParams.maxRow[cl] == x) + alpaka::atomicAdd(acc, &clusParams.q_l_X[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + if (clusParams.minCol[cl] == y) + alpaka::atomicAdd(acc, &clusParams.q_f_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + if (clusParams.maxCol[cl] == y) + alpaka::atomicAdd(acc, &clusParams.q_l_Y[cl], (int32_t)ch, alpaka::hierarchy::Threads{}); + } + + alpaka::syncBlockThreads(acc); + + // next one cluster per thread... 
+ first = clusters[me].clusModuleStart() + startClus; + cms::alpakatools::for_each_element_in_block_strided(acc, nClusInIter, [&](uint32_t ic) { + auto h = first + ic; // output index in global memory + + assert(h < (uint32_t)hits.metadata().size()); + assert(h < clusters[me + 1].clusModuleStart()); + + pixelCPEforDevice::position( + cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + + pixelCPEforDevice::errorFromDB( + cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + + // store it + hits[h].chargeAndStatus().charge = clusParams.charge[ic]; + hits[h].chargeAndStatus().status = clusParams.status[ic]; + hits[h].detectorIndex() = me; + + float xl, yl; + hits[h].xLocal() = xl = clusParams.xpos[ic]; + hits[h].yLocal() = yl = clusParams.ypos[ic]; + + hits[h].clusterSizeX() = clusParams.xsize[ic]; + hits[h].clusterSizeY() = clusParams.ysize[ic]; + + hits[h].xerrLocal() = clusParams.xerr[ic] * clusParams.xerr[ic] + cpeParams->detParams(me).apeXX; + hits[h].yerrLocal() = clusParams.yerr[ic] * clusParams.yerr[ic] + cpeParams->detParams(me).apeYY; + + // keep it local for computations + float xg, yg, zg; + // to global and compute phi... + cpeParams->detParams(me).frame.toGlobal(xl, yl, xg, yg, zg); + // here correct for the beamspot... 
+ xg -= bs->x; + yg -= bs->y; + zg -= bs->z; + + hits[h].xGlobal() = xg; + hits[h].yGlobal() = yg; + hits[h].zGlobal() = zg; + + hits[h].rGlobal() = alpaka::math::sqrt(acc, xg * xg + yg * yg); + hits[h].iphi() = unsafe_atan2s<7>(yg, xg); + }); + alpaka::syncBlockThreads(acc); + } // end loop on batches + } + }; + + } // namespace pixelRecHits +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#endif // RecoLocalTracker_SiPixelRecHits_plugins_alpaka_PixelRecHits_h diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc new file mode 100644 index 0000000000000..46fd8a6b8c2ca --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/plugins/alpaka/SiPixelRecHitAlpaka.cc @@ -0,0 +1,100 @@ +#include "DataFormats/BeamSpot/interface/BeamSpotPOD.h" +#include "DataFormats/BeamSpot/interface/alpaka/BeamSpotDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/SiPixelClustersDevice.h" +#include "DataFormats/SiPixelClusterSoA/interface/alpaka/SiPixelClustersSoACollection.h" +#include "DataFormats/SiPixelDigiSoA/interface/SiPixelDigisDevice.h" +#include "DataFormats/SiPixelDigiSoA/interface/alpaka/SiPixelDigisSoACollection.h" +#include "DataFormats/TrackingRecHitSoA/interface/TrackingRecHitsDevice.h" +#include "DataFormats/TrackingRecHitSoA/interface/alpaka/TrackingRecHitsSoACollection.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include 
"HeterogeneousCore/AlpakaCore/interface/alpaka/global/EDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "RecoLocalTracker/Records/interface/PixelCPEFastParamsRecord.h" + +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforDevice.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h" + +#include "PixelRecHitKernel.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + template + class SiPixelRecHitAlpaka : public global::EDProducer<> { + public: + explicit SiPixelRecHitAlpaka(const edm::ParameterSet& iConfig); + ~SiPixelRecHitAlpaka() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + + private: + void produce(edm::StreamID streamID, device::Event& iEvent, const device::EventSetup& iSetup) const override; + + const device::ESGetToken, PixelCPEFastParamsRecord> cpeToken_; + const device::EDGetToken tBeamSpot; + const device::EDGetToken tokenClusters_; + const device::EDGetToken tokenDigi_; + const device::EDPutToken> tokenHit_; + + const pixelgpudetails::PixelRecHitKernel Algo_; + }; + + template + SiPixelRecHitAlpaka::SiPixelRecHitAlpaka(const edm::ParameterSet& iConfig) + : cpeToken_(esConsumes(edm::ESInputTag("", iConfig.getParameter("CPE")))), + tBeamSpot(consumes(iConfig.getParameter("beamSpot"))), + tokenClusters_(consumes(iConfig.getParameter("src"))), + tokenDigi_(consumes(iConfig.getParameter("src"))), + 
tokenHit_(produces()) {} + + template + void SiPixelRecHitAlpaka::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("beamSpot", edm::InputTag("offlineBeamSpotDevice")); + desc.add("src", edm::InputTag("siPixelClustersPreSplittingAlpaka")); + + std::string cpe = "PixelCPEFastParams"; + cpe += TrackerTraits::nameModifier; + desc.add("CPE", cpe); + + descriptions.addWithDefaultLabel(desc); + } + + template + void SiPixelRecHitAlpaka::produce(edm::StreamID streamID, + device::Event& iEvent, + const device::EventSetup& es) const { + auto& fcpe = es.getData(cpeToken_); + + auto const& clusters = iEvent.get(tokenClusters_); + + auto const& digis = iEvent.get(tokenDigi_); + + auto const& bs = iEvent.get(tBeamSpot); + + iEvent.emplace(tokenHit_, + Algo_.makeHitsAsync(digis, clusters, bs.data(), fcpe.const_buffer().data(), iEvent.queue())); + } + using SiPixelRecHitAlpakaPhase1 = SiPixelRecHitAlpaka; + using SiPixelRecHitAlpakaPhase2 = SiPixelRecHitAlpaka; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(SiPixelRecHitAlpakaPhase1); +DEFINE_FWK_ALPAKA_MODULE(SiPixelRecHitAlpakaPhase2); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h index 09d0b55030d9c..94ae258cc16fb 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h @@ -19,7 +19,7 @@ namespace gpuPixelRecHits { template __global__ void getHits(pixelCPEforGPU::ParamsOnGPUT const* __restrict__ cpeParams, BeamSpotPOD const* __restrict__ bs, - SiPixelDigisCUDASOAConstView digis, + SiPixelDigisSoA::ConstView digis, int numElements, SiPixelClustersCUDASOAConstView clusters, TrackingRecHitSoAView hits) { diff --git a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py 
b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py index 686b0afc335c4..52efaece5e4df 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py +++ b/RecoLocalTracker/SiPixelRecHits/python/PixelCPEESProducers_cff.py @@ -1,4 +1,5 @@ import FWCore.ParameterSet.Config as cms +from Configuration.ProcessModifiers.alpaka_cff import alpaka # # Load all Pixel Cluster Position Estimator ESProducers @@ -18,3 +19,10 @@ # from CalibTracker.SiPixelESProducers.SiPixelTemplateDBObjectESProducer_cfi import * from CalibTracker.SiPixelESProducers.SiPixel2DTemplateDBObjectESProducer_cfi import * + +def _addProcessCPEsAlpaka(process): + process.load("RecoLocalTracker.SiPixelRecHits.pixelCPEFastParamsESProducerAlpakaPhase1_cfi") + process.load("RecoLocalTracker.SiPixelRecHits.pixelCPEFastParamsESProducerAlpakaPhase2_cfi") + +modifyConfigurationForAlpakaCPEs_ = alpaka.makeProcessModifier(_addProcessCPEsAlpaka) + diff --git a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py index f45b41861995d..e6b2c9832600c 100644 --- a/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py +++ b/RecoLocalTracker/SiPixelRecHits/python/SiPixelRecHits_cfi.py @@ -1,6 +1,7 @@ import FWCore.ParameterSet.Config as cms from HeterogeneousCore.CUDACore.SwitchProducerCUDA import SwitchProducerCUDA from Configuration.ProcessModifiers.gpu_cff import gpu +from Configuration.ProcessModifiers.alpaka_cff import alpaka # legacy pixel rechit producer siPixelRecHits = cms.EDProducer("SiPixelRecHitConverter", @@ -112,9 +113,6 @@ ) ) - -#(gpu & pixelNtupletFit & phase2_tracker).toReplaceWith(siPixelRecHitsPreSplitting , cuda = _siPixelRecHitFromCUDAPhase2.clone()) - (gpu & pixelNtupletFit).toReplaceWith(siPixelRecHitsPreSplittingTask, cms.Task( # reconstruct the pixel rechits on the gpu or on the cpu # (normally only one of the two is run because only one is consumed from later stages) @@ -125,3 +123,49 @@ 
# producing and converting on cpu (if needed) siPixelRecHitsPreSplittingSoA )) + +###################################################################### + +### Alpaka Pixel Hits Reco +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitAlpakaPhase1_cfi import siPixelRecHitAlpakaPhase1 as _siPixelRecHitAlpakaPhase1 +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitAlpakaPhase2_cfi import siPixelRecHitAlpakaPhase2 as _siPixelRecHitAlpakaPhase2 + +# Hit SoA producer on the device +siPixelRecHitsPreSplittingAlpaka = _siPixelRecHitAlpakaPhase1.clone( + src = "siPixelClustersPreSplittingAlpaka" +) +phase2_tracker.toReplaceWith(siPixelRecHitsPreSplittingAlpaka,_siPixelRecHitAlpakaPhase2.clone( + src = "siPixelClustersPreSplittingAlpaka" +)) + +# Hit SoA producer on the cpu, for validation +siPixelRecHitsPreSplittingAlpakaSerial = siPixelRecHitsPreSplittingAlpaka.clone( + src = "siPixelClustersPreSplittingAlpakaSerial", + #alpaka = dict( backend = '*' ) + alpaka = None +) +siPixelRecHitsPreSplittingAlpakaSerial._TypedParameterizable__type = 'alpaka_serial_sync' + siPixelRecHitsPreSplittingAlpaka._TypedParameterizable__type.removesuffix('@alpaka') + +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaPhase1_cfi import siPixelRecHitFromSoAAlpakaPhase1 as _siPixelRecHitFromSoAAlpakaPhase1 +from RecoLocalTracker.SiPixelRecHits.siPixelRecHitFromSoAAlpakaPhase2_cfi import siPixelRecHitFromSoAAlpakaPhase2 as _siPixelRecHitFromSoAAlpakaPhase2 + +(alpaka & ~phase2_tracker).toModify(siPixelRecHitsPreSplitting, + cpu = _siPixelRecHitFromSoAAlpakaPhase1.clone( + pixelRecHitSrc = cms.InputTag('siPixelRecHitsPreSplittingAlpaka'), + src = cms.InputTag('siPixelClustersPreSplitting')) +) + +(alpaka & phase2_tracker).toModify(siPixelRecHitsPreSplitting, + cpu = _siPixelRecHitFromSoAAlpakaPhase2.clone( + pixelRecHitSrc = cms.InputTag('siPixelRecHitsPreSplittingAlpaka'), + src = cms.InputTag('siPixelClustersPreSplitting')) +) + + 
+alpaka.toReplaceWith(siPixelRecHitsPreSplittingTask, cms.Task( + # Reconstruct the pixel hits with alpaka on the device + siPixelRecHitsPreSplittingAlpaka, + # Reconstruct the pixel hits with alpaka on the cpu (if requested by the validation) + siPixelRecHitsPreSplittingAlpakaSerial, + # Convert hit soa on host to legacy formats + siPixelRecHitsPreSplitting)) diff --git a/RecoLocalTracker/SiPixelRecHits/src/ES_PixelCPEFastParams.cc b/RecoLocalTracker/SiPixelRecHits/src/ES_PixelCPEFastParams.cc new file mode 100644 index 0000000000000..804f817bdb6e0 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/ES_PixelCPEFastParams.cc @@ -0,0 +1,9 @@ +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" +#include "FWCore/Utilities/interface/typelookup.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +using PixelCPEFastParamsHostPhase1 = PixelCPEFastParamsHost; +using PixelCPEFastParamsHostPhase2 = PixelCPEFastParamsHost; + +TYPELOOKUP_DATA_REG(PixelCPEFastParamsHostPhase1); +TYPELOOKUP_DATA_REG(PixelCPEFastParamsHostPhase2); diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParams.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParams.cc new file mode 100644 index 0000000000000..d98c84e5860f4 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParams.cc @@ -0,0 +1,9 @@ +#include "FWCore/Utilities/interface/typelookup.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsDevice.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" + +using PixelCPEFastParamsPhase1 = PixelCPEFastParamsDevice; +using PixelCPEFastParamsPhase2 = PixelCPEFastParamsDevice; + +TYPELOOKUP_DATA_REG(PixelCPEFastParamsPhase1); +TYPELOOKUP_DATA_REG(PixelCPEFastParamsPhase2); \ No newline at end of file diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc new file mode 100644 index 
0000000000000..36c127259a383 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFastParamsHost.cc @@ -0,0 +1,482 @@ +#include + +#include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" +#include "DataFormats/GeometrySurface/interface/SOARotation.h" +#include "DataFormats/SiPixelClusterSoA/interface/ClusteringConstants.h" +#include "DataFormats/TrackingRecHitSoA/interface/SiPixelHitStatus.h" +#include "Geometry/CommonTopologies/interface/SimplePixelTopology.h" +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFastParamsHost.h" + +//----------------------------------------------------------------------------- +//! The constructor. +//----------------------------------------------------------------------------- +template +PixelCPEFastParamsHost::PixelCPEFastParamsHost(edm::ParameterSet const& conf, + const MagneticField* mag, + const TrackerGeometry& geom, + const TrackerTopology& ttopo, + const SiPixelLorentzAngle* lorentzAngle, + const SiPixelGenErrorDBObject* genErrorDBObject, + const SiPixelLorentzAngle* lorentzAngleWidth) + : PixelCPEGenericBase(conf, mag, geom, ttopo, lorentzAngle, genErrorDBObject, lorentzAngleWidth), + buffer_(cms::alpakatools::make_host_buffer>()) { + // Use errors from templates or from GenError + if (useErrorsFromTemplates_) { + if (!SiPixelGenError::pushfile(*genErrorDBObject_, this->thePixelGenError_)) + throw cms::Exception("InvalidCalibrationLoaded") + << "ERROR: GenErrors not filled correctly. Check the sqlite file. 
Using SiPixelTemplateDBObject version " + << (*genErrorDBObject_).version(); + } + + fillParamsForDevice(); +} + +template +void PixelCPEFastParamsHost::fillParamsForDevice() { + // this code executes only once per job, computation inefficiency is not an issue + // many code blocks are repeated: better keep the computation local and self consistent as blocks may in future move around, be deleted ... + // It is valid only for Phase1 and the version of GenError in DB used in late 2018 and in 2021 + + buffer_->commonParams().theThicknessB = m_DetParams.front().theThickness; + buffer_->commonParams().theThicknessE = m_DetParams.back().theThickness; + buffer_->commonParams().thePitchX = m_DetParams[0].thePitchX; + buffer_->commonParams().thePitchY = m_DetParams[0].thePitchY; + + buffer_->commonParams().numberOfLaddersInBarrel = TrackerTraits::numberOfLaddersInBarrel; + + LogDebug("PixelCPEFastParamsHost") << "pitch & thickness " << buffer_->commonParams().thePitchX << ' ' + << buffer_->commonParams().thePitchY << " " + << buffer_->commonParams().theThicknessB << ' ' + << buffer_->commonParams().theThicknessE; + + // zero average geometry + memset(&buffer_->averageGeometry(), 0, sizeof(pixelTopology::AverageGeometryT)); + + uint32_t oldLayer = 0; + uint32_t oldLadder = 0; + float rl = 0; + float zl = 0; + float miz = 500, mxz = 0; + float pl = 0; + int nl = 0; + + assert(m_DetParams.size() <= TrackerTraits::numberOfModules); + for (auto i = 0U; i < m_DetParams.size(); ++i) { + auto& p = m_DetParams[i]; + auto& g = buffer_->detParams(i); + + g.nRowsRoc = p.theDet->specificTopology().rowsperroc(); + g.nColsRoc = p.theDet->specificTopology().colsperroc(); + g.nRows = p.theDet->specificTopology().rocsX() * g.nRowsRoc; + g.nCols = p.theDet->specificTopology().rocsY() * g.nColsRoc; + + g.numPixsInModule = g.nRows * g.nCols; + + assert(p.theDet->index() == int(i)); + assert(buffer_->commonParams().thePitchY == p.thePitchY); + assert(buffer_->commonParams().thePitchX == 
p.thePitchX); + + g.isBarrel = GeomDetEnumerators::isBarrel(p.thePart); + g.isPosZ = p.theDet->surface().position().z() > 0; + g.layer = ttopo_.layer(p.theDet->geographicalId()); + g.index = i; // better be! + g.rawId = p.theDet->geographicalId(); + auto thickness = g.isBarrel ? buffer_->commonParams().theThicknessB : buffer_->commonParams().theThicknessE; + assert(thickness == p.theThickness); + + auto ladder = ttopo_.pxbLadder(p.theDet->geographicalId()); + if (oldLayer != g.layer) { + oldLayer = g.layer; + LogDebug("PixelCPEFastParamsHost") << "new layer at " << i << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) + << g.layer << " starting at " << g.rawId << '\n' + << "old layer had " << nl << " ladders"; + nl = 0; + } + if (oldLadder != ladder) { + oldLadder = ladder; + LogDebug("PixelCPEFastParamsHost") << "new ladder at " << i + << (g.isBarrel ? " B " : (g.isPosZ ? " E+ " : " E- ")) << ladder + << " starting at " << g.rawId << '\n' + << "old ladder ave z,r,p mz " << zl / 8.f << " " << rl / 8.f << " " << pl / 8.f + << ' ' << miz << ' ' << mxz; + rl = 0; + zl = 0; + pl = 0; + miz = 500; + mxz = 0; + nl++; + } + + g.shiftX = 0.5f * p.lorentzShiftInCmX; + g.shiftY = 0.5f * p.lorentzShiftInCmY; + g.chargeWidthX = p.lorentzShiftInCmX * p.widthLAFractionX; + g.chargeWidthY = p.lorentzShiftInCmY * p.widthLAFractionY; + + g.x0 = p.theOrigin.x(); + g.y0 = p.theOrigin.y(); + g.z0 = p.theOrigin.z(); + + auto vv = p.theDet->surface().position(); + auto rr = pixelCPEforDevice::Rotation(p.theDet->surface().rotation()); + g.frame = pixelCPEforDevice::Frame(vv.x(), vv.y(), vv.z(), rr); + + zl += vv.z(); + miz = std::min(miz, std::abs(vv.z())); + mxz = std::max(mxz, std::abs(vv.z())); + rl += vv.perp(); + pl += vv.phi(); // (not obvious) + + // errors ..... + ClusterParamGeneric cp; + + cp.with_track_angle = false; + + auto lape = p.theDet->localAlignmentError(); + if (lape.invalid()) + lape = LocalError(); // zero.... 
+ + g.apeXX = lape.xx(); + g.apeYY = lape.yy(); + + auto toMicron = [&](float x) { return std::min(511, int(x * 1.e4f + 0.5f)); }; + + // average angle + auto gvx = p.theOrigin.x() + 40.f * buffer_->commonParams().thePitchX; + auto gvy = p.theOrigin.y(); + auto gvz = 1.f / p.theOrigin.z(); + //--- Note that the normalization is not required as only the ratio used + + { + // calculate angles (fed into errorFromTemplates) + cp.cotalpha = gvx * gvz; + cp.cotbeta = gvy * gvz; + + errorFromTemplates(p, cp, 20000.); + } + +#ifdef EDM_ML_DEBUG + auto m = 10000.f; + for (float qclus = 15000; qclus < 35000; qclus += 15000) { + errorFromTemplates(p, cp, qclus); + LogDebug("PixelCPEFastParamsHost") << i << ' ' << qclus << ' ' << cp.pixmx << ' ' << m * cp.sigmax << ' ' + << m * cp.sx1 << ' ' << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' << m * cp.sy1 + << ' ' << m * cp.sy2; + } + LogDebug("PixelCPEFastParamsHost") << i << ' ' << m * std::sqrt(lape.xx()) << ' ' << m * std::sqrt(lape.yy()); +#endif // EDM_ML_DEBUG + + g.pixmx = std::max(0, cp.pixmx); + g.sx2 = toMicron(cp.sx2); + g.sy1 = std::max(21, toMicron(cp.sy1)); // for some angles sy1 is very small + g.sy2 = std::max(55, toMicron(cp.sy2)); // sometimes sy2 is smaller than others (due to angle?) 
+ + //sample xerr as function of position + // moduleOffsetX is the definition of TrackerTraits::xOffset, + // needs to be calculated because for Phase2 the modules are not uniform + float moduleOffsetX = -(0.5f * float(g.nRows) + TrackerTraits::bigPixXCorrection); + auto const xoff = moduleOffsetX * buffer_->commonParams().thePitchX; + + for (int ix = 0; ix < pixelCPEforDevice::kNumErrorBins; ++ix) { + auto x = xoff * (1.f - (0.5f + float(ix)) / 8.f); + auto gvx = p.theOrigin.x() - x; + auto gvy = p.theOrigin.y(); + auto gvz = 1.f / p.theOrigin.z(); + cp.cotbeta = gvy * gvz; + cp.cotalpha = gvx * gvz; + errorFromTemplates(p, cp, 20000.f); + g.sigmax[ix] = toMicron(cp.sigmax); + g.sigmax1[ix] = toMicron(cp.sx1); + LogDebug("PixelCPEFastParamsHost") << "sigmax vs x " << i << ' ' << x << ' ' << cp.cotalpha << ' ' + << int(g.sigmax[ix]) << ' ' << int(g.sigmax1[ix]) << ' ' << 10000.f * cp.sigmay + << std::endl; + } +#ifdef EDM_ML_DEBUG + // sample yerr as function of position + // moduleOffsetY is the definition of TrackerTraits::yOffset (removed) + float moduleOffsetY = 0.5f * float(g.nCols) + TrackerTraits::bigPixYCorrection; + auto const yoff = -moduleOffsetY * buffer_->commonParams().thePitchY; + + for (int ix = 0; ix < pixelCPEforDevice::kNumErrorBins; ++ix) { + auto y = yoff * (1.f - (0.5f + float(ix)) / 8.f); + auto gvx = p.theOrigin.x() + 40.f * buffer_->commonParams().thePitchY; + auto gvy = p.theOrigin.y() - y; + auto gvz = 1.f / p.theOrigin.z(); + cp.cotbeta = gvy * gvz; + cp.cotalpha = gvx * gvz; + errorFromTemplates(p, cp, 20000.f); + LogDebug("PixelCPEFastParamsHost") << "sigmay vs y " << i << ' ' << y << ' ' << cp.cotbeta << ' ' + << 10000.f * cp.sigmay << std::endl; + } +#endif // EDM_ML_DEBUG + + // calculate angles (repeated) + cp.cotalpha = gvx * gvz; + cp.cotbeta = gvy * gvz; + auto aveCB = cp.cotbeta; + + // sample x by charge + int qbin = pixelCPEforDevice::kGenErrorQBins; // low charge + int k = 0; + for (int qclus = 1000; qclus < 200000; qclus 
+= 1000) { + errorFromTemplates(p, cp, qclus); + if (cp.qBin_ == qbin) + continue; + qbin = cp.qBin_; + g.xfact[k] = cp.sigmax; + g.yfact[k] = cp.sigmay; + g.minCh[k++] = qclus; +#ifdef EDM_ML_DEBUG + LogDebug("PixelCPEFastParamsHost") << i << ' ' << g.rawId << ' ' << cp.cotalpha << ' ' << qclus << ' ' << cp.qBin_ + << ' ' << cp.pixmx << ' ' << m * cp.sigmax << ' ' << m * cp.sx1 << ' ' + << m * cp.sx2 << ' ' << m * cp.sigmay << ' ' << m * cp.sy1 << ' ' << m * cp.sy2 + << std::endl; +#endif // EDM_ML_DEBUG + } + + assert(k <= pixelCPEforDevice::kGenErrorQBins); + + // fill the rest (sometimes bin 4 is missing) + for (int kk = k; kk < pixelCPEforDevice::kGenErrorQBins; ++kk) { + g.xfact[kk] = g.xfact[k - 1]; + g.yfact[kk] = g.yfact[k - 1]; + g.minCh[kk] = g.minCh[k - 1]; + } + auto detx = 1.f / g.xfact[0]; + auto dety = 1.f / g.yfact[0]; + for (int kk = 0; kk < pixelCPEforDevice::kGenErrorQBins; ++kk) { + g.xfact[kk] *= detx; + g.yfact[kk] *= dety; + } + // sample y in "angle" (estimated from cluster size) + float ys = 8.f - 4.f; // apperent bias of half pixel (see plot) + // plot: https://indico.cern.ch/event/934821/contributions/3974619/attachments/2091853/3515041/DigilessReco.pdf page 25 + // sample yerr as function of "size" + for (int iy = 0; iy < pixelCPEforDevice::kNumErrorBins; ++iy) { + ys += 1.f; // first bin 0 is for size 9 (and size is in fixed point 2^3) + if (pixelCPEforDevice::kNumErrorBins - 1 == iy) + ys += 8.f; // last bin for "overflow" + // cp.cotalpha = ys*(buffer_->commonParams().thePitchX/(8.f*thickness)); // use this to print sampling in "x" (and comment the line below) + cp.cotbeta = std::copysign(ys * (buffer_->commonParams().thePitchY / (8.f * thickness)), aveCB); + errorFromTemplates(p, cp, 20000.f); + g.sigmay[iy] = toMicron(cp.sigmay); + LogDebug("PixelCPEFastParamsHost") << "sigmax/sigmay " << i << ' ' << (ys + 4.f) / 8.f << ' ' << cp.cotalpha + << '/' << cp.cotbeta << ' ' << 10000.f * cp.sigmax << '/' << int(g.sigmay[iy]) + << 
std::endl; + } + } // loop over det + + constexpr int numberOfModulesInLadder = TrackerTraits::numberOfModulesInLadder; + constexpr int numberOfLaddersInBarrel = TrackerTraits::numberOfLaddersInBarrel; + constexpr int numberOfModulesInBarrel = TrackerTraits::numberOfModulesInBarrel; + + constexpr float ladderFactor = 1.f / float(numberOfModulesInLadder); + + constexpr int firstEndcapPos = TrackerTraits::firstEndcapPos; + constexpr int firstEndcapNeg = TrackerTraits::firstEndcapNeg; + + // compute ladder baricenter (only in global z) for the barrel + // + auto& aveGeom = buffer_->averageGeometry(); + int il = 0; + for (int im = 0, nm = numberOfModulesInBarrel; im < nm; ++im) { + auto const& g = buffer_->detParams(im); + il = im / numberOfModulesInLadder; + assert(il < int(numberOfLaddersInBarrel)); + auto z = g.frame.z(); + aveGeom.ladderZ[il] += ladderFactor * z; + aveGeom.ladderMinZ[il] = std::min(aveGeom.ladderMinZ[il], z); + aveGeom.ladderMaxZ[il] = std::max(aveGeom.ladderMaxZ[il], z); + aveGeom.ladderX[il] += ladderFactor * g.frame.x(); + aveGeom.ladderY[il] += ladderFactor * g.frame.y(); + aveGeom.ladderR[il] += ladderFactor * sqrt(g.frame.x() * g.frame.x() + g.frame.y() * g.frame.y()); + } + assert(il + 1 == int(numberOfLaddersInBarrel)); + // add half_module and tollerance + constexpr float moduleLength = TrackerTraits::moduleLength; + constexpr float module_tolerance = 0.2f; + for (int il = 0, nl = numberOfLaddersInBarrel; il < nl; ++il) { + aveGeom.ladderMinZ[il] -= (0.5f * moduleLength - module_tolerance); + aveGeom.ladderMaxZ[il] += (0.5f * moduleLength - module_tolerance); + } + + // compute "max z" for first layer in endcap (should we restrict to the outermost ring?) 
+ for (auto im = TrackerTraits::layerStart[firstEndcapPos]; im < TrackerTraits::layerStart[firstEndcapPos + 1]; ++im) { + auto const& g = buffer_->detParams(im); + aveGeom.endCapZ[0] = std::max(aveGeom.endCapZ[0], g.frame.z()); + } + for (auto im = TrackerTraits::layerStart[firstEndcapNeg]; im < TrackerTraits::layerStart[firstEndcapNeg + 1]; ++im) { + auto const& g = buffer_->detParams(im); + aveGeom.endCapZ[1] = std::min(aveGeom.endCapZ[1], g.frame.z()); + } + // correct for outer ring being closer + aveGeom.endCapZ[0] -= TrackerTraits::endcapCorrection; + aveGeom.endCapZ[1] += TrackerTraits::endcapCorrection; +#ifdef EDM_ML_DEBUG + for (int jl = 0, nl = numberOfLaddersInBarrel; jl < nl; ++jl) { + LogDebug("PixelCPEFastParamsHost") << jl << ':' << aveGeom.ladderR[jl] << '/' + << std::sqrt(aveGeom.ladderX[jl] * aveGeom.ladderX[jl] + + aveGeom.ladderY[jl] * aveGeom.ladderY[jl]) + << ',' << aveGeom.ladderZ[jl] << ',' << aveGeom.ladderMinZ[jl] << ',' + << aveGeom.ladderMaxZ[jl] << '\n'; + } + LogDebug("PixelCPEFastParamsHost") << aveGeom.endCapZ[0] << ' ' << aveGeom.endCapZ[1]; +#endif // EDM_ML_DEBUG + + // fill Layer and ladders geometry + memset(&buffer_->layerGeometry(), 0, sizeof(pixelCPEforDevice::LayerGeometryT)); + memcpy(buffer_->layerGeometry().layerStart, + TrackerTraits::layerStart, + sizeof(pixelCPEforDevice::LayerGeometryT::layerStart)); + memcpy(buffer_->layerGeometry().layer, + pixelTopology::layer.data(), + pixelTopology::layer.size()); + buffer_->layerGeometry().maxModuleStride = pixelTopology::maxModuleStride; +} + +template +void PixelCPEFastParamsHost::errorFromTemplates(DetParam const& theDetParam, + ClusterParamGeneric& theClusterParam, + float qclus) const { + float locBz = theDetParam.bz; + float locBx = theDetParam.bx; + LogDebug("PixelCPEFastParamsHost") << "PixelCPEFastParamsHost::localPosition(...) 
: locBz = " << locBz; + + theClusterParam.pixmx = std::numeric_limits::max(); // max pixel charge for truncation of 2-D cluster + + theClusterParam.sigmay = -999.9; // CPE Generic y-error for multi-pixel cluster + theClusterParam.sigmax = -999.9; // CPE Generic x-error for multi-pixel cluster + theClusterParam.sy1 = -999.9; // CPE Generic y-error for single single-pixel + theClusterParam.sy2 = -999.9; // CPE Generic y-error for single double-pixel cluster + theClusterParam.sx1 = -999.9; // CPE Generic x-error for single single-pixel cluster + theClusterParam.sx2 = -999.9; // CPE Generic x-error for single double-pixel cluster + + float dummy; + + SiPixelGenError gtempl(this->thePixelGenError_); + int gtemplID = theDetParam.detTemplateId; + + theClusterParam.qBin_ = gtempl.qbin(gtemplID, + theClusterParam.cotalpha, + theClusterParam.cotbeta, + locBz, + locBx, + qclus, + false, + theClusterParam.pixmx, + theClusterParam.sigmay, + dummy, + theClusterParam.sigmax, + dummy, + theClusterParam.sy1, + dummy, + theClusterParam.sy2, + dummy, + theClusterParam.sx1, + dummy, + theClusterParam.sx2, + dummy); + + theClusterParam.sigmax = theClusterParam.sigmax * pixelCPEforDevice::micronsToCm; + theClusterParam.sx1 = theClusterParam.sx1 * pixelCPEforDevice::micronsToCm; + theClusterParam.sx2 = theClusterParam.sx2 * pixelCPEforDevice::micronsToCm; + + theClusterParam.sigmay = theClusterParam.sigmay * pixelCPEforDevice::micronsToCm; + theClusterParam.sy1 = theClusterParam.sy1 * pixelCPEforDevice::micronsToCm; + theClusterParam.sy2 = theClusterParam.sy2 * pixelCPEforDevice::micronsToCm; +} + +template <> +void PixelCPEFastParamsHost::errorFromTemplates(DetParam const& theDetParam, + ClusterParamGeneric& theClusterParam, + float qclus) const { + theClusterParam.qBin_ = 0.0f; +} + +//----------------------------------------------------------------------------- +//! Hit position in the local frame (in cm). Unlike other CPE's, this +//! 
one converts everything from the measurement frame (in channel numbers) +//! into the local frame (in centimeters). +//----------------------------------------------------------------------------- +template +LocalPoint PixelCPEFastParamsHost::localPosition(DetParam const& theDetParam, + ClusterParam& theClusterParamBase) const { + ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); + + if (useErrorsFromTemplates_) { + errorFromTemplates(theDetParam, theClusterParam, theClusterParam.theCluster->charge()); + } else { + theClusterParam.qBin_ = 0; + } + + int q_f_X; //!< Q of the first pixel in X + int q_l_X; //!< Q of the last pixel in X + int q_f_Y; //!< Q of the first pixel in Y + int q_l_Y; //!< Q of the last pixel in Y + collect_edge_charges(theClusterParam, q_f_X, q_l_X, q_f_Y, q_l_Y, useErrorsFromTemplates_ && truncatePixelCharge_); + + // do GPU like ... + pixelCPEforDevice::ClusParams cp; + + cp.minRow[0] = theClusterParam.theCluster->minPixelRow(); + cp.maxRow[0] = theClusterParam.theCluster->maxPixelRow(); + cp.minCol[0] = theClusterParam.theCluster->minPixelCol(); + cp.maxCol[0] = theClusterParam.theCluster->maxPixelCol(); + + cp.q_f_X[0] = q_f_X; + cp.q_l_X[0] = q_l_X; + cp.q_f_Y[0] = q_f_Y; + cp.q_l_Y[0] = q_l_Y; + + cp.charge[0] = theClusterParam.theCluster->charge(); + + auto ind = theDetParam.theDet->index(); + pixelCPEforDevice::position(buffer_->commonParams(), buffer_->detParams(ind), cp, 0); + auto xPos = cp.xpos[0]; + auto yPos = cp.ypos[0]; + + // set the error (mind ape....) 
+ pixelCPEforDevice::errorFromDB(buffer_->commonParams(), buffer_->detParams(ind), cp, 0); + theClusterParam.sigmax = cp.xerr[0]; + theClusterParam.sigmay = cp.yerr[0]; + + LogDebug("PixelCPEFastParamsHost") << " in PixelCPEFastParamsHost:localPosition - pos = " << xPos << " " << yPos + << " size " << cp.maxRow[0] - cp.minRow[0] << ' ' << cp.maxCol[0] - cp.minCol[0]; + + //--- Now put the two together + LocalPoint pos_in_local(xPos, yPos); + return pos_in_local; +} + +//============== INFLATED ERROR AND ERRORS FROM DB BELOW ================ + +//------------------------------------------------------------------------- +// Hit error in the local frame +//------------------------------------------------------------------------- +template +LocalError PixelCPEFastParamsHost::localError(DetParam const& theDetParam, + ClusterParam& theClusterParamBase) const { + ClusterParamGeneric& theClusterParam = static_cast(theClusterParamBase); + + auto xerr = theClusterParam.sigmax; + auto yerr = theClusterParam.sigmay; + + LogDebug("PixelCPEFastParamsHost") << " errors " << xerr << " " << yerr; + + auto xerr_sq = xerr * xerr; + auto yerr_sq = yerr * yerr; + + return LocalError(xerr_sq, 0, yerr_sq); +} + +template +void PixelCPEFastParamsHost::fillPSetDescription(edm::ParameterSetDescription& desc) { + // call PixelCPEGenericBase fillPSetDescription to add common rechit errors + PixelCPEGenericBase::fillPSetDescription(desc); +} + +template class PixelCPEFastParamsHost; +template class PixelCPEFastParamsHost; +template class PixelCPEFastParamsHost; diff --git a/RecoLocalTracker/SiPixelRecHits/src/alpaka/ES_PixelCPEFastParams.cc b/RecoLocalTracker/SiPixelRecHits/src/alpaka/ES_PixelCPEFastParams.cc new file mode 100644 index 0000000000000..3b4a2f74a8869 --- /dev/null +++ b/RecoLocalTracker/SiPixelRecHits/src/alpaka/ES_PixelCPEFastParams.cc @@ -0,0 +1,5 @@ +#include "RecoLocalTracker/SiPixelRecHits/interface/alpaka/PixelCPEFastParamsCollection.h" +#include 
"HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_DATA_REG(PixelCPEFastParamsPhase1); +TYPELOOKUP_ALPAKA_DATA_REG(PixelCPEFastParamsPhase2);