diff --git a/CUDADataFormats/EcalDigi/BuildFile.xml b/CUDADataFormats/EcalDigi/BuildFile.xml new file mode 100644 index 0000000000000..b7d25b0872646 --- /dev/null +++ b/CUDADataFormats/EcalDigi/BuildFile.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/CUDADataFormats/EcalDigi/interface/DigisCollection.h b/CUDADataFormats/EcalDigi/interface/DigisCollection.h new file mode 100644 index 0000000000000..f471dbfb9fac8 --- /dev/null +++ b/CUDADataFormats/EcalDigi/interface/DigisCollection.h @@ -0,0 +1,24 @@ +#ifndef CUDADataFormats_EcalDigi_interface_DigisCollection_h +#define CUDADataFormats_EcalDigi_interface_DigisCollection_h + +#include "CUDADataFormats/CaloCommon/interface/Common.h" + +namespace ecal { + + template + struct DigisCollection : public ::calo::common::AddSize { + DigisCollection() = default; + DigisCollection(DigisCollection const &) = default; + DigisCollection &operator=(DigisCollection const &) = default; + + DigisCollection(DigisCollection &&) = default; + DigisCollection &operator=(DigisCollection &&) = default; + + // stride is statically known + typename StoragePolicy::template StorageSelector::type ids; + typename StoragePolicy::template StorageSelector::type data; + }; + +} // namespace ecal + +#endif // CUDADataFormats_EcalDigi_interface_DigisCollection_h diff --git a/CUDADataFormats/EcalDigi/src/classes.h b/CUDADataFormats/EcalDigi/src/classes.h new file mode 100644 index 0000000000000..cd60b775e229b --- /dev/null +++ b/CUDADataFormats/EcalDigi/src/classes.h @@ -0,0 +1,3 @@ +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/EcalDigi/interface/DigisCollection.h" +#include "DataFormats/Common/interface/Wrapper.h" diff --git a/CUDADataFormats/EcalDigi/src/classes_def.xml b/CUDADataFormats/EcalDigi/src/classes_def.xml new file mode 100644 index 0000000000000..6a3adfe4b41c5 --- /dev/null +++ b/CUDADataFormats/EcalDigi/src/classes_def.xml @@ -0,0 +1,6 @@ + + + + + + diff --git 
a/CUDADataFormats/EcalRecHitSoA/BuildFile.xml b/CUDADataFormats/EcalRecHitSoA/BuildFile.xml new file mode 100644 index 0000000000000..a684d9a23f1c6 --- /dev/null +++ b/CUDADataFormats/EcalRecHitSoA/BuildFile.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h b/CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h new file mode 100644 index 0000000000000..731b8b801407f --- /dev/null +++ b/CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h @@ -0,0 +1,45 @@ +#ifndef CUDADataFormats_EcalRecHitSoA_interface_EcalRecHit_h +#define CUDADataFormats_EcalRecHitSoA_interface_EcalRecHit_h + +#include +#include + +#include "CUDADataFormats/CaloCommon/interface/Common.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" + +namespace ecal { + + template + struct RecHit : public ::calo::common::AddSize { + RecHit() = default; + RecHit(const RecHit&) = default; + RecHit& operator=(const RecHit&) = default; + + RecHit(RecHit&&) = default; + RecHit& operator=(RecHit&&) = default; + + typename StoragePolicy::template StorageSelector::type energy; + typename StoragePolicy::template StorageSelector::type time; + // should we remove the following, since already included in "extra" ? 
+ typename StoragePolicy::template StorageSelector::type chi2; + typename StoragePolicy::template StorageSelector::type + extra; // packed uint32_t for timeError, chi2, energyError + typename StoragePolicy::template StorageSelector::type + flagBits; // store rechit condition (see Flags enum) in a bit-wise way + typename StoragePolicy::template StorageSelector::type did; + + template + typename std::enable_if::value, void>::type resize(size_t size) { + energy.resize(size); + time.resize(size); + chi2.resize(size); + extra.resize(size); + flagBits.resize(size); + did.resize(size); + } + }; + +} // namespace ecal + +#endif // CUDADataFormats_EcalRecHitSoA_interface_EcalRecHit_h diff --git a/CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h b/CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h new file mode 100644 index 0000000000000..78c909b029dc1 --- /dev/null +++ b/CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h @@ -0,0 +1,46 @@ +#ifndef CUDADataFormats_EcalRecHitSoA_interface_EcalUncalibratedRecHit_h +#define CUDADataFormats_EcalRecHitSoA_interface_EcalUncalibratedRecHit_h + +#include +#include + +#include "CUDADataFormats/CaloCommon/interface/Common.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h" +#include "DataFormats/EcalDigi/interface/EcalDataFrame.h" + +namespace ecal { + + template + struct UncalibratedRecHit : public ::calo::common::AddSize { + UncalibratedRecHit() = default; + UncalibratedRecHit(const UncalibratedRecHit&) = default; + UncalibratedRecHit& operator=(const UncalibratedRecHit&) = default; + + UncalibratedRecHit(UncalibratedRecHit&&) = default; + UncalibratedRecHit& operator=(UncalibratedRecHit&&) = default; + + typename StoragePolicy::template StorageSelector::type amplitudesAll; + typename StoragePolicy::template StorageSelector::type amplitude; + typename StoragePolicy::template StorageSelector::type chi2; + typename StoragePolicy::template StorageSelector::type pedestal; + 
typename StoragePolicy::template StorageSelector::type jitter; + typename StoragePolicy::template StorageSelector::type jitterError; + typename StoragePolicy::template StorageSelector::type did; + typename StoragePolicy::template StorageSelector::type flags; + + template + typename std::enable_if::value, void>::type resize(size_t size) { + amplitudesAll.resize(size * EcalDataFrame::MAXSAMPLES); + amplitude.resize(size); + pedestal.resize(size); + chi2.resize(size); + did.resize(size); + flags.resize(size); + jitter.resize(size); + jitterError.resize(size); + } + }; + +} // namespace ecal + +#endif // CUDADataFormats_EcalRecHitSoA_interface_EcalUncalibratedRecHit_h diff --git a/CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h b/CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h new file mode 100644 index 0000000000000..87c4252a5e949 --- /dev/null +++ b/CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h @@ -0,0 +1,13 @@ +#ifndef CUDADataFormats_EcalRecHitSoA_interface_RecoTypes_h +#define CUDADataFormats_EcalRecHitSoA_interface_RecoTypes_h + +namespace ecal { + namespace reco { + + using ComputationScalarType = float; + using StorageScalarType = float; + + } // namespace reco +} // namespace ecal + +#endif // CUDADataFormats_EcalRecHitSoA_interface_RecoTypes_h diff --git a/CUDADataFormats/EcalRecHitSoA/src/classes.h b/CUDADataFormats/EcalRecHitSoA/src/classes.h new file mode 100644 index 0000000000000..ef95da461e3ba --- /dev/null +++ b/CUDADataFormats/EcalRecHitSoA/src/classes.h @@ -0,0 +1,4 @@ +#include "CUDADataFormats/Common/interface/Product.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h" +#include "DataFormats/Common/interface/Wrapper.h" diff --git a/CUDADataFormats/EcalRecHitSoA/src/classes_def.xml b/CUDADataFormats/EcalRecHitSoA/src/classes_def.xml new file mode 100644 index 0000000000000..27e315b2c2822 --- /dev/null +++ 
b/CUDADataFormats/EcalRecHitSoA/src/classes_def.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/Configuration/StandardSequences/python/RawToDigi_cff.py b/Configuration/StandardSequences/python/RawToDigi_cff.py index dd3bf675faf0d..102e8b1132f71 100644 --- a/Configuration/StandardSequences/python/RawToDigi_cff.py +++ b/Configuration/StandardSequences/python/RawToDigi_cff.py @@ -1,4 +1,5 @@ import FWCore.ParameterSet.Config as cms +from Configuration.ProcessModifiers.gpu_cff import gpu # This object is used to selectively make changes for different running # scenarios. In this case it makes changes for Run 2. @@ -74,7 +75,7 @@ scalersRawToDigi.scalersInputTag = 'rawDataCollector' siPixelDigis.cpu.InputLabel = 'rawDataCollector' -ecalDigis.InputLabel = 'rawDataCollector' +(~gpu).toModify(ecalDigis, InputLabel='rawDataCollector') ecalPreshowerDigis.sourceTag = 'rawDataCollector' hcalDigis.InputLabel = 'rawDataCollector' muonCSCDigis.InputObjects = 'rawDataCollector' diff --git a/EventFilter/EcalRawToDigi/BuildFile.xml b/EventFilter/EcalRawToDigi/BuildFile.xml index 88f864737813e..2ac1b25233910 100644 --- a/EventFilter/EcalRawToDigi/BuildFile.xml +++ b/EventFilter/EcalRawToDigi/BuildFile.xml @@ -1,14 +1,18 @@ - - + + + + + + diff --git a/EventFilter/EcalRawToDigi/bin/BuildFile.xml b/EventFilter/EcalRawToDigi/bin/BuildFile.xml new file mode 100644 index 0000000000000..792fe438d8799 --- /dev/null +++ b/EventFilter/EcalRawToDigi/bin/BuildFile.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/EventFilter/EcalRawToDigi/bin/makeEcalRaw2DigiGpuValidationPlots.cpp b/EventFilter/EcalRawToDigi/bin/makeEcalRaw2DigiGpuValidationPlots.cpp new file mode 100644 index 0000000000000..609c277e19288 --- /dev/null +++ b/EventFilter/EcalRawToDigi/bin/makeEcalRaw2DigiGpuValidationPlots.cpp @@ -0,0 +1,210 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "DataFormats/Common/interface/Wrapper.h" +#include 
"DataFormats/EcalDigi/interface/EcalDigiCollections.h" + +int main(int argc, char* argv[]) { + if (argc < 3) { + std::cout << "run with: ./ \n"; + exit(0); + } + + // branches to use + edm::Wrapper*wgpuEB = nullptr, *wcpuEB = nullptr; + edm::Wrapper*wgpuEE = nullptr, *wcpuEE = nullptr; + + std::string inFileName{argv[1]}; + std::string outFileName{argv[2]}; + + // prep output + TFile rfout{outFileName.c_str(), "recreate"}; + + int const nbins = 400; + float const last = 4096.; + auto hADCEBGPU = new TH1D("hADCEBGPU", "hADCEBGPU", nbins, 0, last); + auto hADCEBCPU = new TH1D("hADCEBCPU", "hADCEBCPU", nbins, 0, last); + auto hADCEEGPU = new TH1D("hADCEEGPU", "hADCEEGPU", nbins, 0, last); + auto hADCEECPU = new TH1D("hADCEECPU", "hADCEECPU", nbins, 0, last); + + auto hGainEBGPU = new TH1D("hGainEBGPU", "hGainEBGPU", 4, 0, 4); + auto hGainEBCPU = new TH1D("hGainEBCPU", "hGainEBCPU", 4, 0, 4); + auto hGainEEGPU = new TH1D("hGainEEGPU", "hGainEEGPU", 4, 0, 4); + auto hGainEECPU = new TH1D("hGainEECPU", "hGainEECPU", 4, 0, 4); + + auto hADCEBGPUvsCPU = new TH2D("hADCEBGPUvsCPU", "hADCEBGPUvsCPU", nbins, 0, last, nbins, 0, last); + auto hADCEEGPUvsCPU = new TH2D("hADCEEGPUvsCPU", "hADCEEGPUvsCPU", nbins, 0, last, nbins, 0, last); + auto hGainEBGPUvsCPU = new TH2D("hGainEBGPUvsCPU", "hGainEBGPUvsCPU", 4, 0, 4, 4, 0, 4); + auto hGainEEGPUvsCPU = new TH2D("hGainEEGPUvsCPU", "hGainEEGPUvsCPU", 4, 0, 4, 4, 0, 4); + + // prep input + TFile rfin{inFileName.c_str()}; + TTree* rt = (TTree*)rfin.Get("Events"); + rt->SetBranchAddress("EBDigiCollection_ecalCPUDigisProducer_ebDigis_RECO.", &wgpuEB); + rt->SetBranchAddress("EEDigiCollection_ecalCPUDigisProducer_eeDigis_RECO.", &wgpuEE); + rt->SetBranchAddress("EBDigiCollection_ecalDigis_ebDigis_RECO.", &wcpuEB); + rt->SetBranchAddress("EEDigiCollection_ecalDigis_eeDigis_RECO.", &wcpuEE); + + // accumulate + auto const nentries = rt->GetEntries(); + std::cout << ">>> nentries = " << nentries << std::endl; + for (int ie = 0; ie < 
nentries; ++ie) { + rt->GetEntry(ie); + + auto const ngpuebs = wgpuEB->bareProduct().size(); + auto const ncpuebs = wcpuEB->bareProduct().size(); + auto const ngpuees = wgpuEE->bareProduct().size(); + auto const ncpuees = wcpuEE->bareProduct().size(); + + if (ngpuebs != ncpuebs or ngpuees != ncpuees) { + std::cerr << "*** mismatch in ndigis: " + << "ie = " << ie << " ngpuebs = " << ngpuebs << " ncpuebs = " << ncpuebs << " ngpuees = " << ngpuees + << " ncpuees = " << ncpuees << std::endl; + + // this is a must for now + //assert(ngpuebs==ncpuebs); + //assert(ngpuees==ncpuees); + } + + // assume identical sizes + auto const& idsgpuEB = wgpuEB->bareProduct().ids(); + auto const& datagpuEB = wgpuEB->bareProduct().data(); + auto const& idscpuEB = wcpuEB->bareProduct().ids(); + auto const& datacpuEB = wcpuEB->bareProduct().data(); + for (uint32_t ieb = 0; ieb < ngpuebs; ++ieb) { + auto const& idgpu = idsgpuEB[ieb]; + auto iter2idcpu = std::find(idscpuEB.begin(), idscpuEB.end(), idgpu); + // FIXME + assert(idgpu == *iter2idcpu); + + auto const ptrdiff = iter2idcpu - idscpuEB.begin(); + for (uint32_t s = 0u; s < 10u; s++) { + EcalMGPASample sampleGPU{datagpuEB[ieb * 10 + s]}; + EcalMGPASample sampleCPU{datacpuEB[ptrdiff * 10 + s]}; + + hADCEBGPU->Fill(sampleGPU.adc()); + hGainEBGPU->Fill(sampleGPU.gainId()); + hADCEBCPU->Fill(sampleCPU.adc()); + hGainEBCPU->Fill(sampleCPU.gainId()); + hADCEBGPUvsCPU->Fill(sampleCPU.adc(), sampleGPU.adc()); + hGainEBGPUvsCPU->Fill(sampleCPU.gainId(), sampleGPU.gainId()); + } + } + + auto const& idsgpuEE = wgpuEE->bareProduct().ids(); + auto const& datagpuEE = wgpuEE->bareProduct().data(); + auto const& idscpuEE = wcpuEE->bareProduct().ids(); + auto const& datacpuEE = wcpuEE->bareProduct().data(); + for (uint32_t iee = 0; iee < ngpuees; ++iee) { + auto const& idgpu = idsgpuEE[iee]; + auto iter2idcpu = std::find(idscpuEE.begin(), idscpuEE.end(), idgpu); + // FIXME + assert(idgpu == *iter2idcpu); + + // get the digis + auto const ptrdiff = 
iter2idcpu - idscpuEE.begin(); + for (uint32_t s = 0u; s < 10u; s++) { + EcalMGPASample sampleGPU{datagpuEE[iee * 10 + s]}; + EcalMGPASample sampleCPU{datacpuEE[ptrdiff * 10 + s]}; + + hADCEEGPU->Fill(sampleGPU.adc()); + hGainEEGPU->Fill(sampleGPU.gainId()); + hADCEECPU->Fill(sampleCPU.adc()); + hGainEECPU->Fill(sampleCPU.gainId()); + hADCEEGPUvsCPU->Fill(sampleCPU.adc(), sampleGPU.adc()); + hGainEEGPUvsCPU->Fill(sampleCPU.gainId(), sampleGPU.gainId()); + } + } + } + + { + TCanvas c{"plots", "plots", 4200, 6200}; + c.Divide(2, 4); + c.cd(1); + { + gPad->SetLogy(); + hADCEBCPU->SetLineColor(kBlack); + hADCEBCPU->SetLineWidth(1.); + hADCEBCPU->Draw(""); + hADCEBGPU->SetLineColor(kBlue); + hADCEBGPU->SetLineWidth(1.); + hADCEBGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats*)hADCEBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + c.cd(2); + { + gPad->SetLogy(); + hADCEECPU->SetLineColor(kBlack); + hADCEECPU->SetLineWidth(1.); + hADCEECPU->Draw(""); + hADCEEGPU->SetLineColor(kBlue); + hADCEEGPU->SetLineWidth(1.); + hADCEEGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats*)hADCEEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + c.cd(3); + { + gPad->SetLogy(); + hGainEBCPU->SetLineColor(kBlack); + hGainEBCPU->SetLineWidth(1.); + hGainEBCPU->Draw(""); + hGainEBGPU->SetLineColor(kBlue); + hGainEBGPU->SetLineWidth(1.); + hGainEBGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats*)hGainEBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + c.cd(4); + { + gPad->SetLogy(); + hGainEECPU->SetLineColor(kBlack); + hGainEECPU->SetLineWidth(1.); + hGainEECPU->Draw(""); + hGainEEGPU->SetLineColor(kBlue); + hGainEEGPU->SetLineWidth(1.); + 
hGainEEGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats*)hGainEEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + c.cd(5); + hADCEBGPUvsCPU->Draw("colz"); + c.cd(6); + hADCEEGPUvsCPU->Draw("colz"); + c.cd(7); + hGainEBGPUvsCPU->Draw("colz"); + c.cd(8); + hGainEEGPUvsCPU->Draw("colz"); + c.SaveAs("plots.pdf"); + } + + rfin.Close(); + rfout.Write(); + rfout.Close(); +} diff --git a/EventFilter/EcalRawToDigi/interface/EcalRegionCabling.h b/EventFilter/EcalRawToDigi/interface/EcalRegionCabling.h index fa6e9f5d5a161..38a9ebdf18cb8 100644 --- a/EventFilter/EcalRawToDigi/interface/EcalRegionCabling.h +++ b/EventFilter/EcalRawToDigi/interface/EcalRegionCabling.h @@ -1,14 +1,11 @@ -#ifndef EcalRegionCabling_H -#define EcalRegionCabling_H +#ifndef EventFilter_EcalRawToDigi_interface_EcalRegionCabling_h +#define EventFilter_EcalRawToDigi_interface_EcalRegionCabling_h -#include "Geometry/EcalMapping/interface/EcalElectronicsMapping.h" -#include "Geometry/EcalMapping/interface/ESElectronicsMapper.h" - -#include "DataFormats/EcalRecHit/interface/EcalRecHit.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" #include "DataFormats/FEDRawData/interface/FEDNumbering.h" - #include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "Geometry/EcalMapping/interface/ESElectronicsMapper.h" +#include "Geometry/EcalMapping/interface/EcalElectronicsMapping.h" class EcalRegionCabling { public: @@ -73,4 +70,4 @@ class EcalRegionCabling { const ESElectronicsMapper* es_mapping_; }; -#endif +#endif // EventFilter_EcalRawToDigi_interface_EcalRegionCabling_h diff --git a/EventFilter/EcalRawToDigi/interface/ElectronicsIdGPU.h b/EventFilter/EcalRawToDigi/interface/ElectronicsIdGPU.h new file mode 100644 index 0000000000000..abedcf5a2d479 --- /dev/null +++ b/EventFilter/EcalRawToDigi/interface/ElectronicsIdGPU.h 
namespace ecal {
  namespace raw {

    /** \brief Ecal readout channel identification, packed into one 32-bit word

         [31:20] Unused (so far)
         [19:13] DCC id
         [12:6]  tower
         [5:3]   strip
         [2:0]   xtal
         Index starts from 1
     */

    class ElectronicsIdGPU {
    public:
      /** Default constructor -- invalid value */
      constexpr ElectronicsIdGPU() : id_{0xFFFFFFFFu} {}
      /** from raw */
      constexpr ElectronicsIdGPU(uint32_t id) : id_{id} {}
      /** Constructor from dcc,tower,strip,xtal; each field is masked to its bit width before packing **/
      constexpr ElectronicsIdGPU(uint8_t const dccid, uint8_t const towerid, uint8_t const stripid, uint8_t const xtalid)
          : id_{static_cast<uint32_t>((xtalid & 0x7) | ((stripid & 0x7) << 3) | ((towerid & 0x7F) << 6) |
                                      ((dccid & 0x7F) << 13))} {}

      /// get the packed id; const-qualified so that it is usable on const and constexpr objects
      /// (since C++14 a constexpr member function is no longer implicitly const)
      constexpr uint32_t operator()() const { return id_; }
      /// get the packed id
      constexpr uint32_t rawId() const { return id_; }

      /// get the DCC (Ecal Local DCC value not global one) id
      constexpr uint8_t dccId() const { return (id_ >> 13) & 0x7F; }
      /// get the tower id
      constexpr uint8_t towerId() const { return (id_ >> 6) & 0x7F; }
      /// get the strip id
      constexpr uint8_t stripId() const { return (id_ >> 3) & 0x7; }
      /// get the channel id
      constexpr uint8_t xtalId() const { return (id_ & 0x7); }

      /// get the subdet
      //EcalSubdetector subdet() const;

      /// get a fast, compact, unique index for linear lookups (maximum value = 4194303)
      constexpr uint32_t linearIndex() const { return id_ & 0x3FFFFF; }

      /// so far for EndCap only :
      //int channelId() const;  // xtal id between 1 and 25

      static constexpr int kTowersInPhi = 4;     // see EBDetId
      static constexpr int kCrystalsInPhi = 20;  // see EBDetId

      static constexpr uint8_t MAX_DCCID = 54;  //To be updated with correct and final number
      static constexpr uint8_t MIN_DCCID = 1;
      static constexpr uint8_t MAX_TOWERID = 70;
      static constexpr uint8_t MIN_TOWERID = 1;
      static constexpr uint8_t MAX_STRIPID = 5;
      static constexpr uint8_t MIN_STRIPID = 1;
      static constexpr uint8_t MAX_CHANNELID = 25;
      static constexpr uint8_t MIN_CHANNELID = 1;
      static constexpr uint8_t MAX_XTALID = 5;
      static constexpr uint8_t MIN_XTALID = 1;

      static constexpr int MIN_DCCID_EEM = 1;
      static constexpr int MAX_DCCID_EEM = 9;
      static constexpr int MIN_DCCID_EBM = 10;
      static constexpr int MAX_DCCID_EBM = 27;
      static constexpr int MIN_DCCID_EBP = 28;
      static constexpr int MAX_DCCID_EBP = 45;
      static constexpr int MIN_DCCID_EEP = 46;
      static constexpr int MAX_DCCID_EEP = 54;

      static constexpr int DCCID_PHI0_EBM = 10;
      static constexpr int DCCID_PHI0_EBP = 28;

      static constexpr int kDCCChannelBoundary = 17;
      static constexpr int DCC_EBM = 10;  // id of the DCC in EB- which contains phi=0 deg.
      static constexpr int DCC_EBP = 28;  // id of the DCC in EB+ which contains phi=0 deg.
      static constexpr int DCC_EEM = 1;   // id of the DCC in EE- which contains phi=0 deg.
      static constexpr int DCC_EEP = 46;  // id of the DCC in EE+ which contains phi=0 deg.

    private:
      uint32_t id_;
    };

  }  // namespace raw
}  // namespace ecal
a/EventFilter/EcalRawToDigi/plugins/DeclsForKernels.h b/EventFilter/EcalRawToDigi/plugins/DeclsForKernels.h new file mode 100644 index 0000000000000..a6429121adc82 --- /dev/null +++ b/EventFilter/EcalRawToDigi/plugins/DeclsForKernels.h @@ -0,0 +1,70 @@ +#ifndef EventFilter_EcalRawToDigi_plugins_DeclsForKernels_h +#define EventFilter_EcalRawToDigi_plugins_DeclsForKernels_h + +#include + +#include "CUDADataFormats/EcalDigi/interface/DigisCollection.h" +#include "DataFormats/EcalDigi/interface/EcalDataFrame.h" +#include "EventFilter/EcalRawToDigi/interface/DCCRawDataDefinitions.h" +#include "EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" +#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" + +namespace ecal { + namespace raw { + + constexpr auto empty_event_size = EMPTYEVENTSIZE; + constexpr uint32_t nfeds_max = 54; + constexpr uint32_t nbytes_per_fed_max = 10 * 1024; + + struct InputDataCPU { + cms::cuda::host::unique_ptr data; + cms::cuda::host::unique_ptr offsets; + cms::cuda::host::unique_ptr feds; + }; + + struct ConfigurationParameters { + uint32_t maxChannelsEE; + uint32_t maxChannelsEB; + }; + + struct OutputDataCPU { + // [0] - eb, [1] - ee + cms::cuda::host::unique_ptr nchannels; + }; + + struct OutputDataGPU { + DigisCollection<::calo::common::DevStoragePolicy> digisEB, digisEE; + + void allocate(ConfigurationParameters const &config, cudaStream_t cudaStream) { + digisEB.data = + cms::cuda::make_device_unique(config.maxChannelsEB * EcalDataFrame::MAXSAMPLES, cudaStream); + digisEE.data = + cms::cuda::make_device_unique(config.maxChannelsEE * EcalDataFrame::MAXSAMPLES, cudaStream); + digisEB.ids = cms::cuda::make_device_unique(config.maxChannelsEB, cudaStream); + digisEE.ids = 
cms::cuda::make_device_unique(config.maxChannelsEE, cudaStream); + } + }; + + struct ScratchDataGPU { + // [0] = EB + // [1] = EE + cms::cuda::device::unique_ptr pChannelsCounter; + }; + + struct InputDataGPU { + cms::cuda::device::unique_ptr data; + cms::cuda::device::unique_ptr offsets; + cms::cuda::device::unique_ptr feds; + }; + + struct ConditionsProducts { + ElectronicsMappingGPU::Product const &eMappingProduct; + }; + + } // namespace raw +} // namespace ecal + +#endif // EventFilter_EcalRawToDigi_plugins_DeclsForKernels_h diff --git a/EventFilter/EcalRawToDigi/plugins/EcalCPUDigisProducer.cc b/EventFilter/EcalRawToDigi/plugins/EcalCPUDigisProducer.cc new file mode 100644 index 0000000000000..5563dd5b52cc8 --- /dev/null +++ b/EventFilter/EcalRawToDigi/plugins/EcalCPUDigisProducer.cc @@ -0,0 +1,196 @@ +#include + +#include "CUDADataFormats/EcalDigi/interface/DigisCollection.h" +#include "CondFormats/DataRecord/interface/EcalMappingElectronicsRcd.h" +#include "DataFormats/EcalDetId/interface/EcalDetIdCollections.h" +#include "DataFormats/EcalDigi/interface/EcalDataFrame.h" +#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h" +#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h" +#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" +#include "EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +#include "DeclsForKernels.h" +#include "UnpackGPU.h" + +class EcalCPUDigisProducer : public edm::stream::EDProducer { +public: + explicit 
EcalCPUDigisProducer(edm::ParameterSet const& ps); + ~EcalCPUDigisProducer() override; + static void fillDescriptions(edm::ConfigurationDescriptions&); + +private: + void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override; + void produce(edm::Event&, edm::EventSetup const&) override; + +private: + // input digi collections in GPU-friendly format + using InputProduct = cms::cuda::Product>; + edm::EDGetTokenT digisInEBToken_; + edm::EDGetTokenT digisInEEToken_; + + // output digi collections in legacy format + edm::EDPutTokenT digisOutEBToken_; + edm::EDPutTokenT digisOutEEToken_; + + // whether to produce dummy integrity collections + bool produceDummyIntegrityCollections_; + + // dummy SRP collections + edm::EDPutTokenT ebSrFlagToken_; + edm::EDPutTokenT eeSrFlagToken_; + + // dummy integrity for xtal data + edm::EDPutTokenT ebIntegrityGainErrorsToken_; + edm::EDPutTokenT ebIntegrityGainSwitchErrorsToken_; + edm::EDPutTokenT ebIntegrityChIdErrorsToken_; + + // dummy integrity for xtal data - EE specific (to be rivisited towards EB+EE common collection) + edm::EDPutTokenT eeIntegrityGainErrorsToken_; + edm::EDPutTokenT eeIntegrityGainSwitchErrorsToken_; + edm::EDPutTokenT eeIntegrityChIdErrorsToken_; + + // dummy integrity errors + edm::EDPutTokenT integrityTTIdErrorsToken_; + edm::EDPutTokenT integrityBlockSizeErrorsToken_; + + // FIXME better way to pass pointers from acquire to produce? 
+ std::vector> idsebtmp, idseetmp; + std::vector> dataebtmp, dataeetmp; +}; + +void EcalCPUDigisProducer::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { + edm::ParameterSetDescription desc; + + desc.add("digisInLabelEB", edm::InputTag{"ecalRawToDigiGPU", "ebDigis"}); + desc.add("digisInLabelEE", edm::InputTag{"ecalRawToDigiGPU", "eeDigis"}); + desc.add("digisOutLabelEB", "ebDigis"); + desc.add("digisOutLabelEE", "eeDigis"); + + desc.add("produceDummyIntegrityCollections", false); + + std::string label = "ecalCPUDigisProducer"; + confDesc.add(label, desc); +} + +EcalCPUDigisProducer::EcalCPUDigisProducer(const edm::ParameterSet& ps) + : // input digi collections in GPU-friendly format + digisInEBToken_{consumes(ps.getParameter("digisInLabelEB"))}, + digisInEEToken_{consumes(ps.getParameter("digisInLabelEE"))}, + // output digi collections in legacy format + digisOutEBToken_{produces(ps.getParameter("digisOutLabelEB"))}, + digisOutEEToken_{produces(ps.getParameter("digisOutLabelEE"))}, + // whether to produce dummy integrity collections + produceDummyIntegrityCollections_{ps.getParameter("produceDummyIntegrityCollections")}, + // dummy SRP collections + ebSrFlagToken_{produceDummyIntegrityCollections_ ? produces() + : edm::EDPutTokenT{}}, + eeSrFlagToken_{produceDummyIntegrityCollections_ ? produces() + : edm::EDPutTokenT{}}, + // dummy integrity for xtal data + ebIntegrityGainErrorsToken_{produceDummyIntegrityCollections_ + ? produces("EcalIntegrityGainErrors") + : edm::EDPutTokenT{}}, + ebIntegrityGainSwitchErrorsToken_{produceDummyIntegrityCollections_ + ? produces("EcalIntegrityGainSwitchErrors") + : edm::EDPutTokenT{}}, + ebIntegrityChIdErrorsToken_{produceDummyIntegrityCollections_ + ? produces("EcalIntegrityChIdErrors") + : edm::EDPutTokenT{}}, + // dummy integrity for xtal data - EE specific (to be rivisited towards EB+EE common collection) + eeIntegrityGainErrorsToken_{produceDummyIntegrityCollections_ + ? 
produces("EcalIntegrityGainErrors") + : edm::EDPutTokenT{}}, + eeIntegrityGainSwitchErrorsToken_{produceDummyIntegrityCollections_ + ? produces("EcalIntegrityGainSwitchErrors") + : edm::EDPutTokenT{}}, + eeIntegrityChIdErrorsToken_{produceDummyIntegrityCollections_ + ? produces("EcalIntegrityChIdErrors") + : edm::EDPutTokenT{}}, + // dummy integrity errors + integrityTTIdErrorsToken_{produceDummyIntegrityCollections_ + ? produces("EcalIntegrityTTIdErrors") + : edm::EDPutTokenT{}}, + integrityBlockSizeErrorsToken_{produceDummyIntegrityCollections_ + ? produces("EcalIntegrityBlockSizeErrors") + : edm::EDPutTokenT{}} {} + +EcalCPUDigisProducer::~EcalCPUDigisProducer() {} + +void EcalCPUDigisProducer::acquire(edm::Event const& event, + edm::EventSetup const& setup, + edm::WaitingTaskWithArenaHolder taskHolder) { + // retrieve data/ctx + auto const& ebdigisProduct = event.get(digisInEBToken_); + auto const& eedigisProduct = event.get(digisInEEToken_); + cms::cuda::ScopedContextAcquire ctx{ebdigisProduct, std::move(taskHolder)}; + auto const& ebdigis = ctx.get(ebdigisProduct); + auto const& eedigis = ctx.get(eedigisProduct); + + // resize tmp buffers + dataebtmp.resize(ebdigis.size * EcalDataFrame::MAXSAMPLES); + dataeetmp.resize(eedigis.size * EcalDataFrame::MAXSAMPLES); + idsebtmp.resize(ebdigis.size); + idseetmp.resize(eedigis.size); + + // enqeue transfers + cudaCheck(cudaMemcpyAsync( + dataebtmp.data(), ebdigis.data.get(), dataebtmp.size() * sizeof(uint16_t), cudaMemcpyDeviceToHost, ctx.stream())); + cudaCheck(cudaMemcpyAsync( + dataeetmp.data(), eedigis.data.get(), dataeetmp.size() * sizeof(uint16_t), cudaMemcpyDeviceToHost, ctx.stream())); + cudaCheck(cudaMemcpyAsync( + idsebtmp.data(), ebdigis.ids.get(), idsebtmp.size() * sizeof(uint32_t), cudaMemcpyDeviceToHost, ctx.stream())); + cudaCheck(cudaMemcpyAsync( + idseetmp.data(), eedigis.ids.get(), idseetmp.size() * sizeof(uint32_t), cudaMemcpyDeviceToHost, ctx.stream())); +} + +void 
EcalCPUDigisProducer::produce(edm::Event& event, edm::EventSetup const& setup) { + // output collections + auto digisEB = std::make_unique(); + auto digisEE = std::make_unique(); + digisEB->resize(idsebtmp.size()); + digisEE->resize(idseetmp.size()); + + // cast constness away + // use pointers to buffers instead of move operator= semantics + // cause we have different allocators in there... + auto* dataEB = const_cast(digisEB->data().data()); + auto* dataEE = const_cast(digisEE->data().data()); + auto* idsEB = const_cast(digisEB->ids().data()); + auto* idsEE = const_cast(digisEE->ids().data()); + + // copy data + std::memcpy(dataEB, dataebtmp.data(), dataebtmp.size() * sizeof(uint16_t)); + std::memcpy(dataEE, dataeetmp.data(), dataeetmp.size() * sizeof(uint16_t)); + std::memcpy(idsEB, idsebtmp.data(), idsebtmp.size() * sizeof(uint32_t)); + std::memcpy(idsEE, idseetmp.data(), idseetmp.size() * sizeof(uint32_t)); + + event.put(digisOutEBToken_, std::move(digisEB)); + event.put(digisOutEEToken_, std::move(digisEE)); + + if (produceDummyIntegrityCollections_) { + // dummy SRP collections + event.emplace(ebSrFlagToken_); + event.emplace(eeSrFlagToken_); + // dummy integrity for xtal data + event.emplace(ebIntegrityGainErrorsToken_); + event.emplace(ebIntegrityGainSwitchErrorsToken_); + event.emplace(ebIntegrityChIdErrorsToken_); + // dummy integrity for xtal data - EE specific (to be rivisited towards EB+EE common collection) + event.emplace(eeIntegrityGainErrorsToken_); + event.emplace(eeIntegrityGainSwitchErrorsToken_); + event.emplace(eeIntegrityChIdErrorsToken_); + // dummy integrity errors + event.emplace(integrityTTIdErrorsToken_); + event.emplace(integrityBlockSizeErrorsToken_); + } +} + +DEFINE_FWK_MODULE(EcalCPUDigisProducer); diff --git a/EventFilter/EcalRawToDigi/plugins/EcalRawESProducersGPUDefs.cc b/EventFilter/EcalRawToDigi/plugins/EcalRawESProducersGPUDefs.cc new file mode 100644 index 0000000000000..84fcc7b2b2952 --- /dev/null +++ 
b/EventFilter/EcalRawToDigi/plugins/EcalRawESProducersGPUDefs.cc @@ -0,0 +1,9 @@ +#include "CondFormats/DataRecord/interface/EcalMappingElectronicsRcd.h" +#include "EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "HeterogeneousCore/CUDACore/interface/ConvertingESProducerT.h" + +using EcalElectronicsMappingGPUESProducer = + ConvertingESProducerT; + +DEFINE_FWK_EVENTSETUP_MODULE(EcalElectronicsMappingGPUESProducer); diff --git a/EventFilter/EcalRawToDigi/plugins/EcalRawToDigiGPU.cc b/EventFilter/EcalRawToDigi/plugins/EcalRawToDigiGPU.cc new file mode 100644 index 0000000000000..4f0743c9b1b51 --- /dev/null +++ b/EventFilter/EcalRawToDigi/plugins/EcalRawToDigiGPU.cc @@ -0,0 +1,155 @@ +#include "CUDADataFormats/EcalDigi/interface/DigisCollection.h" +#include "CondFormats/DataRecord/interface/EcalMappingElectronicsRcd.h" +#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h" +#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" +#include "EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/ESGetToken.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +#include "DeclsForKernels.h" +#include "UnpackGPU.h" + +class EcalRawToDigiGPU : public edm::stream::EDProducer { +public: + explicit EcalRawToDigiGPU(edm::ParameterSet const& ps); + ~EcalRawToDigiGPU() override; + static void fillDescriptions(edm::ConfigurationDescriptions&); + +private: + void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override; + void produce(edm::Event&, edm::EventSetup 
const&) override; + +private: + edm::EDGetTokenT rawDataToken_; + using OutputProduct = cms::cuda::Product>; + edm::EDPutTokenT digisEBToken_, digisEEToken_; + edm::ESGetToken eMappingToken_; + + cms::cuda::ContextState cudaState_; + + std::vector fedsToUnpack_; + + ecal::raw::ConfigurationParameters config_; + ecal::raw::OutputDataGPU outputGPU_; + ecal::raw::OutputDataCPU outputCPU_; +}; + +void EcalRawToDigiGPU::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { + edm::ParameterSetDescription desc; + + desc.add("InputLabel", edm::InputTag("rawDataCollector")); + std::vector feds(54); + for (uint32_t i = 0; i < 54; ++i) + feds[i] = i + 601; + desc.add>("FEDs", feds); + desc.add("maxChannelsEB", 61200); + desc.add("maxChannelsEE", 14648); + desc.add("digisLabelEB", "ebDigis"); + desc.add("digisLabelEE", "eeDigis"); + + std::string label = "ecalRawToDigiGPU"; + confDesc.add(label, desc); +} + +EcalRawToDigiGPU::EcalRawToDigiGPU(const edm::ParameterSet& ps) + : rawDataToken_{consumes(ps.getParameter("InputLabel"))}, + digisEBToken_{produces(ps.getParameter("digisLabelEB"))}, + digisEEToken_{produces(ps.getParameter("digisLabelEE"))}, + eMappingToken_{esConsumes()}, + fedsToUnpack_{ps.getParameter>("FEDs")} { + config_.maxChannelsEB = ps.getParameter("maxChannelsEB"); + config_.maxChannelsEE = ps.getParameter("maxChannelsEE"); +} + +EcalRawToDigiGPU::~EcalRawToDigiGPU() {} + +void EcalRawToDigiGPU::acquire(edm::Event const& event, + edm::EventSetup const& setup, + edm::WaitingTaskWithArenaHolder holder) { + // raii + cms::cuda::ScopedContextAcquire ctx{event.streamID(), std::move(holder), cudaState_}; + + // conditions + edm::ESHandle eMappingHandle = setup.getHandle(eMappingToken_); + auto const& eMappingProduct = eMappingHandle->getProduct(ctx.stream()); + + // bundle up conditions + ecal::raw::ConditionsProducts conditions{eMappingProduct}; + + // event data + edm::Handle rawDataHandle; + event.getByToken(rawDataToken_, rawDataHandle); + + // scratch + 
ecal::raw::ScratchDataGPU scratchGPU = {cms::cuda::make_device_unique(2, ctx.stream())}; + + // input cpu data + ecal::raw::InputDataCPU inputCPU = { + cms::cuda::make_host_unique(ecal::raw::nfeds_max * ecal::raw::nbytes_per_fed_max, ctx.stream()), + cms::cuda::make_host_unique(ecal::raw::nfeds_max, ctx.stream()), + cms::cuda::make_host_unique(ecal::raw::nfeds_max, ctx.stream())}; + + // input data gpu + ecal::raw::InputDataGPU inputGPU = {cms::cuda::make_device_unique( + ecal::raw::nfeds_max * ecal::raw::nbytes_per_fed_max, ctx.stream()), + cms::cuda::make_device_unique(ecal::raw::nfeds_max, ctx.stream()), + cms::cuda::make_device_unique(ecal::raw::nfeds_max, ctx.stream())}; + + // output cpu + outputCPU_ = {cms::cuda::make_host_unique(2, ctx.stream())}; + + // output gpu + outputGPU_.allocate(config_, ctx.stream()); + + // iterate over feds + // TODO: another idea + // - loop over all feds to unpack and enqueue cuda memcpy + // - accumulate the sizes + // - after the loop launch cuda memcpy for sizes + // - enqueue the kernel + uint32_t currentCummOffset = 0; + uint32_t counter = 0; + for (auto const& fed : fedsToUnpack_) { + auto const& data = rawDataHandle->FEDData(fed); + auto const nbytes = data.size(); + + // skip empty feds + if (nbytes < ecal::raw::empty_event_size) + continue; + + // copy raw data into plain buffer + std::memcpy(inputCPU.data.get() + currentCummOffset, data.data(), nbytes); + // set the offset in bytes from the start + inputCPU.offsets[counter] = currentCummOffset; + inputCPU.feds[counter] = fed; + + // this is the current offset into the vector + currentCummOffset += nbytes; + ++counter; + } + + ecal::raw::entryPoint( + inputCPU, inputGPU, outputGPU_, scratchGPU, outputCPU_, conditions, ctx.stream(), counter, currentCummOffset); +} + +void EcalRawToDigiGPU::produce(edm::Event& event, edm::EventSetup const& setup) { + cms::cuda::ScopedContextProduce ctx{cudaState_}; + + // get the number of channels + outputGPU_.digisEB.size = 
namespace ecal {
  namespace raw {

    // Debug helper: prints `nbytes` bytes of `buffer` as two-digit hex values,
    // starting a new row every `nbytes_per_row` bytes.
    __forceinline__ __device__ void print_raw_buffer(uint8_t const* const buffer,
                                                     uint32_t const nbytes,
                                                     uint32_t const nbytes_per_row = 20) {
      for (uint32_t i = 0; i < nbytes; i++) {
        if (i % nbytes_per_row == 0 && i > 0)
          printf("\n");
        printf("%02X ", buffer[i]);
      }
    }

    // Debug helper: prints bits 63, 62 and 61 of each of the `size` 64-bit words in `buffer`.
    __forceinline__ __device__ void print_first3bits(uint64_t const* buffer, uint32_t size) {
      for (uint32_t i = 0; i < size; ++i) {
        uint8_t const b61 = (buffer[i] >> 61) & 0x1;
        uint8_t const b62 = (buffer[i] >> 62) & 0x1;
        uint8_t const b63 = (buffer[i] >> 63) & 0x1;
        printf("[word: %u] %u%u%u\n", i, b63, b62, b61);
      }
    }

    // True if the DCC id lies in the barrel range [MIN_DCCID_EBM, MAX_DCCID_EBP].
    __forceinline__ __device__ bool is_barrel(uint8_t dccid) {
      return dccid >= ElectronicsIdGPU::MIN_DCCID_EBM && dccid <= ElectronicsIdGPU::MAX_DCCID_EBP;
    }

    // Converts a FED id into a DCC id (FED id minus 600).
    __forceinline__ __device__ uint8_t fed2dcc(int fed) { return static_cast<uint8_t>(fed - 600); }

    // Returns -1 for a DCC in the EB- range [MIN_DCCID_EBM, MAX_DCCID_EBM], +1 otherwise.
    __forceinline__ __device__ int zside_for_eb(ElectronicsIdGPU const& eid) {
      int dcc = eid.dccId();
      return ((dcc >= ElectronicsIdGPU::MIN_DCCID_EBM && dcc <= ElectronicsIdGPU::MAX_DCCID_EBM)) ? -1 : 1;
    }

    // Checks that the bunch crossing and level-1 counters of a tower block (bx, l1)
    // agree with the ones of the DCC header (dccbx, dccl1).
    __forceinline__ __device__ bool is_synced_towerblock(uint16_t const dccbx,
                                                         uint16_t const bx,
                                                         uint16_t const dccl1,
                                                         uint16_t const l1) {
      // DCC bx 3564 corresponds to a tower-block bx of 0
      bool const bxsync = (bx == 0 && dccbx == 3564) || (bx == dccbx && dccbx != 3564);
      // the tower-block l1 is compared against (DCC l1 - 1), restricted to 12 bits
      bool const l1sync = (l1 == ((dccl1 - 1) & 0xfff));
      return bxsync && l1sync;
    }

    // True if `tower` belongs to one of the ranges 13-20, 29-36, 45-52 or 61-68.
    __forceinline__ __device__ bool right_tower_for_eb(int tower) {
      // for EB, two types of tower (LVRB top/bottom)
      return (tower > 12 && tower < 21) || (tower > 28 && tower < 37) || (tower > 44 && tower < 53) ||
             (tower > 60 && tower < 69);
    }

    // Computes the raw barrel DetId from the electronics id (dcc, tower, strip, xtal).
    __forceinline__ __device__ uint32_t compute_ebdetid(ElectronicsIdGPU const& eid) {
      // as in Geometry/EcalMapping/.../EcalElectronicsMapping
      auto const dcc = eid.dccId();
      auto const tower = eid.towerId();
      auto const strip = eid.stripId();
      auto const xtal = eid.xtalId();

      int smid = 0;
      int iphi = 0;
      bool EBPlus = (zside_for_eb(eid) > 0);
      bool EBMinus = !EBPlus;

      if (zside_for_eb(eid) < 0) {
        smid = dcc + 19 - ElectronicsIdGPU::DCCID_PHI0_EBM;
        iphi = (smid - 19) * ElectronicsIdGPU::kCrystalsInPhi;
        iphi += 5 * ((tower - 1) % ElectronicsIdGPU::kTowersInPhi);
      } else {
        smid = dcc + 1 - ElectronicsIdGPU::DCCID_PHI0_EBP;
        iphi = (smid - 1) * ElectronicsIdGPU::kCrystalsInPhi;
        iphi += 5 * (ElectronicsIdGPU::kTowersInPhi - ((tower - 1) % ElectronicsIdGPU::kTowersInPhi) - 1);
      }

      bool RightTower = right_tower_for_eb(tower);
      int ieta = 5 * ((tower - 1) / ElectronicsIdGPU::kTowersInPhi) + 1;
      if (RightTower) {
        ieta += (strip - 1);
        if (strip % 2 == 1) {
          if (EBMinus)
            iphi += (xtal - 1) + 1;
          else
            iphi += (4 - (xtal - 1)) + 1;
        } else {
          if (EBMinus)
            iphi += (4 - (xtal - 1)) + 1;
          else
            iphi += (xtal - 1) + 1;
        }
      } else {
        ieta += 4 - (strip - 1);
        if (strip % 2 == 1) {
          if (EBMinus)
            iphi += (4 - (xtal - 1)) + 1;
          else
            iphi += (xtal - 1) + 1;
        } else {
          if (EBMinus)
            iphi += (xtal - 1) + 1;
          else
            iphi += (4 - (xtal - 1)) + 1;
        }
      }

      if (zside_for_eb(eid) < 0)
        ieta = -ieta;

      // pack: sign of ieta in bit 16, |ieta| from bit 9 up, iphi in the lowest 9 bits
      DetId did{DetId::Ecal, EcalBarrel};
      return did.rawId() | ((ieta > 0) ? (0x10000 | (ieta << 9)) : ((-ieta) << 9)) | (iphi & 0x1FF);
    }

    // Lowest 12 bits of a sample word.
    __forceinline__ __device__ int adc(uint16_t sample) { return sample & 0xfff; }

    // Bits 12-13 of a sample word.
    __forceinline__ __device__ int gainId(uint16_t sample) { return (sample >> 12) & 0x3; }

    // Unpacking kernel, launched with one block per FED (blockIdx.x indexes `offsets` / `feds`).
    // Within each tower block the channels are distributed over the NTHREADS threads of the block.
    // Accepted channels are appended to the EB or EE outputs (10 samples + 1 id per channel),
    // at a position reserved by atomically incrementing the corresponding entry of
    // pChannelsCounterEBEE ([0] = EB, [1] = EE).
    template <int NTHREADS>
    __global__ void kernel_unpack_test(unsigned char const* __restrict__ data,
                                       uint32_t const* __restrict__ offsets,
                                       int const* __restrict__ feds,
                                       uint16_t* samplesEB,
                                       uint16_t* samplesEE,
                                       uint32_t* idsEB,
                                       uint32_t* idsEE,
                                       uint32_t* pChannelsCounterEBEE,
                                       uint32_t const* eid2did,
                                       uint32_t const nbytesTotal) {
      // indices
      auto const ifed = blockIdx.x;

      // offset in bytes
      auto const offset = offsets[ifed];
      // fed id
      auto const fed = feds[ifed];
      auto const isBarrel = is_barrel(static_cast<uint8_t>(fed - 600));
      // size: up to the next FED's offset, or up to the end of the buffer for the last FED
      auto const size = ifed == gridDim.x - 1 ? nbytesTotal - offset : offsets[ifed + 1] - offset;
      auto* samples = isBarrel ? samplesEB : samplesEE;
      auto* ids = isBarrel ? idsEB : idsEE;
      auto* pChannelsCounter = isBarrel ? &pChannelsCounterEBEE[0] : &pChannelsCounterEBEE[1];

      // offset to the right raw buffer
      uint64_t const* buffer = reinterpret_cast<uint64_t const*>(data + offset);

      // dump first 3 bits for each 64-bit word
      //print_first3bits(buffer, size / 8);

      //
      // fed header
      //
      auto const fed_header = buffer[0];
      uint32_t bx = (fed_header >> 20) & 0xfff;
      uint32_t lv1 = (fed_header >> 32) & 0xffffff;

      // 9 for fed + dcc header
      // 36 for 4 EE TCC blocks or 18 for 1 EB TCC block
      // 6 for SR block size

      // dcc header w2
      auto const w2 = buffer[2];
      uint8_t const fov = (w2 >> 48) & 0xf;

      //
      // print Tower block headers
      //
      uint8_t ntccblockwords = isBarrel ? 18 : 36;
      auto const* tower_blocks_start = buffer + 9 + ntccblockwords + 6;
      auto const* trailer = buffer + (size / 8 - 1);
      auto const* current_tower_block = tower_blocks_start;
      // use '<' rather than '!=': a corrupted block length that steps over the trailer
      // must terminate the loop instead of reading past the end of the FED data
      while (current_tower_block < trailer) {
        auto const w = *current_tower_block;
        uint8_t ttid = w & 0xff;
        uint16_t bxlocal = (w >> 16) & 0xfff;
        uint16_t lv1local = (w >> 32) & 0xfff;
        uint16_t block_length = (w >> 48) & 0x1ff;

        // a zero-length block would never advance the pointer: stop unpacking this FED
        if (block_length == 0)
          break;

        uint16_t const dccbx = bx & 0xfff;
        uint16_t const dccl1 = lv1 & 0xfff;
        // fov>=1 is required to support simulated data for which bx==bxlocal==0
        if (fov >= 1 && !is_synced_towerblock(dccbx, bxlocal, dccl1, lv1local)) {
          current_tower_block += block_length;
          continue;
        }

        // go through all the channels
        // get the next channel coordinates
        // (one header word, then 3 words per channel)
        uint32_t nchannels = (block_length - 1) / 3;

        // 1 threads per channel in this block
        for (uint32_t ich = 0; ich < nchannels; ich += NTHREADS) {
          auto const i_to_access = ich + threadIdx.x;
          // threads outside of the range -> leave the loop
          if (i_to_access >= nchannels)
            break;

          // inc the channel's counter and get the pos where to store
          auto const wdata = current_tower_block[1 + i_to_access * 3];
          uint8_t const stripid = wdata & 0x7;
          uint8_t const xtalid = (wdata >> 4) & 0x7;
          ElectronicsIdGPU eid{fed2dcc(fed), ttid, stripid, xtalid};
          auto const didraw = isBarrel ? compute_ebdetid(eid) : eid2did[eid.linearIndex()];
          // FIXME: what kind of channels are these guys
          if (didraw == 0)
            continue;

          // get samples: 10 values of 14 bits each, spread over 3 words
          uint16_t sampleValues[10];
          sampleValues[0] = (wdata >> 16) & 0x3fff;
          sampleValues[1] = (wdata >> 32) & 0x3fff;
          sampleValues[2] = (wdata >> 48) & 0x3fff;
          auto const wdata1 = current_tower_block[2 + i_to_access * 3];
          sampleValues[3] = wdata1 & 0x3fff;
          sampleValues[4] = (wdata1 >> 16) & 0x3fff;
          sampleValues[5] = (wdata1 >> 32) & 0x3fff;
          sampleValues[6] = (wdata1 >> 48) & 0x3fff;
          auto const wdata2 = current_tower_block[3 + i_to_access * 3];
          sampleValues[7] = wdata2 & 0x3fff;
          sampleValues[8] = (wdata2 >> 16) & 0x3fff;
          sampleValues[9] = (wdata2 >> 32) & 0x3fff;

          // check gain
          bool isSaturation = true;
          short firstGainZeroSampID{-1}, firstGainZeroSampADC{-1};
          for (uint32_t si = 0; si < 10; si++) {
            if (gainId(sampleValues[si]) == 0) {
              firstGainZeroSampID = si;
              firstGainZeroSampADC = adc(sampleValues[si]);
              break;
            }
          }
          if (firstGainZeroSampID != -1) {
            // a gain-0 sample is accepted as saturation only if it is followed by a plateau of
            // gain-0 samples with the same ADC value (up to 5 samples, or the end of the frame)
            unsigned int plateauEnd = std::min(10u, static_cast<unsigned int>(firstGainZeroSampID + 5));
            for (unsigned int s = firstGainZeroSampID; s < plateauEnd; s++) {
              if (gainId(sampleValues[s]) == 0 && adc(sampleValues[s]) == firstGainZeroSampADC) {
                ;
              } else {
                isSaturation = false;
                break;
              }  //it's not saturation
            }
            // get rid of channels which are stuck in gain0
            if (firstGainZeroSampID < 3) {
              isSaturation = false;
            }
            if (!isSaturation)
              continue;
          } else {  // there is no zero gainId sample
            // gain switch check
            short numGain = 1;
            bool gainSwitchError = false;
            for (unsigned int si = 1; si < 10; si++) {
              if ((gainId(sampleValues[si - 1]) > gainId(sampleValues[si])) && numGain < 5)
                gainSwitchError = true;
              if (gainId(sampleValues[si - 1]) == gainId(sampleValues[si]))
                numGain++;
              else
                numGain = 1;
            }
            if (gainSwitchError)
              continue;
          }

          // reserve the output slot for this channel
          auto const pos = atomicAdd(pChannelsCounter, 1);

          // store to global
          ids[pos] = didraw;
          for (uint32_t si = 0; si < 10; ++si)
            samples[pos * 10 + si] = sampleValues[si];
        }

        current_tower_block += block_length;
      }
    }

    // Host-side entry point of the unpacker. Everything is enqueued on `cudaStream`:
    //  - the two channel counters (EB, EE) in scratchGPU are reset to zero;
    //  - if at least one FED has data, the raw data, offsets and FED ids are copied
    //    to the device and the unpacking kernel is launched with one block per FED;
    //  - the two channel counters are copied back into outputCPU.nchannels.
    // nfedsWithData is the number of valid entries in offsets/feds, nbytesTotal the
    // number of valid bytes in the raw data buffer.
    void entryPoint(InputDataCPU const& inputCPU,
                    InputDataGPU& inputGPU,
                    OutputDataGPU& outputGPU,
                    ScratchDataGPU& scratchGPU,
                    OutputDataCPU& outputCPU,
                    ConditionsProducts const& conditions,
                    cudaStream_t cudaStream,
                    uint32_t const nfedsWithData,
                    uint32_t const nbytesTotal) {
      constexpr int nthreadsPerBlock = 32;

      cudaCheck(cudaMemsetAsync(scratchGPU.pChannelsCounter.get(),
                                0,
                                sizeof(uint32_t) * 2,  // EB + EE
                                cudaStream));

      // a kernel launch with zero blocks is an invalid configuration:
      // with no FED to unpack only the (zeroed) counters are transferred back
      if (nfedsWithData > 0) {
        // transfer
        cudaCheck(cudaMemcpyAsync(inputGPU.data.get(),
                                  inputCPU.data.get(),
                                  nbytesTotal * sizeof(unsigned char),
                                  cudaMemcpyHostToDevice,
                                  cudaStream));
        cudaCheck(cudaMemcpyAsync(inputGPU.offsets.get(),
                                  inputCPU.offsets.get(),
                                  nfedsWithData * sizeof(uint32_t),
                                  cudaMemcpyHostToDevice,
                                  cudaStream));
        cudaCheck(cudaMemcpyAsync(
            inputGPU.feds.get(), inputCPU.feds.get(), nfedsWithData * sizeof(int), cudaMemcpyHostToDevice, cudaStream));

        kernel_unpack_test<nthreadsPerBlock>
            <<<nfedsWithData, nthreadsPerBlock, 0, cudaStream>>>(inputGPU.data.get(),
                                                                 inputGPU.offsets.get(),
                                                                 inputGPU.feds.get(),
                                                                 outputGPU.digisEB.data.get(),
                                                                 outputGPU.digisEE.data.get(),
                                                                 outputGPU.digisEB.ids.get(),
                                                                 outputGPU.digisEE.ids.get(),
                                                                 scratchGPU.pChannelsCounter.get(),
                                                                 conditions.eMappingProduct.eid2did,
                                                                 nbytesTotal);
        cudaCheck(cudaGetLastError());
      }

      // transfer the counters for how many eb and ee channels we got
      cudaCheck(cudaMemcpyAsync(outputCPU.nchannels.get(),
                                scratchGPU.pChannelsCounter.get(),
                                sizeof(uint32_t) * 2,
                                cudaMemcpyDeviceToHost,
                                cudaStream));
    }

  }  // namespace raw
}  // namespace ecal
namespace ecal diff --git a/EventFilter/EcalRawToDigi/plugins/UnpackGPU.h b/EventFilter/EcalRawToDigi/plugins/UnpackGPU.h new file mode 100644 index 0000000000000..d98906e7e24a7 --- /dev/null +++ b/EventFilter/EcalRawToDigi/plugins/UnpackGPU.h @@ -0,0 +1,23 @@ +#ifndef EventFilter_EcalRawToDigi_plugins_UnpackGPU_h +#define EventFilter_EcalRawToDigi_plugins_UnpackGPU_h + +#include "DeclsForKernels.h" + +namespace ecal { + namespace raw { + + // FIXME: bundle up uint32_t values + void entryPoint(InputDataCPU const&, + InputDataGPU&, + OutputDataGPU&, + ScratchDataGPU&, + OutputDataCPU&, + ConditionsProducts const&, + cudaStream_t, + uint32_t const, + uint32_t const); + + } // namespace raw +} // namespace ecal + +#endif // EventFilter_EcalRawToDigi_plugins_UnpackGPU_h diff --git a/EventFilter/EcalRawToDigi/python/ecalDigis_cff.py b/EventFilter/EcalRawToDigi/python/ecalDigis_cff.py index 849aaeeb414a4..00a54ad56c128 100644 --- a/EventFilter/EcalRawToDigi/python/ecalDigis_cff.py +++ b/EventFilter/EcalRawToDigi/python/ecalDigis_cff.py @@ -5,3 +5,24 @@ ecalDigis = _ecalEBunpacker.clone() ecalDigisTask = cms.Task(ecalDigis) + +# process modifier to run on GPUs +from Configuration.ProcessModifiers.gpu_cff import gpu + +# GPU-friendly EventSetup modules +from EventFilter.EcalRawToDigi.ecalElectronicsMappingGPUESProducer_cfi import ecalElectronicsMappingGPUESProducer + +# raw to digi on GPUs +from EventFilter.EcalRawToDigi.ecalRawToDigiGPU_cfi import ecalRawToDigiGPU as _ecalRawToDigiGPU +ecalDigisGPU = _ecalRawToDigiGPU.clone() + +# copy the digi from the GPU to the CPU and convert to legacy format +from EventFilter.EcalRawToDigi.ecalCPUDigisProducer_cfi import ecalCPUDigisProducer as _ecalCPUDigisProducer +_ecalDigis_gpu = _ecalCPUDigisProducer.clone( + digisInLabelEB = ('ecalDigisGPU', 'ebDigis'), + digisInLabelEE = ('ecalDigisGPU', 'eeDigis'), + produceDummyIntegrityCollections = True +) +gpu.toReplaceWith(ecalDigis, _ecalDigis_gpu) + +gpu.toReplaceWith(ecalDigisTask, 
cms.Task(ecalElectronicsMappingGPUESProducer, ecalDigisGPU, ecalDigis)) diff --git a/EventFilter/EcalRawToDigi/src/ElectronicsMappingGPU.cc b/EventFilter/EcalRawToDigi/src/ElectronicsMappingGPU.cc new file mode 100644 index 0000000000000..8264c501a896c --- /dev/null +++ b/EventFilter/EcalRawToDigi/src/ElectronicsMappingGPU.cc @@ -0,0 +1,57 @@ +#include "EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +#include "DataFormats/EcalDetId/interface/EcalElectronicsId.h" + +namespace ecal { + namespace raw { + + // TODO: 0x3FFFFF * 4B ~= 16MB + // tmp solution for linear mapping of eid -> did + ElectronicsMappingGPU::ElectronicsMappingGPU(EcalMappingElectronics const& mapping) : eid2did_(0x3FFFFF) { + // fill in eb + // TODO: EB vector is actually empty + auto const& barrelValues = mapping.barrelItems(); + for (unsigned int i = 0; i < barrelValues.size(); i++) { + EcalElectronicsId eid{barrelValues[i].electronicsid}; + EBDetId did{EBDetId::unhashIndex(i)}; + eid2did_[eid.linearIndex()] = did.rawId(); + } + + // fill in ee + auto const& endcapValues = mapping.endcapItems(); + for (unsigned int i = 0; i < endcapValues.size(); i++) { + EcalElectronicsId eid{endcapValues[i].electronicsid}; + EEDetId did{EEDetId::unhashIndex(i)}; + eid2did_[eid.linearIndex()] = did.rawId(); + } + } + + ElectronicsMappingGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(eid2did)); + } + + ElectronicsMappingGPU::Product const& ElectronicsMappingGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](ElectronicsMappingGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck(cudaMalloc((void**)&product.eid2did, this->eid2did_.size() * sizeof(uint32_t))); + + // transfer + cudaCheck(cudaMemcpyAsync(product.eid2did, + this->eid2did_.data(), + 
this->eid2did_.size() * sizeof(uint32_t), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; + } + + } // namespace raw +} // namespace ecal + +TYPELOOKUP_DATA_REG(ecal::raw::ElectronicsMappingGPU); diff --git a/RecoLocalCalo/Configuration/python/customizeEcalOnlyForProfiling.py b/RecoLocalCalo/Configuration/python/customizeEcalOnlyForProfiling.py new file mode 100644 index 0000000000000..4fa955bd33836 --- /dev/null +++ b/RecoLocalCalo/Configuration/python/customizeEcalOnlyForProfiling.py @@ -0,0 +1,51 @@ +import FWCore.ParameterSet.Config as cms + +# Customise the ECAL-only reconstruction to run on GPU +# +# Currently, this means running only the unpacker and multifit, up to the uncalbrated rechits +def customizeEcalOnlyForProfilingGPUOnly(process): + + process.consumer = cms.EDAnalyzer("GenericConsumer", + eventProducts = cms.untracked.vstring('ecalMultiFitUncalibRecHitGPU') + ) + + process.consume_step = cms.EndPath(process.consumer) + + process.schedule = cms.Schedule(process.raw2digi_step, process.reconstruction_step, process.consume_step) + + return process + + +# Customise the ECAL-only reconstruction to run on GPU, and copy the data to the host +# +# Currently, this means running only the unpacker and multifit, up to the uncalbrated rechits +def customizeEcalOnlyForProfilingGPUWithHostCopy(process): + + process.consumer = cms.EDAnalyzer("GenericConsumer", + eventProducts = cms.untracked.vstring('ecalMultiFitUncalibRecHitSoA') + ) + + process.consume_step = cms.EndPath(process.consumer) + + process.schedule = cms.Schedule(process.raw2digi_step, process.reconstruction_step, process.consume_step) + + return process + + +# Customise the ECAL-only reconstruction to run on GPU, copy the data to the host, and convert to legacy format +# +# Currently, this means running only the unpacker and multifit, up to the uncalbrated rechits, on the GPU +# and the rechits producer on the CPU +# +# The same customisation can be also used on the CPU workflow, 
running up to the rechits on CPU. +def customizeEcalOnlyForProfiling(process): + + process.consumer = cms.EDAnalyzer("GenericConsumer", + eventProducts = cms.untracked.vstring('ecalRecHit') + ) + + process.consume_step = cms.EndPath(process.consumer) + + process.schedule = cms.Schedule(process.raw2digi_step, process.reconstruction_step, process.consume_step) + + return process diff --git a/RecoLocalCalo/Configuration/python/ecalLocalRecoSequence_cff.py b/RecoLocalCalo/Configuration/python/ecalLocalRecoSequence_cff.py index 06fecf4787baf..75ae5fc0c202f 100644 --- a/RecoLocalCalo/Configuration/python/ecalLocalRecoSequence_cff.py +++ b/RecoLocalCalo/Configuration/python/ecalLocalRecoSequence_cff.py @@ -1,4 +1,5 @@ import FWCore.ParameterSet.Config as cms +from Configuration.ProcessModifiers.gpu_cff import gpu # TPG condition needed by ecalRecHit producer if TT recovery is ON from RecoLocalCalo.EcalRecProducers.ecalRecHitTPGConditions_cff import * @@ -43,6 +44,61 @@ ecalOnlyLocalRecoSequence = cms.Sequence(ecalOnlyLocalRecoTask) +# ECAL rechit calibrations on GPU +from RecoLocalCalo.EcalRecProducers.ecalRechitADCToGeVConstantGPUESProducer_cfi import ecalRechitADCToGeVConstantGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalRechitChannelStatusGPUESProducer_cfi import ecalRechitChannelStatusGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalIntercalibConstantsGPUESProducer_cfi import ecalIntercalibConstantsGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalLaserAPDPNRatiosGPUESProducer_cfi import ecalLaserAPDPNRatiosGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalLaserAPDPNRatiosRefGPUESProducer_cfi import ecalLaserAPDPNRatiosRefGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalLaserAlphasGPUESProducer_cfi import ecalLaserAlphasGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalLinearCorrectionsGPUESProducer_cfi import ecalLinearCorrectionsGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalRecHitParametersGPUESProducer_cfi import 
ecalRecHitParametersGPUESProducer + +# ECAL rechits running on GPU +from RecoLocalCalo.EcalRecProducers.ecalRecHitGPU_cfi import ecalRecHitGPU as _ecalRecHitGPU +ecalRecHitGPU = _ecalRecHitGPU.clone( + uncalibrecHitsInLabelEB = cms.InputTag('ecalMultiFitUncalibRecHitGPU', 'EcalUncalibRecHitsEB'), + uncalibrecHitsInLabelEE = cms.InputTag('ecalMultiFitUncalibRecHitGPU', 'EcalUncalibRecHitsEE') +) + +# copy the rechits from GPU to CPU +from RecoLocalCalo.EcalRecProducers.ecalCPURecHitProducer_cfi import ecalCPURecHitProducer as _ecalCPURecHitProducer +ecalRecHitSoA = _ecalCPURecHitProducer.clone( + recHitsInLabelEB = cms.InputTag('ecalRecHitGPU', 'EcalRecHitsEB'), + recHitsInLabelEE = cms.InputTag('ecalRecHitGPU', 'EcalRecHitsEE') +) + +# convert the rechits from SoA to legacy format +from RecoLocalCalo.EcalRecProducers.ecalRecHitConvertGPU2CPUFormat_cfi import ecalRecHitConvertGPU2CPUFormat as _ecalRecHitConvertGPU2CPUFormat +_ecalRecHit_gpu = _ecalRecHitConvertGPU2CPUFormat.clone( + recHitsLabelGPUEB = cms.InputTag('ecalRecHitSoA', 'EcalRecHitsEB'), + recHitsLabelGPUEE = cms.InputTag('ecalRecHitSoA', 'EcalRecHitsEE') +) +# TODO: the ECAL calibrated rechits produced on the GPU are not correct, yet. 
+# When they are working and validated, remove this comment and uncomment the next line: +#gpu.toReplaceWith(ecalRecHit, _ecalRecHit_gpu) + +# ECAL reconstruction on GPU +gpu.toReplaceWith(ecalRecHitNoTPTask, cms.Task( + # ECAL rechit calibrations on GPU + ecalRechitADCToGeVConstantGPUESProducer, + ecalRechitChannelStatusGPUESProducer, + ecalIntercalibConstantsGPUESProducer, + ecalLaserAPDPNRatiosGPUESProducer, + ecalLaserAPDPNRatiosRefGPUESProducer, + ecalLaserAlphasGPUESProducer, + ecalLinearCorrectionsGPUESProducer, + ecalRecHitParametersGPUESProducer, + # ECAL rechits running on GPU + ecalRecHitGPU, + # copy the rechits from GPU to CPU + ecalRecHitSoA, + # convert the rechits from SoA to legacy format + ecalRecHit, + # ECAL preshower rechit legacy module + ecalPreshowerRecHit +)) + # Phase 2 modifications from RecoLocalCalo.EcalRecProducers.ecalDetailedTimeRecHit_cfi import * _phase2_timing_ecalRecHitTask = cms.Task( ecalRecHitTask.copy() , ecalDetailedTimeRecHit ) diff --git a/RecoLocalCalo/EcalRecAlgos/BuildFile.xml b/RecoLocalCalo/EcalRecAlgos/BuildFile.xml index 2eaf053c342dd..c2858ae76d7bc 100644 --- a/RecoLocalCalo/EcalRecAlgos/BuildFile.xml +++ b/RecoLocalCalo/EcalRecAlgos/BuildFile.xml @@ -1,9 +1,13 @@ + + + + @@ -11,6 +15,7 @@ + diff --git a/RecoLocalCalo/EcalRecAlgos/bin/BuildFile.xml b/RecoLocalCalo/EcalRecAlgos/bin/BuildFile.xml new file mode 100644 index 0000000000000..4c98171091b84 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/bin/BuildFile.xml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/RecoLocalCalo/EcalRecAlgos/bin/makeEcalMultifitResultsGpuValidationPlots.cpp b/RecoLocalCalo/EcalRecAlgos/bin/makeEcalMultifitResultsGpuValidationPlots.cpp new file mode 100644 index 0000000000000..f010e3afdbb18 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/bin/makeEcalMultifitResultsGpuValidationPlots.cpp @@ -0,0 +1,564 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include 
"DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/EcalRecHit/interface/EcalUncalibratedRecHit.h" +#include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h" + +#include "TStyle.h" + +void setAxis(TH2D *histo) { + histo->GetXaxis()->SetTitle("cpu"); + histo->GetYaxis()->SetTitle("gpu"); +} + +void setAxisDelta(TH2D *histo) { + histo->GetXaxis()->SetTitle("cpu"); + histo->GetYaxis()->SetTitle("#Delta gpu-cpu"); +} + +int main(int argc, char *argv[]) { + if (argc < 3) { + std::cout << "run with: ./validateGPU \n"; + exit(0); + } + + gStyle->SetOptStat("ourme"); + + edm::Wrapper>> *wgpuEB = + nullptr; + edm::Wrapper>> *wgpuEE = + nullptr; + edm::Wrapper *wcpuEB = nullptr; + edm::Wrapper *wcpuEE = nullptr; + + std::string fileName = argv[1]; + std::string outFileName = argv[2]; + + // output + TFile rfout{outFileName.c_str(), "recreate"}; + + int nbins_count = 200; + float last_count = 5000.; + int nbins_count_delta = 201; + + int nbins = 300; + float last = 3000.; + + // int nbins_chi2 = 1000; + // float last_chi2 = 1000.; + int nbins_chi2 = 1000; + float last_chi2 = 200.; + + int nbins_flags = 100; + float last_flags = 100.; + float delta_flags = 20; + + int nbins_delta = 201; // use an odd number to center around 0 + float delta = 0.2; + + // RecHits plots for EB and EE on both GPU and CPU + auto hRechitsEBGPU = new TH1D("RechitsEBGPU", "RechitsEBGPU; No. of Rechits", nbins_count, 0, last_count); + auto hRechitsEBCPU = new TH1D("RechitsEBCPU", "RechitsEBCPU; No. of Rechits", nbins_count, 0, last_count); + auto hRechitsEEGPU = new TH1D("RechitsEEGPU", "RechitsEEGPU; No. of Rechits", nbins_count, 0, last_count); + auto hRechitsEECPU = new TH1D("RechitsEECPU", "RechitsEECPU; No. 
of Rechits", nbins_count, 0, last_count); + auto hRechitsEBGPUCPUratio = new TH1D("RechitsEBGPU/CPUratio", "RechitsEBGPU/CPUratio; GPU/CPU", 50, 0.9, 1.1); + auto hRechitsEEGPUCPUratio = new TH1D("RechitsEEGPU/CPUratio", "RechitsEEGPU/CPUratio; GPU/CPU", 50, 0.9, 1.1); + + auto hSOIAmplitudesEBGPU = new TH1D("hSOIAmplitudesEBGPU", "hSOIAmplitudesEBGPU", nbins, 0, last); + auto hSOIAmplitudesEEGPU = new TH1D("hSOIAmplitudesEEGPU", "hSOIAmplitudesEEGPU", nbins, 0, last); + auto hSOIAmplitudesEBCPU = new TH1D("hSOIAmplitudesEBCPU", "hSOIAmplitudesEBCPU", nbins, 0, last); + auto hSOIAmplitudesEECPU = new TH1D("hSOIAmplitudesEECPU", "hSOIAmplitudesEECPU", nbins, 0, last); + auto hSOIAmplitudesEBGPUCPUratio = + new TH1D("SOIAmplitudesEBGPU/CPUratio", "SOIAmplitudesEBGPU/CPUratio; GPU/CPU", 200, 0.9, 1.1); + auto hSOIAmplitudesEEGPUCPUratio = + new TH1D("SOIAmplitudesEEGPU/CPUratio", "SOIAmplitudesEEGPU/CPUratio; GPU/CPU", 200, 0.9, 1.1); + + auto hChi2EBGPU = new TH1D("hChi2EBGPU", "hChi2EBGPU", nbins_chi2, 0, last_chi2); + auto hChi2EEGPU = new TH1D("hChi2EEGPU", "hChi2EEGPU", nbins_chi2, 0, last_chi2); + auto hChi2EBCPU = new TH1D("hChi2EBCPU", "hChi2EBCPU", nbins_chi2, 0, last_chi2); + auto hChi2EECPU = new TH1D("hChi2EECPU", "hChi2EECPU", nbins_chi2, 0, last_chi2); + auto hChi2EBGPUCPUratio = new TH1D("Chi2EBGPU/CPUratio", "Chi2EBGPU/CPUratio; GPU/CPU", 200, 0.9, 1.1); + auto hChi2EEGPUCPUratio = new TH1D("Chi2EEGPU/CPUratio", "Chi2EEGPU/CPUratio; GPU/CPU", 200, 0.9, 1.1); + + auto hFlagsEBGPU = new TH1D("hFlagsEBGPU", "hFlagsEBGPU", nbins_flags, 0, last_flags); + auto hFlagsEEGPU = new TH1D("hFlagsEEGPU", "hFlagsEEGPU", nbins_flags, 0, last_flags); + auto hFlagsEBCPU = new TH1D("hFlagsEBCPU", "hFlagsEBCPU", nbins_flags, 0, last_flags); + auto hFlagsEECPU = new TH1D("hFlagsEECPU", "hFlagsEECPU", nbins_flags, 0, last_flags); + auto hFlagsEBGPUCPUratio = new TH1D("FlagsEBGPU/CPUratio", "FlagsEBGPU/CPUratio; GPU/CPU", 200, 0.9, 1.1); + auto hFlagsEEGPUCPUratio = new 
TH1D("FlagsEEGPU/CPUratio", "FlagsEEGPU/CPUratio; GPU/CPU", 200, 0.9, 1.1); + + auto hSOIAmplitudesEBGPUvsCPU = + new TH2D("hSOIAmplitudesEBGPUvsCPU", "hSOIAmplitudesEBGPUvsCPU", nbins, 0, last, nbins, 0, last); + setAxis(hSOIAmplitudesEBGPUvsCPU); + auto hSOIAmplitudesEEGPUvsCPU = + new TH2D("hSOIAmplitudesEEGPUvsCPU", "hSOIAmplitudesEEGPUvsCPU", nbins, 0, last, nbins, 0, last); + setAxis(hSOIAmplitudesEEGPUvsCPU); + auto hSOIAmplitudesEBdeltavsCPU = + new TH2D("hSOIAmplitudesEBdeltavsCPU", "hSOIAmplitudesEBdeltavsCPU", nbins, 0, last, nbins_delta, -delta, delta); + setAxisDelta(hSOIAmplitudesEBdeltavsCPU); + auto hSOIAmplitudesEEdeltavsCPU = + new TH2D("hSOIAmplitudesEEdeltavsCPU", "hSOIAmplitudesEEdeltavsCPU", nbins, 0, last, nbins_delta, -delta, delta); + setAxisDelta(hSOIAmplitudesEEdeltavsCPU); + + auto hChi2EBGPUvsCPU = + new TH2D("hChi2EBGPUvsCPU", "hChi2EBGPUvsCPU", nbins_chi2, 0, last_chi2, nbins_chi2, 0, last_chi2); + setAxis(hChi2EBGPUvsCPU); + auto hChi2EEGPUvsCPU = + new TH2D("hChi2EEGPUvsCPU", "hChi2EEGPUvsCPU", nbins_chi2, 0, last_chi2, nbins_chi2, 0, last_chi2); + setAxis(hChi2EEGPUvsCPU); + auto hChi2EBdeltavsCPU = + new TH2D("hChi2EBdeltavsCPU", "hChi2EBdeltavsCPU", nbins_chi2, 0, last_chi2, nbins_delta, -delta, delta); + setAxisDelta(hChi2EBdeltavsCPU); + auto hChi2EEdeltavsCPU = + new TH2D("hChi2EEdeltavsCPU", "hChi2EEdeltavsCPU", nbins_chi2, 0, last_chi2, nbins_delta, -delta, delta); + setAxisDelta(hChi2EEdeltavsCPU); + + auto hFlagsEBGPUvsCPU = + new TH2D("hFlagsEBGPUvsCPU", "hFlagsEBGPUvsCPU", nbins_flags, 0, last_flags, nbins_flags, 0, last_flags); + setAxis(hFlagsEBGPUvsCPU); + auto hFlagsEEGPUvsCPU = + new TH2D("hFlagsEEGPUvsCPU", "hFlagsEEGPUvsCPU", nbins_flags, 0, last_flags, nbins_flags, 0, last_flags); + setAxis(hFlagsEEGPUvsCPU); + auto hFlagsEBdeltavsCPU = new TH2D( + "hFlagsEBdeltavsCPU", "hFlagsEBdeltavsCPU", nbins_flags, 0, last_flags, nbins_delta, -delta_flags, delta_flags); + setAxisDelta(hFlagsEBdeltavsCPU); + auto 
hFlagsEEdeltavsCPU = new TH2D( + "hFlagsEEdeltavsCPU", "hFlagsEEdeltavsCPU", nbins_flags, 0, last_flags, nbins_delta, -delta_flags, delta_flags); + setAxisDelta(hFlagsEEdeltavsCPU); + + auto hRechitsEBGPUvsCPU = new TH2D( + "RechitsEBGPUvsCPU", "RechitsEBGPUvsCPU; CPU; GPU", last_count, 0, last_count, last_count, 0, last_count); + setAxis(hRechitsEBGPUvsCPU); + auto hRechitsEEGPUvsCPU = new TH2D( + "RechitsEEGPUvsCPU", "RechitsEEGPUvsCPU; CPU; GPU", last_count, 0, last_count, last_count, 0, last_count); + setAxis(hRechitsEEGPUvsCPU); + auto hRechitsEBdeltavsCPU = new TH2D( + "RechitsEBdeltavsCPU", "RechitsEBdeltavsCPU", nbins_count, 0, last_count, nbins_count_delta, -delta, delta); + setAxisDelta(hRechitsEBdeltavsCPU); + auto hRechitsEEdeltavsCPU = new TH2D( + "RechitsEEdeltavsCPU", "RechitsEEdeltavsCPU", nbins_count, 0, last_count, nbins_count_delta, -delta, delta); + setAxisDelta(hRechitsEEdeltavsCPU); + + // input + std::cout << "validating file " << fileName << std::endl; + TFile rf{fileName.c_str()}; + TTree *rt = (TTree *)rf.Get("Events"); + rt->SetBranchAddress( + "calocommonCUDAHostAllocatorAliascalocommonVecStoragePolicyecalUncalibratedRecHit_ecalCPUUncalibRecHitProducer_" + "EcalUncalibRecHitsEB_RECO.", + &wgpuEB); + rt->SetBranchAddress( + "calocommonCUDAHostAllocatorAliascalocommonVecStoragePolicyecalUncalibratedRecHit_ecalCPUUncalibRecHitProducer_" + "EcalUncalibRecHitsEE_RECO.", + &wgpuEE); + rt->SetBranchAddress("EcalUncalibratedRecHitsSorted_ecalMultiFitUncalibRecHit_EcalUncalibRecHitsEB_RECO.", &wcpuEB); + rt->SetBranchAddress("EcalUncalibratedRecHitsSorted_ecalMultiFitUncalibRecHit_EcalUncalibRecHitsEE_RECO.", &wcpuEE); + + constexpr float eps_diff = 1e-3; + + // accumulate + auto const nentries = rt->GetEntries(); + std::cout << "#events to validate over: " << nentries << std::endl; + for (int ie = 0; ie < nentries; ++ie) { + rt->GetEntry(ie); + + const char *ordinal[] = {"th", "st", "nd", "rd", "th", "th", "th", "th", "th", "th"}; + auto 
cpu_eb_size = wcpuEB->bareProduct().size(); + auto cpu_ee_size = wcpuEE->bareProduct().size(); + auto gpu_eb_size = wgpuEB->bareProduct().amplitude.size(); + auto gpu_ee_size = wgpuEE->bareProduct().amplitude.size(); + + float eb_ratio = (float)gpu_eb_size / cpu_eb_size; + float ee_ratio = (float)gpu_ee_size / cpu_ee_size; + + // Filling up the histograms on events sizes for EB and EE on both GPU and CPU + hRechitsEBGPU->Fill(gpu_eb_size); + hRechitsEBCPU->Fill(cpu_eb_size); + hRechitsEEGPU->Fill(gpu_ee_size); + hRechitsEECPU->Fill(cpu_ee_size); + hRechitsEBGPUvsCPU->Fill(cpu_eb_size, gpu_eb_size); + hRechitsEEGPUvsCPU->Fill(cpu_ee_size, gpu_ee_size); + hRechitsEBGPUCPUratio->Fill(eb_ratio); + hRechitsEEGPUCPUratio->Fill(ee_ratio); + hRechitsEBdeltavsCPU->Fill(cpu_eb_size, gpu_eb_size - cpu_eb_size); + hRechitsEEdeltavsCPU->Fill(cpu_ee_size, gpu_ee_size - cpu_ee_size); + + if (cpu_eb_size != gpu_eb_size or cpu_ee_size != gpu_ee_size) { + std::cerr << ie << ordinal[ie % 10] << " entry:\n" + << " EB size: " << std::setw(4) << cpu_eb_size << " (cpu) vs " << std::setw(4) << gpu_eb_size + << " (gpu)\n" + << " EE size: " << std::setw(4) << cpu_ee_size << " (cpu) vs " << std::setw(4) << gpu_ee_size + << " (gpu)" << std::endl; + continue; + } + + assert(wgpuEB->bareProduct().amplitude.size() == wcpuEB->bareProduct().size()); + assert(wgpuEE->bareProduct().amplitude.size() == wcpuEE->bareProduct().size()); + auto const neb = wcpuEB->bareProduct().size(); + auto const nee = wcpuEE->bareProduct().size(); + + for (uint32_t i = 0; i < neb; ++i) { + auto const did_gpu = wgpuEB->bareProduct().did[i]; + auto const soi_amp_gpu = wgpuEB->bareProduct().amplitude[i]; + auto const cpu_iter = wcpuEB->bareProduct().find(DetId{did_gpu}); + if (cpu_iter == wcpuEB->bareProduct().end()) { + std::cerr << ie << ordinal[ie % 10] << " entry\n" + << " Did not find a DetId " << did_gpu << " in a CPU collection\n"; + continue; + } + auto const soi_amp_cpu = cpu_iter->amplitude(); + auto const 
chi2_gpu = wgpuEB->bareProduct().chi2[i]; + auto const chi2_cpu = cpu_iter->chi2(); + + auto const flags_gpu = wgpuEB->bareProduct().flags[i]; + auto const flags_cpu = cpu_iter->flags(); + + hSOIAmplitudesEBGPU->Fill(soi_amp_gpu); + hSOIAmplitudesEBCPU->Fill(soi_amp_cpu); + hSOIAmplitudesEBGPUvsCPU->Fill(soi_amp_cpu, soi_amp_gpu); + hSOIAmplitudesEBdeltavsCPU->Fill(soi_amp_cpu, soi_amp_gpu - soi_amp_cpu); + if (soi_amp_cpu > 0) + hSOIAmplitudesEBGPUCPUratio->Fill((float)soi_amp_gpu / soi_amp_cpu); + + hChi2EBGPU->Fill(chi2_gpu); + hChi2EBCPU->Fill(chi2_cpu); + hChi2EBGPUvsCPU->Fill(chi2_cpu, chi2_gpu); + hChi2EBdeltavsCPU->Fill(chi2_cpu, chi2_gpu - chi2_cpu); + if (chi2_cpu > 0) + hChi2EBGPUCPUratio->Fill((float)chi2_gpu / chi2_cpu); + + if (std::abs(chi2_gpu / chi2_cpu - 1) > 0.05 || std::abs(soi_amp_gpu / soi_amp_cpu - 1) > 0.05) { + std::cout << " ---- EB " << std::endl; + std::cout << " eventid = " << ie << " xtal = " << i << std::endl; + std::cout << " chi2_gpu = " << chi2_gpu << " chi2_cpu = " << chi2_cpu << std::endl; + std::cout << " soi_amp_gpu = " << soi_amp_gpu << " soi_amp_cpu = " << soi_amp_cpu << std::endl; + std::cout << " flags_gpu = " << flags_gpu << " flags_cpu = " << flags_cpu << std::endl; + } + + hFlagsEBGPU->Fill(flags_gpu); + hFlagsEBCPU->Fill(flags_cpu); + hFlagsEBGPUvsCPU->Fill(flags_cpu, flags_gpu); + hFlagsEBdeltavsCPU->Fill(flags_cpu, flags_gpu - flags_cpu); + if (flags_cpu > 0) + hFlagsEBGPUCPUratio->Fill((float)flags_gpu / flags_cpu); + + if (flags_cpu != flags_gpu) { + std::cout << " >> No! 
Different flag cpu:gpu = " << flags_cpu << " : " << flags_gpu; + std::cout << std::endl; + } + + if ((std::abs(soi_amp_gpu - soi_amp_cpu) >= eps_diff) or (std::abs(chi2_gpu - chi2_cpu) >= eps_diff) or + std::isnan(chi2_gpu) or (flags_cpu != flags_gpu)) { + printf("EB eventid = %d chid = %d amp_gpu = %f amp_cpu %f chi2_gpu = %f chi2_cpu = %f\n", + ie, + i, + soi_amp_gpu, + soi_amp_cpu, + chi2_gpu, + chi2_cpu); + if (std::isnan(chi2_gpu)) + printf("*** nan ***\n"); + } + } + + for (uint32_t i = 0; i < nee; ++i) { + auto const did_gpu = wgpuEE->bareProduct().did[i]; + auto const soi_amp_gpu = wgpuEE->bareProduct().amplitude[i]; + auto const cpu_iter = wcpuEE->bareProduct().find(DetId{did_gpu}); + if (cpu_iter == wcpuEE->bareProduct().end()) { + std::cerr << ie << ordinal[ie % 10] << " entry\n" + << " did not find a DetId " << did_gpu << " in a CPU collection\n"; + continue; + } + auto const soi_amp_cpu = cpu_iter->amplitude(); + auto const chi2_gpu = wgpuEE->bareProduct().chi2[i]; + auto const chi2_cpu = cpu_iter->chi2(); + + auto const flags_gpu = wgpuEE->bareProduct().flags[i]; + auto const flags_cpu = cpu_iter->flags(); + + hSOIAmplitudesEEGPU->Fill(soi_amp_gpu); + hSOIAmplitudesEECPU->Fill(soi_amp_cpu); + hSOIAmplitudesEEGPUvsCPU->Fill(soi_amp_cpu, soi_amp_gpu); + hSOIAmplitudesEEdeltavsCPU->Fill(soi_amp_cpu, soi_amp_gpu - soi_amp_cpu); + if (soi_amp_cpu > 0) + hSOIAmplitudesEEGPUCPUratio->Fill((float)soi_amp_gpu / soi_amp_cpu); + + hChi2EEGPU->Fill(chi2_gpu); + hChi2EECPU->Fill(chi2_cpu); + hChi2EEGPUvsCPU->Fill(chi2_cpu, chi2_gpu); + hChi2EEdeltavsCPU->Fill(chi2_cpu, chi2_gpu - chi2_cpu); + if (chi2_cpu > 0) + hChi2EEGPUCPUratio->Fill((float)chi2_gpu / chi2_cpu); + + if (std::abs(chi2_gpu / chi2_cpu - 1) > 0.05 || std::abs(soi_amp_gpu / soi_amp_cpu - 1) > 0.05) { + std::cout << " ---- EE " << std::endl; + std::cout << " eventid = " << ie << " xtal = " << i << std::endl; + std::cout << " chi2_gpu = " << chi2_gpu << " chi2_cpu = " << chi2_cpu << std::endl; + 
std::cout << " soi_amp_gpu = " << soi_amp_gpu << " soi_amp_cpu = " << soi_amp_cpu << std::endl; + std::cout << " flags_gpu = " << flags_gpu << " flags_cpu = " << flags_cpu << std::endl; + } + + hFlagsEEGPU->Fill(flags_gpu); + hFlagsEECPU->Fill(flags_cpu); + hFlagsEEGPUvsCPU->Fill(flags_cpu, flags_gpu); + hFlagsEEdeltavsCPU->Fill(flags_cpu, flags_gpu - flags_cpu); + if (flags_cpu > 0) + hFlagsEEGPUCPUratio->Fill((float)flags_gpu / flags_cpu); + + if (flags_cpu != flags_gpu) { + std::cout << " >> No! Different flag cpu:gpu = " << flags_cpu << " : " << flags_gpu; + std::cout << std::endl; + } + + if ((std::abs(soi_amp_gpu - soi_amp_cpu) >= eps_diff) or (std::abs(chi2_gpu - chi2_cpu) >= eps_diff) or + std::isnan(chi2_gpu) or (flags_cpu != flags_gpu)) { + printf("EE eventid = %d chid = %d amp_gpu = %f amp_cpu %f chi2_gpu = %f chi2_cpu = %f\n", + ie, + static_cast(neb + i), + soi_amp_gpu, + soi_amp_cpu, + chi2_gpu, + chi2_cpu); + if (std::isnan(chi2_gpu)) + printf("*** nan ***\n"); + } + } + } + + { + TCanvas c("plots", "plots", 1750, 860); + c.Divide(3, 2); + + c.cd(1); + { + gPad->SetLogy(); + hSOIAmplitudesEBCPU->SetLineColor(kBlack); + hSOIAmplitudesEBCPU->SetLineWidth(1.); + hSOIAmplitudesEBCPU->Draw(""); + hSOIAmplitudesEBGPU->SetLineColor(kBlue); + hSOIAmplitudesEBGPU->SetLineWidth(1.); + hSOIAmplitudesEBGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hSOIAmplitudesEBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + + c.cd(4); + { + gPad->SetLogy(); + hSOIAmplitudesEECPU->SetLineColor(kBlack); + hSOIAmplitudesEECPU->SetLineWidth(1.); + hSOIAmplitudesEECPU->Draw(""); + hSOIAmplitudesEEGPU->SetLineColor(kBlue); + hSOIAmplitudesEEGPU->SetLineWidth(1.); + hSOIAmplitudesEEGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hSOIAmplitudesEEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + 
stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + + c.cd(2); + gPad->SetGrid(); + hSOIAmplitudesEBGPUvsCPU->Draw("COLZ"); + + c.cd(5); + gPad->SetGrid(); + hSOIAmplitudesEEGPUvsCPU->Draw("COLZ"); + + c.cd(3); + + hSOIAmplitudesEBGPUCPUratio->Draw(""); + + c.cd(6); + + hSOIAmplitudesEEGPUCPUratio->Draw(""); + + c.SaveAs("ecal-amplitudes.root"); + c.SaveAs("ecal-amplitudes.png"); + + // chi2 + + c.cd(1); + { + gPad->SetLogy(); + hChi2EBCPU->SetLineColor(kBlack); + hChi2EBCPU->SetLineWidth(1.); + hChi2EBCPU->Draw(""); + hChi2EBGPU->SetLineColor(kBlue); + hChi2EBGPU->SetLineWidth(1.); + hChi2EBGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hChi2EBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + + c.cd(4); + { + gPad->SetLogy(); + hChi2EECPU->SetLineColor(kBlack); + hChi2EECPU->SetLineWidth(1.); + hChi2EECPU->Draw(""); + hChi2EEGPU->SetLineColor(kBlue); + hChi2EEGPU->SetLineWidth(1.); + hChi2EEGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hChi2EEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + + c.cd(2); + gPad->SetGrid(); + hChi2EBGPUvsCPU->Draw("COLZ"); + + c.cd(5); + gPad->SetGrid(); + hChi2EEGPUvsCPU->Draw("COLZ"); + + c.cd(3); + + hChi2EBGPUCPUratio->Draw(""); + + c.cd(6); + + hChi2EEGPUCPUratio->Draw(""); + + c.SaveAs("ecal-chi2.root"); + c.SaveAs("ecal-chi2.png"); + + // flags + + c.cd(1); + { + gPad->SetLogy(); + hFlagsEBCPU->SetLineColor(kBlack); + hFlagsEBCPU->SetLineWidth(1.); + hFlagsEBCPU->Draw(""); + hFlagsEBGPU->SetLineColor(kBlue); + hFlagsEBGPU->SetLineWidth(1.); + hFlagsEBGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hFlagsEBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + + 
c.cd(4); + { + gPad->SetLogy(); + hFlagsEECPU->SetLineColor(kBlack); + hFlagsEECPU->SetLineWidth(1.); + hFlagsEECPU->Draw(""); + hFlagsEEGPU->SetLineColor(kBlue); + hFlagsEEGPU->SetLineWidth(1.); + hFlagsEEGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hFlagsEEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + + c.cd(2); + gPad->SetGrid(); + hFlagsEBGPUvsCPU->Draw("COLZ"); + + c.cd(5); + gPad->SetGrid(); + hFlagsEEGPUvsCPU->Draw("COLZ"); + + c.cd(3); + hFlagsEBGPUCPUratio->Draw(""); + + c.cd(6); + hFlagsEEGPUCPUratio->Draw(""); + + c.SaveAs("ecal-flags.root"); + c.SaveAs("ecal-flags.png"); + + TCanvas cRechits("Rechits", "Rechits", 1750, 860); + cRechits.Divide(3, 2); + + // Plotting the sizes of GPU vs CPU for each event of EB + cRechits.cd(1); + { + gPad->SetLogy(); + hRechitsEBCPU->SetLineColor(kRed); + hRechitsEBCPU->SetLineWidth(2); + hRechitsEBCPU->Draw(""); + hRechitsEBGPU->SetLineColor(kBlue); + hRechitsEBGPU->SetLineWidth(2); + hRechitsEBGPU->Draw("sames"); + cRechits.Update(); + auto stats = (TPaveStats *)hRechitsEBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cRechits.cd(4); + { + gPad->SetLogy(); + hRechitsEECPU->SetLineColor(kRed); + hRechitsEECPU->SetLineWidth(2); + hRechitsEECPU->Draw(""); + hRechitsEEGPU->SetLineColor(kBlue); + hRechitsEEGPU->SetLineWidth(2); + hRechitsEEGPU->Draw("sames"); + cRechits.Update(); + auto stats = (TPaveStats *)hRechitsEEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cRechits.cd(2); + { hRechitsEBGPUvsCPU->Draw("COLZ"); } + cRechits.cd(5); + { hRechitsEEGPUvsCPU->Draw("COLZ"); } + cRechits.cd(3); + { + gPad->SetLogy(); + hRechitsEBGPUCPUratio->Draw(""); + } + cRechits.cd(6); + { + 
gPad->SetLogy(); + hRechitsEEGPUCPUratio->Draw(""); + } + cRechits.SaveAs("ecal-rechits.root"); + cRechits.SaveAs("ecal-rechits.png"); + } + + rf.Close(); + rfout.Write(); + rfout.Close(); + + return 0; +} diff --git a/RecoLocalCalo/EcalRecAlgos/bin/makeEcalRechitValidationPlots.cpp b/RecoLocalCalo/EcalRecAlgos/bin/makeEcalRechitValidationPlots.cpp new file mode 100644 index 0000000000000..42d1fceaf8b76 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/bin/makeEcalRechitValidationPlots.cpp @@ -0,0 +1,864 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "DataFormats/Common/interface/Wrapper.h" +#include "DataFormats/EcalRecHit/interface/EcalRecHit.h" +#include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h" + +int main(int argc, char *argv[]) { + if (argc < 3) { + std::cout << "run with: ./makeEcalRechitValidationPlots \n"; + exit(0); + } + // Set the GPU and CPU pointers for both EB and EE + edm::Wrapper>> *wgpuEB = nullptr; + edm::Wrapper>> *wgpuEE = nullptr; + edm::Wrapper *wcpuEB = nullptr; + edm::Wrapper *wcpuEE = nullptr; + + std::string fileName = argv[1]; // The input file containing the data to be validated (i.e. result.root) + std::string outFileName = argv[2]; //The output file in which the validation results will be saved (i.e. 
output.root) + + //output + TFile rfout{outFileName.c_str(), "recreate"}; + + int nbins = 200; + int last = 5000.; + + int nbins_energy = 300; + float last_energy = 2.; + + int nbins_chi2 = 200; + float last_chi2 = 100.; + + int nbins_flag = 40; + // int nbins_flag = 1000; + int last_flag = 1500; + // int nbins_flag = 40; + // int last_flag = 10000; + + int nbins_extra = 200; + int last_extra = 200; + + int nbins_delta = 201; // use an odd number to center around 0 + float delta = 0.2; + + // RecHits plots for EB and EE on both GPU and CPU + auto hRechitsEBGPU = new TH1D("RechitsEBGPU", "RechitsEBGPU; No. of Rechits. No Filter GPU", nbins, 0, last); + auto hRechitsEBCPU = new TH1D("RechitsEBCPU", "RechitsEBCPU; No. of Rechits. No Filter GPU", nbins, 0, last); + auto hRechitsEEGPU = new TH1D("RechitsEEGPU", "RechitsEEGPU; No. of Rechits. No Filter GPU", nbins, 0, last); + auto hRechitsEECPU = new TH1D("RechitsEECPU", "RechitsEECPU; No. of Rechits. No Filter GPU", nbins, 0, last); + auto hRechitsEBGPUvsCPU = + new TH2D("RechitsEBGPUvsCPU", "RechitsEBGPUvsCPU; CPU; GPU. No Filter GPU", last, 0, last, last, 0, last); + auto hRechitsEEGPUvsCPU = + new TH2D("RechitsEEGPUvsCPU", "RechitsEEGPUvsCPU; CPU; GPU. No Filter GPU", last, 0, last, last, 0, last); + auto hRechitsEBGPUCPUratio = + new TH1D("RechitsEBGPU/CPUratio", "RechitsEBGPU/CPUratio; GPU/CPU. No Filter GPU", 200, 0.95, 1.05); + auto hRechitsEEGPUCPUratio = + new TH1D("RechitsEEGPU/CPUratio", "RechitsEEGPU/CPUratio; GPU/CPU. No Filter GPU", 200, 0.95, 1.05); + auto hRechitsEBdeltavsCPU = + new TH2D("RechitsEBdeltavsCPU", "RechitsEBdeltavsCPU. No Filter GPU", nbins, 0, last, nbins_delta, -delta, delta); + auto hRechitsEEdeltavsCPU = + new TH2D("RechitsEEdeltavsCPU", "RechitsEEdeltavsCPU. No Filter GPU", nbins, 0, last, nbins_delta, -delta, delta); + + // RecHits plots for EB and EE on both GPU and CPU + auto hSelectedRechitsEBGPU = new TH1D("RechitsEBGPU", "RechitsEBGPU; No. 
of Rechits", nbins, 0, last); + auto hSelectedRechitsEBCPU = new TH1D("RechitsEBCPU", "RechitsEBCPU; No. of Rechits", nbins, 0, last); + auto hSelectedRechitsEEGPU = new TH1D("RechitsEEGPU", "RechitsEEGPU; No. of Rechits", nbins, 0, last); + auto hSelectedRechitsEECPU = new TH1D("RechitsEECPU", "RechitsEECPU; No. of Rechits", nbins, 0, last); + auto hSelectedRechitsEBGPUvsCPU = + new TH2D("RechitsEBGPUvsCPU", "RechitsEBGPUvsCPU; CPU; GPU", last, 0, last, last, 0, last); + auto hSelectedRechitsEEGPUvsCPU = + new TH2D("RechitsEEGPUvsCPU", "RechitsEEGPUvsCPU; CPU; GPU", last, 0, last, last, 0, last); + auto hSelectedRechitsEBGPUCPUratio = + new TH1D("RechitsEBGPU/CPUratio", "RechitsEBGPU/CPUratio; GPU/CPU", 200, 0.95, 1.05); + auto hSelectedRechitsEEGPUCPUratio = + new TH1D("RechitsEEGPU/CPUratio", "RechitsEEGPU/CPUratio; GPU/CPU", 200, 0.95, 1.05); + auto hSelectedRechitsEBdeltavsCPU = + new TH2D("RechitsEBdeltavsCPU", "RechitsEBdeltavsCPU", nbins, 0, last, nbins_delta, -delta, delta); + auto hSelectedRechitsEEdeltavsCPU = + new TH2D("RechitsEEdeltavsCPU", "RechitsEEdeltavsCPU", nbins, 0, last, nbins_delta, -delta, delta); + + // RecHits plots for EB and EE on both GPU and CPU + auto hPositiveRechitsEBGPU = new TH1D("RechitsEBGPU", "RechitsEBGPU; No. of Rechits", nbins, 0, last); + auto hPositiveRechitsEBCPU = new TH1D("RechitsEBCPU", "RechitsEBCPU; No. of Rechits", nbins, 0, last); + auto hPositiveRechitsEEGPU = new TH1D("RechitsEEGPU", "RechitsEEGPU; No. of Rechits", nbins, 0, last); + auto hPositiveRechitsEECPU = new TH1D("RechitsEECPU", "RechitsEECPU; No. 
of Rechits", nbins, 0, last); + auto hPositiveRechitsEBGPUvsCPU = + new TH2D("RechitsEBGPUvsCPU", "RechitsEBGPUvsCPU; CPU; GPU", last, 0, last, last, 0, last); + auto hPositiveRechitsEEGPUvsCPU = + new TH2D("RechitsEEGPUvsCPU", "RechitsEEGPUvsCPU; CPU; GPU", last, 0, last, last, 0, last); + auto hPositiveRechitsEBGPUCPUratio = + new TH1D("RechitsEBGPU/CPUratio", "RechitsEBGPU/CPUratio; GPU/CPU", 200, 0.95, 1.05); + auto hPositiveRechitsEEGPUCPUratio = + new TH1D("RechitsEEGPU/CPUratio", "RechitsEEGPU/CPUratio; GPU/CPU", 200, 0.95, 1.05); + auto hPositiveRechitsEBdeltavsCPU = + new TH2D("RechitsEBdeltavsCPU", "RechitsEBdeltavsCPU", nbins, 0, last, nbins_delta, -delta, delta); + auto hPositiveRechitsEEdeltavsCPU = + new TH2D("RechitsEEdeltavsCPU", "RechitsEEdeltavsCPU", nbins, 0, last, nbins_delta, -delta, delta); + + // Energies plots for EB and EE on both GPU and CPU + auto hEnergiesEBGPU = new TH1D("EnergiesEBGPU", "EnergiesEBGPU; Energy [GeV]", nbins_energy, 0, last_energy); + auto hEnergiesEEGPU = new TH1D("EnergiesEEGPU", "EnergiesEEGPU; Energy [GeV]", nbins_energy, 0, last_energy); + auto hEnergiesEBCPU = new TH1D("EnergiesEBCPU", "EnergiesEBCPU; Energy [GeV]", nbins_energy, 0, last_energy); + auto hEnergiesEECPU = new TH1D("EnergiesEECPU", "EnergiesEECPU; Energy [GeV]", nbins_energy, 0, last_energy); + auto hEnergiesEBGPUvsCPU = new TH2D( + "EnergiesEBGPUvsCPU", "EnergiesEBGPUvsCPU; CPU; GPU", nbins_energy, 0, last_energy, nbins_energy, 0, last_energy); + auto hEnergiesEEGPUvsCPU = new TH2D( + "EnergiesEEGPUvsCPU", "EnergiesEEGPUvsCPU; CPU; GPU", nbins_energy, 0, last_energy, nbins_energy, 0, last_energy); + auto hEnergiesEBGPUCPUratio = new TH1D("EnergiesEBGPU/CPUratio", "EnergiesEBGPU/CPUratio; GPU/CPU", 100, 0.8, 1.2); + auto hEnergiesEEGPUCPUratio = new TH1D("EnergiesEEGPU/CPUratio", "EnergiesEEGPU/CPUratio; GPU/CPU", 100, 0.8, 1.2); + auto hEnergiesEBdeltavsCPU = + new TH2D("EnergiesEBdeltavsCPU", "EnergiesEBdeltavsCPU", nbins, 0, last, nbins_delta, 
-delta, delta); + auto hEnergiesEEdeltavsCPU = + new TH2D("EnergiesEEdeltavsCPU", "EnergiesEEdeltavsCPU", nbins, 0, last, nbins_delta, -delta, delta); + + // Chi2 plots for EB and EE on both GPU and CPU + auto hChi2EBGPU = new TH1D("Chi2EBGPU", "Chi2EBGPU; Ch^{2}", nbins_chi2, 0, last_chi2); + auto hChi2EEGPU = new TH1D("Chi2EEGPU", "Chi2EEGPU; Ch^{2}", nbins_chi2, 0, last_chi2); + auto hChi2EBCPU = new TH1D("Chi2EBCPU", "Chi2EBCPU; Ch^{2}", nbins_chi2, 0, last_chi2); + auto hChi2EECPU = new TH1D("Chi2EECPU", "Chi2EECPU; Ch^{2}", nbins_chi2, 0, last_chi2); + auto hChi2EBGPUvsCPU = new TH2D("Chi2EBGPUvsCPU", "Chi2EBGPUvsCPU; CPU; GPU", nbins_chi2, 0, 100, nbins_chi2, 0, 100); + auto hChi2EEGPUvsCPU = new TH2D("Chi2EEGPUvsCPU", "Chi2EEGPUvsCPU; CPU; GPU", nbins_chi2, 0, 100, nbins_chi2, 0, 100); + auto hChi2EBGPUCPUratio = new TH1D("Chi2EBGPU/CPUratio", "Chi2EBGPU/CPUratio; GPU/CPU", 100, 0.8, 1.2); + auto hChi2EEGPUCPUratio = new TH1D("Chi2EEGPU/CPUratio", "Chi2EEGPU/CPUratio; GPU/CPU", 100, 0.8, 1.2); + auto hChi2EBdeltavsCPU = + new TH2D("Chi2EBdeltavsCPU", "Chi2EBdeltavsCPU", nbins_chi2, 0, last_chi2, nbins_delta, -delta, delta); + auto hChi2EEdeltavsCPU = + new TH2D("Chi2EEdeltavsCPU", "Chi2EEdeltavsCPU", nbins_chi2, 0, last_chi2, nbins_delta, -delta, delta); + + // Flags plots for EB and EE on both GPU and CPU + auto hFlagsEBGPU = new TH1D("FlagsEBGPU", "FlagsEBGPU; Flags", nbins_flag, -10, last_flag); + auto hFlagsEBCPU = new TH1D("FlagsEBCPU", "FlagsEBCPU; Flags", nbins_flag, -10, last_flag); + auto hFlagsEEGPU = new TH1D("FlagsEEGPU", "FlagsEEGPU; Flags", nbins_flag, -10, last_flag); + auto hFlagsEECPU = new TH1D("FlagsEECPU", "FlagsEECPU; Flags", nbins_flag, -10, last_flag); + auto hFlagsEBGPUvsCPU = + new TH2D("FlagsEBGPUvsCPU", "FlagsEBGPUvsCPU; CPU; GPU", nbins_flag, -10, last_flag, nbins_flag, -10, last_flag); + auto hFlagsEEGPUvsCPU = + new TH2D("FlagsEEGPUvsCPU", "FlagsEEGPUvsCPU; CPU; GPU", nbins_flag, -10, last_flag, nbins_flag, -10, last_flag); + 
auto hFlagsEBGPUCPUratio = new TH1D("FlagsEBGPU/CPUratio", "FlagsEBGPU/CPUratio; GPU/CPU", 50, -5, 10); + auto hFlagsEEGPUCPUratio = new TH1D("FlagsEEGPU/CPUratio", "FlagsEEGPU/CPUratio; GPU/CPU", 50, -5, 10); + auto hFlagsEBdeltavsCPU = + new TH2D("FlagsEBdeltavsCPU", "FlagsEBdeltavsCPU", nbins_flag, -10, last_flag, nbins_delta, -delta, delta); + auto hFlagsEEdeltavsCPU = + new TH2D("FlagsEEdeltavsCPU", "FlagsEEdeltavsCPU", nbins_flag, -10, last_flag, nbins_delta, -delta, delta); + + // Extras plots for EB and EE on both GPU and CPU + auto hExtrasEBGPU = new TH1D("ExtrasEBGPU", "ExtrasEBGPU; No. of Extras", nbins_extra, 0, last_extra); + auto hExtrasEBCPU = new TH1D("ExtrasEBCPU", "ExtrasEBCPU; No. of Extras", nbins_extra, 0, last_extra); + auto hExtrasEEGPU = new TH1D("ExtrasEEGPU", "ExtrasEEGPU; No. of Extras", nbins_extra, 0, last_extra); + auto hExtrasEECPU = new TH1D("ExtrasEECPU", "ExtrasEECPU; No. of Extras", nbins_extra, 0, last_extra); + auto hExtrasEBGPUvsCPU = new TH2D( + "ExtrasEBGPUvsCPU", "ExtrasEBGPUvsCPU; CPU; GPU", nbins_extra, 0, last_extra, nbins_extra, 0, last_extra); + auto hExtrasEEGPUvsCPU = new TH2D( + "ExtrasEEGPUvsCPU", "ExtrasEEGPUvsCPU; CPU; GPU", nbins_extra, 0, last_extra, nbins_extra, 0, last_extra); + auto hExtrasEBGPUCPUratio = new TH1D("ExtrasEBGPU/CPUratio", "ExtrasEBGPU/CPUratio; GPU/CPU", 50, 0.0, 2.0); + auto hExtrasEEGPUCPUratio = new TH1D("ExtrasEEGPU/CPUratio", "ExtrasEEGPU/CPUratio; GPU/CPU", 50, 0.0, 2.0); + auto hExtrasEBdeltavsCPU = + new TH2D("ExtrasEBdeltavsCPU", "ExtrasEBdeltavsCPU", nbins_extra, 0, last_extra, nbins_delta, -delta, delta); + auto hExtrasEEdeltavsCPU = + new TH2D("ExtrasEEdeltavsCPU", "ExtrasEEdeltavsCPU", nbins_extra, 0, last_extra, nbins_delta, -delta, delta); + + // input file setup for tree + std::cout << "validating file " << fileName << std::endl; + TFile rf{fileName.c_str()}; + TTree *rt = (TTree *)rf.Get("Events"); + + // Allocating the appropriate data to their respective pointers + 
rt->SetBranchAddress("ecalTagsoaecalRecHit_ecalCPURecHitProducer_EcalRecHitsEB_RECO.", &wgpuEB); + rt->SetBranchAddress("ecalTagsoaecalRecHit_ecalCPURecHitProducer_EcalRecHitsEE_RECO.", &wgpuEE); + rt->SetBranchAddress("EcalRecHitsSorted_ecalRecHit_EcalRecHitsEB_RECO.", &wcpuEB); + rt->SetBranchAddress("EcalRecHitsSorted_ecalRecHit_EcalRecHitsEE_RECO.", &wcpuEE); + + // constexpr float eps_diff = 1e-3; + + // accumulate sizes for events and sizes of each event on both GPU and CPU + // auto const nentries = rt->GetEntries(); + int nentries = rt->GetEntries(); + + //---- AM: tests + if (nentries > 1000) { + nentries = 1000; + } + // nentries = 1; + + std::cout << "#events to validate over: " << nentries << std::endl; + for (int ie = 0; ie < nentries; ++ie) { + rt->GetEntry(ie); + + // const char* ordinal[] = { "th", "st", "nd", "rd", "th", "th", "th", "th", "th", "th" }; + auto cpu_eb_size = wcpuEB->bareProduct().size(); + auto cpu_ee_size = wcpuEE->bareProduct().size(); + auto gpu_eb_size = wgpuEB->bareProduct().energy.size(); + auto gpu_ee_size = wgpuEE->bareProduct().energy.size(); + float eb_ratio = (float)gpu_eb_size / cpu_eb_size; + float ee_ratio = (float)gpu_ee_size / cpu_ee_size; + + // Filling up the histograms on events sizes for EB and EE on both GPU and CPU + hRechitsEBGPU->Fill(gpu_eb_size); + hRechitsEBCPU->Fill(cpu_eb_size); + hRechitsEEGPU->Fill(gpu_ee_size); + hRechitsEECPU->Fill(cpu_ee_size); + hRechitsEBGPUvsCPU->Fill(cpu_eb_size, gpu_eb_size); + hRechitsEEGPUvsCPU->Fill(cpu_ee_size, gpu_ee_size); + hRechitsEBGPUCPUratio->Fill(eb_ratio); + hRechitsEEGPUCPUratio->Fill(ee_ratio); + hRechitsEBdeltavsCPU->Fill(cpu_eb_size, gpu_eb_size - cpu_eb_size); + hRechitsEEdeltavsCPU->Fill(cpu_ee_size, gpu_ee_size - cpu_ee_size); + + /* + * // condition that sizes on GPU and CPU should be the same for EB or EE + * if (cpu_eb_size != gpu_eb_size or cpu_ee_size != gpu_ee_size) { + * std::cerr << ie << ordinal[ie % 10] << " entry:\n" + * << " EB size: " << 
std::setw(4) << cpu_eb_size << " (cpu) vs " << std::setw(4) << gpu_eb_size << " (gpu)\n" + * << " EE size: " << std::setw(4) << cpu_ee_size << " (cpu) vs " << std::setw(4) << gpu_ee_size << " (gpu)" << std::endl; + * + * continue; + } + assert(wgpuEB->bareProduct().energy.size() == wcpuEB->bareProduct().size()); + assert(wgpuEE->bareProduct().energy.size() == wcpuEE->bareProduct().size()); + auto const neb = wcpuEB->bareProduct().size(); //like cpu_eb_size but set to constant + auto const nee = wcpuEE->bareProduct().size(); //like cpu_ee_size but set to constant + */ + + uint selected_gpu_eb_size = 0; + uint selected_gpu_ee_size = 0; + + uint positive_gpu_eb_size = 0; + uint positive_gpu_ee_size = 0; + + // EB: + for (uint32_t i = 0; i < gpu_eb_size; ++i) { + auto const did_gpu = wgpuEB->bareProduct().did[i]; // set the did for the current RecHit + // Set the variables for GPU + auto const enr_gpu = wgpuEB->bareProduct().energy[i]; + auto const chi2_gpu = wgpuEB->bareProduct().chi2[i]; + auto const flag_gpu = wgpuEB->bareProduct().flagBits[i]; + auto const extra_gpu = wgpuEB->bareProduct().extra[i]; + + // you have "-1" if the crystal is not selected + if (enr_gpu >= 0) { + selected_gpu_eb_size++; + + if (enr_gpu > 0) { + positive_gpu_eb_size++; + } + + // find the Rechit on CPU reflecting the same did + auto const cpu_iter = wcpuEB->bareProduct().find(DetId{did_gpu}); + if (cpu_iter == wcpuEB->bareProduct().end()) { + // std::cerr << ie << ordinal[ie % 10] << " entry\n" + // << " Did not find a DetId " << did_gpu_eb + // << " in a CPU collection\n"; + std::cerr << " Did not find a DetId " << did_gpu << " in a CPU collection\n"; + continue; + } + // Set the variables for CPU + auto const enr_cpu = cpu_iter->energy(); + auto const chi2_cpu = cpu_iter->chi2(); + // auto const flag_cpu = cpu_iter->flagBits(); + auto const flag_cpu = 1; + // auto const extra_cpu = cpu_iter->extra(); + auto const extra_cpu = 1; + // auto const flag_cpu = cpu_iter->flagBits() ? 
cpu_iter->flagBits():-1; + // auto const extra_cpu = cpu_iter->extra() ? cpu_iter->extra():-1; + + // AM: TEST + // if (extra_cpu != 10) continue; + + // Fill the energy and Chi2 histograms for GPU and CPU and their comparisons with delta + hEnergiesEBGPU->Fill(enr_gpu); + hEnergiesEBCPU->Fill(enr_cpu); + // std::cout<<"EB CPU Energy:\t"<Fill(enr_cpu, enr_gpu); + hEnergiesEBGPUCPUratio->Fill(enr_gpu / enr_cpu); + hEnergiesEBdeltavsCPU->Fill(enr_cpu, enr_gpu - enr_cpu); + + hChi2EBGPU->Fill(chi2_gpu); + hChi2EBCPU->Fill(chi2_cpu); + hChi2EBGPUvsCPU->Fill(chi2_cpu, chi2_gpu); + hChi2EBGPUCPUratio->Fill(chi2_gpu / chi2_cpu); + hChi2EBdeltavsCPU->Fill(chi2_cpu, chi2_gpu - chi2_cpu); + + hFlagsEBGPU->Fill(flag_gpu); + hFlagsEBCPU->Fill(flag_cpu); + hFlagsEBGPUvsCPU->Fill(flag_cpu, flag_gpu); + hFlagsEBGPUCPUratio->Fill(flag_cpu ? flag_gpu / flag_cpu : -1); + hFlagsEBdeltavsCPU->Fill(flag_cpu, flag_gpu - flag_cpu); + + hExtrasEBGPU->Fill(extra_gpu); + hExtrasEBCPU->Fill(extra_cpu); + hExtrasEBGPUvsCPU->Fill(extra_cpu, extra_gpu); + hExtrasEBGPUCPUratio->Fill(extra_cpu ? 
extra_gpu / extra_cpu : -1); + hExtrasEBdeltavsCPU->Fill(extra_cpu, extra_gpu - extra_cpu); + + // Check if abs difference between GPU and CPU values for energy and Chi2 are smaller than eps, if not print message + // if ((std::abs(enr_gpu - enr_cpu) >= eps_diff) or + // (std::abs(chi2_gpu - chi2_cpu) >= eps_diff) or std::isnan(chi2_gpu)) + // { + // printf("EB eventid = %d chid = %d energy_gpu = %f energy_cpu %f chi2_gpu = %f chi2_cpu = %f\n", + // ie, i, enr_gpu, enr_cpu, chi2_gpu, chi2_cpu); + // if (std::isnan(chi2_gpu)) + // printf("*** nan ***\n"); + // } + } + } + + // EE: + for (uint32_t i = 0; i < gpu_ee_size; ++i) { + auto const did_gpu = wgpuEE->bareProduct().did[i]; // set the did for the current RecHit + // Set the variables for GPU + auto const enr_gpu = wgpuEE->bareProduct().energy[i]; + auto const chi2_gpu = wgpuEE->bareProduct().chi2[i]; + auto const flag_gpu = wgpuEE->bareProduct().flagBits[i]; + auto const extra_gpu = wgpuEE->bareProduct().extra[i]; + + // you have "-1" if the crystal is not selected + if (enr_gpu >= 0) { + selected_gpu_ee_size++; + + if (enr_gpu > 0) { + positive_gpu_ee_size++; + } + + // find the Rechit on CPU reflecting the same did + auto const cpu_iter = wcpuEE->bareProduct().find(DetId{did_gpu}); + if (cpu_iter == wcpuEE->bareProduct().end()) { + // std::cerr << ie << ordinal[ie % 10] << " entry\n" + // << " Did not find a DetId " << did_gpu + // << " in a CPU collection\n"; + std::cerr << " Did not find a DetId " << did_gpu << " in a CPU collection\n"; + continue; + } + // Set the variables for CPU + auto const enr_cpu = cpu_iter->energy(); + auto const chi2_cpu = cpu_iter->chi2(); + // auto const flag_cpu = cpu_iter->flagBits(); + auto const flag_cpu = 1; + // auto const extra_cpu = cpu_iter->extra(); + auto const extra_cpu = 1; + // auto const flag_cpu = cpu_iter->flagBits()?cpu_iter->flagBits():-1; + // auto const extra_cpu = cpu_iter->extra()?cpu_iter->extra():-1; + + // AM: TEST + // if (extra_cpu != 10) continue; + + 
// Fill the energy and Chi2 histograms for GPU and CPU and their comparisons with delta + hEnergiesEEGPU->Fill(enr_gpu); + hEnergiesEECPU->Fill(enr_cpu); + hEnergiesEEGPUvsCPU->Fill(enr_cpu, enr_gpu); + hEnergiesEEGPUCPUratio->Fill(enr_gpu / enr_cpu); + hEnergiesEEdeltavsCPU->Fill(enr_cpu, enr_gpu - enr_cpu); + + hChi2EEGPU->Fill(chi2_gpu); + hChi2EECPU->Fill(chi2_cpu); + hChi2EEGPUvsCPU->Fill(chi2_cpu, chi2_gpu); + hChi2EEGPUCPUratio->Fill(chi2_gpu / chi2_cpu); + hChi2EEdeltavsCPU->Fill(chi2_cpu, chi2_gpu - chi2_cpu); + + hFlagsEEGPU->Fill(flag_gpu); + hFlagsEECPU->Fill(flag_cpu); + hFlagsEEGPUvsCPU->Fill(flag_cpu, flag_gpu); + hFlagsEEGPUCPUratio->Fill(flag_cpu ? flag_gpu / flag_cpu : -1); + hFlagsEEdeltavsCPU->Fill(flag_cpu, flag_gpu - flag_cpu); + + hExtrasEEGPU->Fill(extra_gpu); + hExtrasEECPU->Fill(extra_cpu); + hExtrasEEGPUvsCPU->Fill(extra_cpu, extra_gpu); + hExtrasEEGPUCPUratio->Fill(extra_cpu ? extra_gpu / extra_cpu : -1); + hExtrasEEdeltavsCPU->Fill(extra_cpu, extra_gpu - extra_cpu); + + // Check if abs difference between GPU and CPU values for energy and Chi2 are smaller than eps, if not print message + // if ((std::abs(enr_gpu - enr_cpu) >= eps_diff) or + // (std::abs(chi2_gpu - chi2_cpu) >= eps_diff) or std::isnan(chi2_gpu)) + // { + // printf("EE eventid = %d chid = %d energy_gpu = %f energy_cpu %f chi2_gpu = %f chi2_cpu = %f\n", + // ie, i, enr_gpu, enr_cpu, chi2_gpu, chi2_cpu); + // if (std::isnan(chi2_gpu)) + // printf("*** nan ***\n"); + // } + } + } + + // + // now the rechit counting + // + float selected_eb_ratio = (float)selected_gpu_eb_size / cpu_eb_size; + float selected_ee_ratio = (float)selected_gpu_ee_size / cpu_ee_size; + + // Filling up the histograms on events sizes for EB and EE on both GPU and CPU + hSelectedRechitsEBGPU->Fill(selected_gpu_eb_size); + hSelectedRechitsEBCPU->Fill(cpu_eb_size); + hSelectedRechitsEEGPU->Fill(selected_gpu_ee_size); + hSelectedRechitsEECPU->Fill(cpu_ee_size); + 
hSelectedRechitsEBGPUvsCPU->Fill(cpu_eb_size, selected_gpu_eb_size); + hSelectedRechitsEEGPUvsCPU->Fill(cpu_ee_size, selected_gpu_ee_size); + hSelectedRechitsEBGPUCPUratio->Fill(selected_eb_ratio); + hSelectedRechitsEEGPUCPUratio->Fill(selected_ee_ratio); + hSelectedRechitsEBdeltavsCPU->Fill(cpu_eb_size, selected_gpu_eb_size - cpu_eb_size); + hSelectedRechitsEEdeltavsCPU->Fill(cpu_ee_size, selected_gpu_ee_size - cpu_ee_size); + + // + // now the rechit counting + // + + uint positive_cpu_eb_size = 0; + uint positive_cpu_ee_size = 0; + + // EB: + for (uint32_t i = 0; i < cpu_eb_size; ++i) { + auto const enr_cpu = wcpuEB->bareProduct()[i].energy(); + if (enr_cpu > 0) { + positive_cpu_eb_size++; + } + } + // EE: + for (uint32_t i = 0; i < cpu_ee_size; ++i) { + auto const enr_cpu = wcpuEE->bareProduct()[i].energy(); + if (enr_cpu > 0) { + positive_cpu_ee_size++; + } + } + + float positive_eb_ratio = (float)positive_gpu_eb_size / positive_cpu_eb_size; + float positive_ee_ratio = (float)positive_gpu_ee_size / positive_cpu_ee_size; + + // Filling up the histograms on events sizes for EB and EE on both GPU and CPU + hPositiveRechitsEBGPU->Fill(positive_gpu_eb_size); + hPositiveRechitsEBCPU->Fill(positive_cpu_eb_size); + hPositiveRechitsEEGPU->Fill(positive_gpu_ee_size); + hPositiveRechitsEECPU->Fill(positive_cpu_ee_size); + hPositiveRechitsEBGPUvsCPU->Fill(positive_cpu_eb_size, positive_gpu_eb_size); + hPositiveRechitsEEGPUvsCPU->Fill(positive_cpu_ee_size, positive_gpu_ee_size); + hPositiveRechitsEBGPUCPUratio->Fill(positive_eb_ratio); + hPositiveRechitsEEGPUCPUratio->Fill(positive_ee_ratio); + hPositiveRechitsEBdeltavsCPU->Fill(positive_cpu_eb_size, positive_gpu_eb_size - positive_cpu_eb_size); + hPositiveRechitsEEdeltavsCPU->Fill(positive_cpu_ee_size, positive_gpu_ee_size - positive_cpu_ee_size); + + if (cpu_eb_size != selected_gpu_eb_size or cpu_ee_size != selected_gpu_ee_size) { + // std::cerr << ie << ordinal[ie % 10] << " entry:\n" + std::cerr << ie << " entry:\n" + 
<< " EB size: " << std::setw(4) << cpu_eb_size << " (cpu) vs " << std::setw(4) << selected_gpu_eb_size + << " (gpu)\n" + << " EE size: " << std::setw(4) << cpu_ee_size << " (cpu) vs " << std::setw(4) << selected_gpu_ee_size + << " (gpu)" << std::endl; + } + } + + // Plotting the results: + { + // Canvases Setup: + TCanvas cAllRechits("AllRechits", "AllRechits", 1750, 860); + cAllRechits.Divide(3, 2); + TCanvas cRechits("Rechits", "Rechits", 1750, 860); + cRechits.Divide(3, 2); + TCanvas cRechitsPositive("RechitsPositive", "RechitsPositive", 1750, 860); + cRechitsPositive.Divide(3, 2); + TCanvas cEnergies("Energies", "Energies", 1750, 860); + cEnergies.Divide(3, 2); + TCanvas cChi2("Chi2", "Chi2", 1750, 860); + cChi2.Divide(3, 2); + TCanvas cFlags("Flags", "Flags", 1750, 860); + cFlags.Divide(3, 2); + TCanvas cExtras("Extras", "Extras", 1750, 860); + cExtras.Divide(3, 2); + + // Plotting the sizes of GPU vs CPU for each event of EB + cAllRechits.cd(1); + { + gPad->SetLogy(); + hRechitsEBCPU->SetLineColor(kRed); + hRechitsEBCPU->SetLineWidth(2); + hRechitsEBCPU->Draw(""); + hRechitsEBGPU->SetLineColor(kBlue); + hRechitsEBGPU->SetLineWidth(2); + hRechitsEBGPU->Draw("sames"); + cAllRechits.Update(); + auto stats = (TPaveStats *)hRechitsEBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cAllRechits.cd(4); + { + gPad->SetLogy(); + hRechitsEECPU->SetLineColor(kRed); + hRechitsEECPU->SetLineWidth(2); + hRechitsEECPU->Draw(""); + hRechitsEEGPU->SetLineColor(kBlue); + hRechitsEEGPU->SetLineWidth(2); + hRechitsEEGPU->Draw("sames"); + cAllRechits.Update(); + auto stats = (TPaveStats *)hRechitsEEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cAllRechits.cd(2); + { + gStyle->SetPalette(55); + hRechitsEBGPUvsCPU->Draw("COLZ"); + } + cAllRechits.cd(5); + { + 
gStyle->SetPalette(55); + hRechitsEEGPUvsCPU->Draw("COLZ"); + } + cAllRechits.cd(3); + { + gPad->SetLogy(); + //hRechitsEBdeltavsCPU->Draw("COLZ"); + hRechitsEBGPUCPUratio->Draw(""); + } + cAllRechits.cd(6); + { + gPad->SetLogy(); + //hRechitsEEdeltavsCPU->Draw("COLZ"); + hRechitsEEGPUCPUratio->Draw(""); + } + cAllRechits.SaveAs("ecal-allrechits.root"); + cAllRechits.SaveAs("ecal-allrechits.png"); + + // Plotting the sizes of GPU vs CPU for each event of EB + cRechits.cd(1); + { + gPad->SetLogy(); + hSelectedRechitsEBCPU->SetLineColor(kRed); + hSelectedRechitsEBCPU->SetLineWidth(2); + hSelectedRechitsEBCPU->Draw(""); + hSelectedRechitsEBGPU->SetLineColor(kBlue); + hSelectedRechitsEBGPU->SetLineWidth(2); + hSelectedRechitsEBGPU->Draw("sames"); + cRechits.Update(); + auto stats = (TPaveStats *)hSelectedRechitsEBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cRechits.cd(4); + { + gPad->SetLogy(); + hSelectedRechitsEECPU->SetLineColor(kRed); + hSelectedRechitsEECPU->SetLineWidth(2); + hSelectedRechitsEECPU->Draw(""); + hSelectedRechitsEEGPU->SetLineColor(kBlue); + hSelectedRechitsEEGPU->SetLineWidth(2); + hSelectedRechitsEEGPU->Draw("sames"); + cRechits.Update(); + auto stats = (TPaveStats *)hSelectedRechitsEEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cRechits.cd(2); + { + gStyle->SetPalette(55); + hSelectedRechitsEBGPUvsCPU->Draw("COLZ"); + } + cRechits.cd(5); + { + gStyle->SetPalette(55); + hSelectedRechitsEEGPUvsCPU->Draw("COLZ"); + } + cRechits.cd(3); + { + gPad->SetLogy(); + //hSelectedRechitsEBdeltavsCPU->Draw("COLZ"); + hSelectedRechitsEBGPUCPUratio->Draw(""); + } + cRechits.cd(6); + { + gPad->SetLogy(); + //hSelectedRechitsEEdeltavsCPU->Draw("COLZ"); + hSelectedRechitsEEGPUCPUratio->Draw(""); + } + cRechits.SaveAs("ecal-rechits.root"); + 
cRechits.SaveAs("ecal-rechits.png"); + + // Plotting the sizes of GPU vs CPU for each event of EB + cRechitsPositive.cd(1); + { + gPad->SetLogy(); + hPositiveRechitsEBCPU->SetLineColor(kRed); + hPositiveRechitsEBCPU->SetLineWidth(2); + hPositiveRechitsEBCPU->Draw(""); + hPositiveRechitsEBGPU->SetLineColor(kBlue); + hPositiveRechitsEBGPU->SetLineWidth(2); + hPositiveRechitsEBGPU->Draw("sames"); + cRechitsPositive.Update(); + auto stats = (TPaveStats *)hPositiveRechitsEBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cRechitsPositive.cd(4); + { + gPad->SetLogy(); + hPositiveRechitsEECPU->SetLineColor(kRed); + hPositiveRechitsEECPU->SetLineWidth(2); + hPositiveRechitsEECPU->Draw(""); + hPositiveRechitsEEGPU->SetLineColor(kBlue); + hPositiveRechitsEEGPU->SetLineWidth(2); + hPositiveRechitsEEGPU->Draw("sames"); + cRechitsPositive.Update(); + auto stats = (TPaveStats *)hPositiveRechitsEEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cRechitsPositive.cd(2); + { + gStyle->SetPalette(55); + hPositiveRechitsEBGPUvsCPU->Draw("COLZ"); + } + cRechitsPositive.cd(5); + { + gStyle->SetPalette(55); + hPositiveRechitsEEGPUvsCPU->Draw("COLZ"); + } + cRechitsPositive.cd(3); + { + gPad->SetLogy(); + //hPositiveRechitsEBdeltavsCPU->Draw("COLZ"); + hPositiveRechitsEBGPUCPUratio->Draw(""); + } + cRechitsPositive.cd(6); + { + gPad->SetLogy(); + //hPositiveRechitsEEdeltavsCPU->Draw("COLZ"); + hPositiveRechitsEEGPUCPUratio->Draw(""); + } + cRechitsPositive.SaveAs("ecal-rechits-positive.root"); + cRechitsPositive.SaveAs("ecal-rechits-positive.png"); + + cEnergies.cd(1); + { + gPad->SetLogy(); + hEnergiesEBCPU->SetLineColor(kBlack); + hEnergiesEBCPU->SetLineWidth(2); + hEnergiesEBCPU->Draw(""); + hEnergiesEBGPU->SetLineColor(kBlue); + hEnergiesEBGPU->SetLineWidth(2); + 
hEnergiesEBGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hEnergiesEBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cEnergies.cd(4); + { + gPad->SetLogy(); + hEnergiesEECPU->SetLineColor(kBlack); + hEnergiesEECPU->SetLineWidth(2); + hEnergiesEECPU->Draw(""); + hEnergiesEEGPU->SetLineColor(kBlue); + hEnergiesEEGPU->SetLineWidth(2); + hEnergiesEEGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hEnergiesEEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cEnergies.cd(2); + { hEnergiesEBGPUvsCPU->Draw("COLZ"); } + cEnergies.cd(5); + { hEnergiesEEGPUvsCPU->Draw("COLZ"); } + cEnergies.cd(3); + { + gPad->SetLogy(); + //hEnergiesEBdeltavsCPU->Draw("COLZ"); + hEnergiesEBGPUCPUratio->Draw(""); + } + cEnergies.cd(6); + { + gPad->SetLogy(); + //hEnergiesEEdeltavsCPU->Draw("COLZ"); + hEnergiesEEGPUCPUratio->Draw(""); + } + cEnergies.SaveAs("ecal-energies.root"); + cEnergies.SaveAs("ecal-energies.png"); + + cChi2.cd(1); + { + gPad->SetLogy(); + hChi2EBCPU->SetLineColor(kBlack); + hChi2EBCPU->SetLineWidth(2); + hChi2EBCPU->Draw(""); + hChi2EBGPU->SetLineColor(kBlue); + hChi2EBGPU->SetLineWidth(2); + hChi2EBGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hChi2EBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cChi2.cd(4); + { + gPad->SetLogy(); + hChi2EECPU->SetLineColor(kBlack); + hChi2EECPU->SetLineWidth(2); + hChi2EECPU->Draw(""); + hChi2EEGPU->SetLineColor(kBlue); + hChi2EEGPU->SetLineWidth(2); + hChi2EEGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hChi2EEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - 
y1)); + } + cChi2.cd(2); + { hChi2EBGPUvsCPU->Draw("COLZ"); } + cChi2.cd(5); + { hChi2EEGPUvsCPU->Draw("COLZ"); } + cChi2.cd(3); + { + gPad->SetLogy(); + //hChi2EBdeltavsCPU->Draw("COLZ"); + hChi2EBGPUCPUratio->Draw(""); + } + cChi2.cd(6); + { + gPad->SetLogy(); + //hChi2EEdeltavsCPU->Draw("COLZ"); + hChi2EEGPUCPUratio->Draw(""); + } + cChi2.SaveAs("ecal-chi2.root"); + cChi2.SaveAs("ecal-chi2.png"); + + cFlags.cd(1); + { + gPad->SetLogy(); + hFlagsEBCPU->SetLineColor(kBlack); + hFlagsEBCPU->SetLineWidth(2); + hFlagsEBCPU->Draw(""); + hFlagsEBGPU->SetLineColor(kBlue); + hFlagsEBGPU->SetLineWidth(2); + hFlagsEBGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hFlagsEBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cFlags.cd(4); + { + gPad->SetLogy(); + hFlagsEECPU->SetLineColor(kBlack); + hFlagsEECPU->SetLineWidth(2); + hFlagsEECPU->Draw(""); + hFlagsEEGPU->SetLineColor(kBlue); + hFlagsEEGPU->SetLineWidth(2); + hFlagsEEGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hFlagsEEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cFlags.cd(2); + { hFlagsEBGPUvsCPU->Draw("COLZ"); } + cFlags.cd(5); + { hFlagsEEGPUvsCPU->Draw("COLZ"); } + cFlags.cd(3); + { + gPad->SetLogy(); + //hFlagsEBdeltavsCPU->Draw("COLZ"); + hFlagsEBGPUCPUratio->Draw(""); + } + cFlags.cd(6); + { + gPad->SetLogy(); + //hFlagsEEdeltavsCPU->Draw("COLZ"); + hFlagsEEGPUCPUratio->Draw(""); + } + cFlags.SaveAs("ecal-flags.root"); + cFlags.SaveAs("ecal-flags.png"); + + cExtras.cd(1); + { + gPad->SetLogy(); + hExtrasEBCPU->SetLineColor(kBlack); + hExtrasEBCPU->SetLineWidth(2); + hExtrasEBCPU->Draw(""); + hExtrasEBGPU->SetLineColor(kBlue); + hExtrasEBGPU->SetLineWidth(2); + hExtrasEBGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats 
*)hExtrasEBGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cExtras.cd(4); + { + gPad->SetLogy(); + hExtrasEECPU->SetLineColor(kBlack); + hExtrasEECPU->SetLineWidth(2); + hExtrasEECPU->Draw(""); + hExtrasEEGPU->SetLineColor(kBlue); + hExtrasEEGPU->SetLineWidth(2); + hExtrasEEGPU->Draw("sames"); + gPad->Update(); + auto stats = (TPaveStats *)hExtrasEEGPU->FindObject("stats"); + auto y2 = stats->GetY2NDC(); + auto y1 = stats->GetY1NDC(); + stats->SetY2NDC(y1); + stats->SetY1NDC(y1 - (y2 - y1)); + } + cExtras.cd(2); + { hExtrasEBGPUvsCPU->Draw("COLZ"); } + cExtras.cd(5); + { hExtrasEEGPUvsCPU->Draw("COLZ"); } + cExtras.cd(3); + { + gPad->SetLogy(); + //hExtrasEBdeltavsCPU->Draw("COLZ"); + hExtrasEBGPUCPUratio->Draw(""); + } + cExtras.cd(6); + { + gPad->SetLogy(); + //hExtrasEEdeltavsCPU->Draw("COLZ"); + hExtrasEEGPUCPUratio->Draw(""); + } + cExtras.SaveAs("ecal-extras.root"); + cExtras.SaveAs("ecal-extras.png"); + } + + // Close all open files + rf.Close(); + rfout.Write(); + rfout.Close(); + + return 0; +} diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalGainRatiosGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalGainRatiosGPU.h new file mode 100644 index 0000000000000..a3f65d0b509fc --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalGainRatiosGPU.h @@ -0,0 +1,43 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalGainRatiosGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalGainRatiosGPU_h + +#include "CondFormats/EcalObjects/interface/EcalGainRatios.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalGainRatiosGPU { +public: + struct Product { + ~Product(); + float *gain12Over6 = nullptr, *gain6Over1 = nullptr; + }; + +#ifndef __CUDACC__ + + // rearrange pedestals + EcalGainRatiosGPU(EcalGainRatios const&); + 
+ // will call deallocation for Product through ~Product + ~EcalGainRatiosGPU() = default; + + // get device pointers + Product const& getProduct(cudaStream_t) const; + + // fixed identifying string for this class + static std::string name() { return std::string{"ecalGainRatiosGPU"}; } + +private: + // in the future, we need to arrange things so as to avoid this copy on the host + // EB values are stored first, then EE + std::vector> gain12Over6_; + std::vector> gain6Over1_; + + cms::cuda::ESProduct product_; + +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalGainRatiosGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalIntercalibConstantsGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalIntercalibConstantsGPU.h new file mode 100644 index 0000000000000..4b5401ff0316f --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalIntercalibConstantsGPU.h @@ -0,0 +1,43 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalIntercalibConstantsGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalIntercalibConstantsGPU_h + +#include "CondFormats/EcalObjects/interface/EcalIntercalibConstants.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalIntercalibConstantsGPU { +public: + struct Product { + ~Product(); + float* values = nullptr; + }; + +#ifndef __CUDACC__ + // NOTE(review): valuesEB_/valuesEE_ are reference members - presumably the argument must outlive this object + EcalIntercalibConstantsGPU(EcalIntercalibConstants const&); + + // will call deallocation for Product through ~Product + ~EcalIntercalibConstantsGPU() = default; + + // get device pointers + Product const& getProduct(cudaStream_t) const; + + // TODO: do this centrally + // get offset for hashes.
equals number of barrel items + uint32_t getOffset() const { return valuesEB_.size(); } + + // fixed identifying string for this class + static std::string name() { return std::string{"ecalIntercalibConstantsGPU"}; } + +private: + std::vector const& valuesEB_; + std::vector const& valuesEE_; + + cms::cuda::ESProduct product_; +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalIntercalibConstantsGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosGPU.h new file mode 100644 index 0000000000000..4a6cd34fcd171 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosGPU.h @@ -0,0 +1,53 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalLaserAPDPNRatiosGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalLaserAPDPNRatiosGPU_h + +#include "CondFormats/EcalObjects/interface/EcalLaserAPDPNRatios.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalLaserAPDPNRatiosGPU { +public: + struct Product { + ~Product(); + float *p1 = nullptr; + float *p2 = nullptr; + float *p3 = nullptr; + edm::TimeValue_t *t1 = nullptr; + edm::TimeValue_t *t2 = nullptr; + edm::TimeValue_t *t3 = nullptr; + }; + +#ifndef __CUDACC__ + + // constructed from an EcalLaserAPDPNRatios (CondFormats) object + EcalLaserAPDPNRatiosGPU(EcalLaserAPDPNRatios const &); + + // will call deallocation for Product through ~Product + ~EcalLaserAPDPNRatiosGPU() = default; + + // get device pointers + Product const &getProduct(cudaStream_t) const; + + // fixed identifying string for this class + static std::string name() { return std::string{"ecalLaserAPDPNRatiosGPU"}; } + +private: + // in the future, we need to arrange things so as to avoid this copy on the host + // EB values are stored first, then EE + std::vector > p1_; + std::vector > p2_; + std::vector > p3_; + + std::vector > t1_; + std::vector > t2_; + std::vector > t3_; + + cms::cuda::ESProduct product_; + +#endif // __CUDACC__ +}; + +#endif //
RecoLocalCalo_EcalRecAlgos_interface_EcalLaserAPDPNRatiosGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosRefGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosRefGPU.h new file mode 100644 index 0000000000000..985bfd9579f7c --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosRefGPU.h @@ -0,0 +1,43 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalLaserAPDPNRatiosRefGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalLaserAPDPNRatiosRefGPU_h + +#include "CondFormats/EcalObjects/interface/EcalLaserAPDPNRatiosRef.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalLaserAPDPNRatiosRefGPU { +public: + struct Product { + ~Product(); + float* values = nullptr; + }; + +#ifndef __CUDACC__ + // NOTE(review): valuesEB_/valuesEE_ are reference members - presumably the argument must outlive this object + EcalLaserAPDPNRatiosRefGPU(EcalLaserAPDPNRatiosRef const&); + + // will call deallocation for Product through ~Product + ~EcalLaserAPDPNRatiosRefGPU() = default; + + // get device pointers + Product const& getProduct(cudaStream_t) const; + + // TODO: do this centrally + // get offset for hashes.
equals number of barrel items + uint32_t getOffset() const { return valuesEB_.size(); } + + // fixed identifying string for this class + static std::string name() { return std::string{"ecalLaserAPDPNRatiosRefGPU"}; } + +private: + std::vector const& valuesEB_; + std::vector const& valuesEE_; + + cms::cuda::ESProduct product_; +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalLaserAPDPNRatiosRefGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAlphasGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAlphasGPU.h new file mode 100644 index 0000000000000..9dd05e9ee3c4d --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAlphasGPU.h @@ -0,0 +1,43 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalLaserAlphasGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalLaserAlphasGPU_h + +#include "CondFormats/EcalObjects/interface/EcalLaserAlphas.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalLaserAlphasGPU { +public: + struct Product { + ~Product(); + float* values = nullptr; + }; + +#ifndef __CUDACC__ + // NOTE(review): valuesEB_/valuesEE_ are reference members - presumably the argument must outlive this object + EcalLaserAlphasGPU(EcalLaserAlphas const&); + + // will call deallocation for Product through ~Product + ~EcalLaserAlphasGPU() = default; + + // get device pointers + Product const& getProduct(cudaStream_t) const; + + // TODO: do this centrally + // get offset for hashes.
equals number of barrel items + uint32_t getOffset() const { return valuesEB_.size(); } + + // fixed identifying string for this class + static std::string name() { return std::string{"ecalLaserAlphasGPU"}; } + +private: + std::vector const& valuesEB_; + std::vector const& valuesEE_; + + cms::cuda::ESProduct product_; +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalLaserAlphasGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalLinearCorrectionsGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalLinearCorrectionsGPU.h new file mode 100644 index 0000000000000..343bdf1dd1afc --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalLinearCorrectionsGPU.h @@ -0,0 +1,53 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalLinearCorrectionsGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalLinearCorrectionsGPU_h + +#include "CondFormats/EcalObjects/interface/EcalLinearCorrections.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalLinearCorrectionsGPU { +public: + struct Product { + ~Product(); + float *p1 = nullptr; + float *p2 = nullptr; + float *p3 = nullptr; + edm::TimeValue_t *t1 = nullptr; + edm::TimeValue_t *t2 = nullptr; + edm::TimeValue_t *t3 = nullptr; + }; + +#ifndef __CUDACC__ + + // constructed from an EcalLinearCorrections (CondFormats) object + EcalLinearCorrectionsGPU(EcalLinearCorrections const &); + + // will call deallocation for Product through ~Product + ~EcalLinearCorrectionsGPU() = default; + + // get device pointers + Product const &getProduct(cudaStream_t) const; + + // fixed identifying string for this class + static std::string name() { return std::string{"ecalLinearCorrectionsGPU"}; } + +private: + // in the future, we need to arrange things so as to avoid this copy on the host + // EB values are stored first, then EE + std::vector> p1_; + std::vector> p2_; + std::vector> p3_; + + std::vector> t1_; + std::vector> t2_; + std::vector> t3_; + + cms::cuda::ESProduct product_; + +#endif // __CUDACC__ +}; + +#endif //
RecoLocalCalo_EcalRecAlgos_interface_EcalLinearCorrectionsGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalMultifitParametersGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalMultifitParametersGPU.h new file mode 100644 index 0000000000000..56aa0579ff77f --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalMultifitParametersGPU.h @@ -0,0 +1,44 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalMultifitParametersGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalMultifitParametersGPU_h + +#include + +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +// Amplitude- and time-fit parameter vectors for EB and EE, constructed from an edm::ParameterSet, plus a +// Product of raw pointers with the same names (presumably their device-side copies - TODO confirm in the .cc). +class EcalMultifitParametersGPU { +public: + struct Product { + ~Product(); + // default to nullptr so that a Product which was never filled holds null rather than indeterminate pointers + double *amplitudeFitParametersEB = nullptr, *amplitudeFitParametersEE = nullptr; + double *timeFitParametersEB = nullptr, *timeFitParametersEE = nullptr; + }; + +#ifndef __CUDACC__ + EcalMultifitParametersGPU(edm::ParameterSet const&); + + ~EcalMultifitParametersGPU() = default; + + Product const& getProduct(cudaStream_t) const; + + // the four parameter vectors, in the order: amplitude EB, amplitude EE, time EB, time EE + std::array> const>, 4> getValues() const { + return {{amplitudeFitParametersEB_, amplitudeFitParametersEE_, timeFitParametersEB_, timeFitParametersEE_}}; + } + +private: + std::vector> amplitudeFitParametersEB_, amplitudeFitParametersEE_, + timeFitParametersEB_, timeFitParametersEE_; + + cms::cuda::ESProduct product_; +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalMultifitParametersGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalPedestalsGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalPedestalsGPU.h new file mode 100644 index 0000000000000..5387c422ddd9e --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalPedestalsGPU.h @@ -0,0 +1,47 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalPedestalsGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalPedestalsGPU_h + +#include
"CondFormats/EcalObjects/interface/EcalPedestals.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalPedestalsGPU { +public: + struct Product { + ~Product(); + float *mean_x12 = nullptr, *mean_x6 = nullptr, *mean_x1 = nullptr; + float *rms_x12 = nullptr, *rms_x6 = nullptr, *rms_x1 = nullptr; + }; + +#ifndef __CUDACC__ + + // rearrange pedestals + EcalPedestalsGPU(EcalPedestals const &); + + // will call deallocation for Product through ~Product + ~EcalPedestalsGPU() = default; + + // get device pointers + Product const &getProduct(cudaStream_t) const; + + // fixed identifying string for this class + static std::string name() { return std::string{"ecalPedestalsGPU"}; } + +private: + // in the future, we need to arrange things so as to avoid this copy on the host + // EB values are stored first, then EE + std::vector> mean_x12_; + std::vector> rms_x12_; + std::vector> mean_x6_; + std::vector> rms_x6_; + std::vector> mean_x1_; + std::vector> rms_x1_; + + cms::cuda::ESProduct product_; +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalPedestalsGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalPulseCovariancesGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalPulseCovariancesGPU.h new file mode 100644 index 0000000000000..6c5a3d9b95e2e --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalPulseCovariancesGPU.h @@ -0,0 +1,40 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalPulseCovariancesGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalPulseCovariancesGPU_h + +#include "CondFormats/EcalObjects/interface/EcalPulseCovariances.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalPulseCovariancesGPU { +public: + struct Product { + ~Product(); + EcalPulseCovariance* values = nullptr; + }; + +#ifndef __CUDACC__ +
// NOTE(review): valuesEB_/valuesEE_ are reference members - presumably the argument must outlive this object + EcalPulseCovariancesGPU(EcalPulseCovariances const&); + + // will call deallocation for Product through ~Product + ~EcalPulseCovariancesGPU() = default; + + // get device pointers + Product const& getProduct(cudaStream_t) const; + + // fixed identifying string for this class + static std::string name() { return std::string{"ecalPulseCovariancesGPU"}; } + +private: + // reuse original vectors (although with default allocator) + std::vector const& valuesEB_; + std::vector const& valuesEE_; + + cms::cuda::ESProduct product_; +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalPulseCovariancesGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalPulseShapesGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalPulseShapesGPU.h new file mode 100644 index 0000000000000..3edb2c9bcdfd3 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalPulseShapesGPU.h @@ -0,0 +1,40 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalPulseShapesGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalPulseShapesGPU_h + +#include "CondFormats/EcalObjects/interface/EcalPulseShapes.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalPulseShapesGPU { +public: + struct Product { + ~Product(); + EcalPulseShape* values = nullptr; + }; + +#ifndef __CUDACC__ + // NOTE(review): valuesEB_/valuesEE_ are reference members - presumably the argument must outlive this object + EcalPulseShapesGPU(EcalPulseShapes const&); + + // will call deallocation for Product through ~Product + ~EcalPulseShapesGPU() = default; + + // get device pointers + Product const& getProduct(cudaStream_t) const; + + // fixed identifying string for this class + static std::string name() { return std::string{"ecalPulseShapesGPU"}; } + +private: + // reuse original vectors (although with default allocator) + std::vector const& valuesEB_; + std::vector const& valuesEE_; + + cms::cuda::ESProduct product_; +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalPulseShapesGPU_h diff
--git a/RecoLocalCalo/EcalRecAlgos/interface/EcalRecHitParametersGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalRecHitParametersGPU.h new file mode 100644 index 0000000000000..c5d3dd0388d15 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalRecHitParametersGPU.h @@ -0,0 +1,47 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalRecHitParametersGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalRecHitParametersGPU_h + +#include + +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalRecHitParametersGPU { +public: + struct Product { + ~Product(); + int *ChannelStatusToBeExcluded, *expanded_v_DB_reco_flags; + uint32_t *expanded_Sizes_v_DB_reco_flags, *expanded_flagbit_v_DB_reco_flags; + }; + +#ifndef __CUDACC__ + EcalRecHitParametersGPU(edm::ParameterSet const &); + + ~EcalRecHitParametersGPU() = default; + + Product const &getProduct(cudaStream_t) const; + + using intvec = std::reference_wrapper> const>; + using uint32vec = std::reference_wrapper> const>; + std::tuple getValues() const { + return {ChannelStatusToBeExcluded_, + expanded_v_DB_reco_flags_, + expanded_Sizes_v_DB_reco_flags_, + expanded_flagbit_v_DB_reco_flags_}; + } + +private: + std::vector> ChannelStatusToBeExcluded_; + std::vector> expanded_v_DB_reco_flags_; + std::vector> expanded_Sizes_v_DB_reco_flags_, + expanded_flagbit_v_DB_reco_flags_; + + cms::cuda::ESProduct product_; +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalRecHitParametersGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalRechitADCToGeVConstantGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalRechitADCToGeVConstantGPU.h new file mode 100644 index 0000000000000..7d4d3cc60fd5c --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalRechitADCToGeVConstantGPU.h @@ -0,0 +1,42 @@ +#ifndef 
RecoLocalCalo_EcalRecAlgos_interface_EcalRechitADCToGeVConstantGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalRechitADCToGeVConstantGPU_h + +#include "CondFormats/EcalObjects/interface/EcalADCToGeVConstant.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalRechitADCToGeVConstantGPU { +public: + struct Product { + ~Product(); + float* adc2gev = nullptr; + }; + +#ifndef __CUDACC__ + + // + EcalRechitADCToGeVConstantGPU(EcalADCToGeVConstant const&); + + // will call dealloation for Product thru ~Product + ~EcalRechitADCToGeVConstantGPU() = default; + + // get device pointers + Product const& getProduct(cudaStream_t) const; + + // + static std::string name() { return std::string{"ecalRechitADCToGeVConstantGPU"}; } + +private: + // in the future, we need to arrange so to avoid this copy on the host + // store eb first then ee + std::vector> adc2gev_; + + cms::cuda::ESProduct product_; + +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalRechitADCToGeVConstantGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalRechitChannelStatusGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalRechitChannelStatusGPU.h new file mode 100644 index 0000000000000..bab99ab656c2d --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalRechitChannelStatusGPU.h @@ -0,0 +1,42 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalRechitChannelStatusGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalRechitChannelStatusGPU_h + +#include "CondFormats/EcalObjects/interface/EcalChannelStatus.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalRechitChannelStatusGPU { +public: + struct Product { + ~Product(); + uint16_t* status = nullptr; + }; + +#ifndef __CUDACC__ + + // + 
EcalRechitChannelStatusGPU(EcalChannelStatus const&); + + // will call dealloation for Product thru ~Product + ~EcalRechitChannelStatusGPU() = default; + + // get device pointers + Product const& getProduct(cudaStream_t) const; + + // + static std::string name() { return std::string{"ecalRechitChannelStatusGPU"}; } + +private: + // in the future, we need to arrange so to avoid this copy on the host + // store eb first then ee + std::vector> status_; + + cms::cuda::ESProduct product_; + +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalRechitChannelStatusGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalSamplesCorrelationGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalSamplesCorrelationGPU.h new file mode 100644 index 0000000000000..e1dee2d505e6c --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalSamplesCorrelationGPU.h @@ -0,0 +1,44 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalSamplesCorrelationGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalSamplesCorrelationGPU_h + +#include "CondFormats/EcalObjects/interface/EcalSamplesCorrelation.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalSamplesCorrelationGPU { +public: + struct Product { + ~Product(); + double *EBG12SamplesCorrelation = nullptr, *EBG6SamplesCorrelation = nullptr, *EBG1SamplesCorrelation = nullptr; + double *EEG12SamplesCorrelation = nullptr, *EEG6SamplesCorrelation = nullptr, *EEG1SamplesCorrelation = nullptr; + }; + +#ifndef __CUDACC__ + // rearrange pedestals + EcalSamplesCorrelationGPU(EcalSamplesCorrelation const&); + + // will call dealloation for Product thru ~Product + ~EcalSamplesCorrelationGPU() = default; + + // get device pointers + Product const& getProduct(cudaStream_t) const; + + // + static std::string name() { return std::string{"ecalSamplesCorrelationGPU"}; } + +private: + 
std::vector const& EBG12SamplesCorrelation_; + std::vector const& EBG6SamplesCorrelation_; + std::vector const& EBG1SamplesCorrelation_; + std::vector const& EEG12SamplesCorrelation_; + std::vector const& EEG6SamplesCorrelation_; + std::vector const& EEG1SamplesCorrelation_; + + cms::cuda::ESProduct product_; +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalSamplesCorrelationGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalTimeBiasCorrectionsGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalTimeBiasCorrectionsGPU.h new file mode 100644 index 0000000000000..9e2bf0aa18909 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalTimeBiasCorrectionsGPU.h @@ -0,0 +1,49 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalTimeBiasCorrectionsGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalTimeBiasCorrectionsGPU_h + +#include "CondFormats/EcalObjects/interface/EcalTimeBiasCorrections.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalTimeBiasCorrectionsGPU { +public: + struct Product { + ~Product(); + float *EBTimeCorrAmplitudeBins, *EBTimeCorrShiftBins; + float *EETimeCorrAmplitudeBins, *EETimeCorrShiftBins; + int EBTimeCorrAmplitudeBinsSize, EETimeCorrAmplitudeBinsSize; + }; + + // rearrange pedestals + EcalTimeBiasCorrectionsGPU(EcalTimeBiasCorrections const&); + +#ifndef __CUDACC__ + + // will call dealloation for Product thru ~Product + ~EcalTimeBiasCorrectionsGPU() = default; + + // get device pointers + Product const& getProduct(cudaStream_t) const; + + // + static std::string name() { return std::string{"ecalTimeBiasCorrectionsGPU"}; } +#endif // __CUDACC__ + + std::vector const& EBTimeCorrAmplitudeBins() const { return EBTimeCorrAmplitudeBins_; } + std::vector const& EETimeCorrAmplitudeBins() const { return EETimeCorrAmplitudeBins_; } + +private: + std::vector const& 
EBTimeCorrAmplitudeBins_; + std::vector const& EBTimeCorrShiftBins_; + std::vector const& EETimeCorrAmplitudeBins_; + std::vector const& EETimeCorrShiftBins_; + +#ifndef __CUDACC__ + cms::cuda::ESProduct product_; +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalTimeBiasCorrectionsGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/interface/EcalTimeCalibConstantsGPU.h b/RecoLocalCalo/EcalRecAlgos/interface/EcalTimeCalibConstantsGPU.h new file mode 100644 index 0000000000000..823334d433cc2 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/interface/EcalTimeCalibConstantsGPU.h @@ -0,0 +1,43 @@ +#ifndef RecoLocalCalo_EcalRecAlgos_interface_EcalTimeCalibConstantsGPU_h +#define RecoLocalCalo_EcalRecAlgos_interface_EcalTimeCalibConstantsGPU_h + +#include "CondFormats/EcalObjects/interface/EcalTimeCalibConstants.h" + +#ifndef __CUDACC__ +#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" +#endif // __CUDACC__ + +class EcalTimeCalibConstantsGPU { +public: + struct Product { + ~Product(); + float* values = nullptr; + }; + +#ifndef __CUDACC__ + // rearrange pedestals + EcalTimeCalibConstantsGPU(EcalTimeCalibConstants const&); + + // will call dealloation for Product thru ~Product + ~EcalTimeCalibConstantsGPU() = default; + + // get device pointers + Product const& getProduct(cudaStream_t) const; + + // TODO: do this centrally + // get offset for hashes. 
equals number of barrel items + uint32_t getOffset() const { return valuesEB_.size(); } + + // + static std::string name() { return std::string{"ecalTimeCalibConstantsGPU"}; } + +private: + std::vector const& valuesEB_; + std::vector const& valuesEE_; + + cms::cuda::ESProduct product_; +#endif // __CUDACC__ +}; + +#endif // RecoLocalCalo_EcalRecAlgos_interface_EcalTimeCalibConstantsGPU_h diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalGainRatiosGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalGainRatiosGPU.cc new file mode 100644 index 0000000000000..d5980d8a757aa --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalGainRatiosGPU.cc @@ -0,0 +1,52 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalGainRatiosGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalGainRatiosGPU::EcalGainRatiosGPU(EcalGainRatios const& values) + : gain12Over6_(values.size()), gain6Over1_(values.size()) { + // fill in eb + auto const& barrelValues = values.barrelItems(); + for (unsigned int i = 0; i < barrelValues.size(); i++) { + gain12Over6_[i] = barrelValues[i].gain12Over6(); + gain6Over1_[i] = barrelValues[i].gain6Over1(); + } + + // fill in ee + auto const& endcapValues = values.endcapItems(); + auto const offset = barrelValues.size(); + for (unsigned int i = 0; i < endcapValues.size(); i++) { + gain12Over6_[offset + i] = endcapValues[i].gain12Over6(); + gain6Over1_[offset + i] = endcapValues[i].gain6Over1(); + } +} + +EcalGainRatiosGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(gain12Over6)); + cudaCheck(cudaFree(gain6Over1)); +} + +EcalGainRatiosGPU::Product const& EcalGainRatiosGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalGainRatiosGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck(cudaMalloc((void**)&product.gain12Over6, this->gain12Over6_.size() * sizeof(float))); + 
cudaCheck(cudaMalloc((void**)&product.gain6Over1, this->gain6Over1_.size() * sizeof(float))); + // transfer + cudaCheck(cudaMemcpyAsync(product.gain12Over6, + this->gain12Over6_.data(), + this->gain12Over6_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.gain6Over1, + this->gain6Over1_.data(), + this->gain6Over1_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalGainRatiosGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalIntercalibConstantsGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalIntercalibConstantsGPU.cc new file mode 100644 index 0000000000000..dec10cff57dd0 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalIntercalibConstantsGPU.cc @@ -0,0 +1,40 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalIntercalibConstantsGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalIntercalibConstantsGPU::EcalIntercalibConstantsGPU(EcalIntercalibConstants const& values) + : valuesEB_{values.barrelItems()}, valuesEE_{values.endcapItems()} {} + +EcalIntercalibConstantsGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(values)); +} + +EcalIntercalibConstantsGPU::Product const& EcalIntercalibConstantsGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalIntercalibConstantsGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck( + cudaMalloc((void**)&product.values, (this->valuesEB_.size() + this->valuesEE_.size()) * sizeof(float))); + + // offset in floats, not bytes + auto const offset = this->valuesEB_.size(); + + // transfer + cudaCheck(cudaMemcpyAsync(product.values, + this->valuesEB_.data(), + this->valuesEB_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.values + offset, + 
this->valuesEE_.data(), + this->valuesEE_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalIntercalibConstantsGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalLaserAPDPNRatiosGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalLaserAPDPNRatiosGPU.cc new file mode 100644 index 0000000000000..4aa92ea6750fe --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalLaserAPDPNRatiosGPU.cc @@ -0,0 +1,86 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalLaserAPDPNRatiosGPU::EcalLaserAPDPNRatiosGPU(EcalLaserAPDPNRatios const& values) + : p1_(values.getLaserMap().size()), + p2_(values.getLaserMap().size()), + p3_(values.getLaserMap().size()), + t1_(values.getTimeMap().size()), + t2_(values.getTimeMap().size()), + t3_(values.getTimeMap().size()) { + // fill in eb + // auto const& barrelValues = values.barrelItems(); + for (unsigned int i = 0; i < values.getLaserMap().barrelItems().size(); i++) { + p1_[i] = values.getLaserMap().barrelItems()[i].p1; + p2_[i] = values.getLaserMap().barrelItems()[i].p2; + p3_[i] = values.getLaserMap().barrelItems()[i].p3; + } + + // fill in ee + // auto const& endcapValues = values.endcapItems(); + auto const offset_laser = values.getLaserMap().barrelItems().size(); + for (unsigned int i = 0; i < values.getLaserMap().endcapItems().size(); i++) { + p1_[offset_laser + i] = values.getLaserMap().endcapItems()[i].p1; + p2_[offset_laser + i] = values.getLaserMap().endcapItems()[i].p2; + p3_[offset_laser + i] = values.getLaserMap().endcapItems()[i].p3; + } + + // Time is a simple std::vector + // typedef std::vector EcalLaserTimeStampMap; + for (unsigned int i = 0; i < values.getTimeMap().size(); i++) { + t1_[i] = values.getTimeMap()[i].t1.value(); + t2_[i] = values.getTimeMap()[i].t2.value(); + t3_[i] = 
values.getTimeMap()[i].t3.value(); + } +} + +EcalLaserAPDPNRatiosGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(p1)); + cudaCheck(cudaFree(p2)); + cudaCheck(cudaFree(p3)); + cudaCheck(cudaFree(t1)); + cudaCheck(cudaFree(t2)); + cudaCheck(cudaFree(t3)); +} + +EcalLaserAPDPNRatiosGPU::Product const& EcalLaserAPDPNRatiosGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalLaserAPDPNRatiosGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck(cudaMalloc((void**)&product.p1, this->p1_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.p2, this->p2_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.p3, this->p3_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.t1, this->t1_.size() * sizeof(edm::TimeValue_t))); + cudaCheck(cudaMalloc((void**)&product.t2, this->t2_.size() * sizeof(edm::TimeValue_t))); + cudaCheck(cudaMalloc((void**)&product.t3, this->t3_.size() * sizeof(edm::TimeValue_t))); + // transfer + cudaCheck(cudaMemcpyAsync( + product.p1, this->p1_.data(), this->p1_.size() * sizeof(float), cudaMemcpyHostToDevice, cudaStream)); + cudaCheck(cudaMemcpyAsync( + product.p2, this->p2_.data(), this->p2_.size() * sizeof(float), cudaMemcpyHostToDevice, cudaStream)); + cudaCheck(cudaMemcpyAsync( + product.p3, this->p3_.data(), this->p3_.size() * sizeof(float), cudaMemcpyHostToDevice, cudaStream)); + cudaCheck(cudaMemcpyAsync(product.t1, + this->t1_.data(), + this->t1_.size() * sizeof(edm::TimeValue_t), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.t2, + this->t2_.data(), + this->t2_.size() * sizeof(edm::TimeValue_t), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.t3, + this->t3_.data(), + this->t3_.size() * sizeof(edm::TimeValue_t), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + 
+TYPELOOKUP_DATA_REG(EcalLaserAPDPNRatiosGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalLaserAPDPNRatiosRefGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalLaserAPDPNRatiosRefGPU.cc new file mode 100644 index 0000000000000..8f77cf48fe1d1 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalLaserAPDPNRatiosRefGPU.cc @@ -0,0 +1,40 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosRefGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalLaserAPDPNRatiosRefGPU::EcalLaserAPDPNRatiosRefGPU(EcalLaserAPDPNRatiosRef const& values) + : valuesEB_{values.barrelItems()}, valuesEE_{values.endcapItems()} {} + +EcalLaserAPDPNRatiosRefGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(values)); +} + +EcalLaserAPDPNRatiosRefGPU::Product const& EcalLaserAPDPNRatiosRefGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalLaserAPDPNRatiosRefGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck( + cudaMalloc((void**)&product.values, (this->valuesEB_.size() + this->valuesEE_.size()) * sizeof(float))); + + // offset in floats, not bytes + auto const offset = this->valuesEB_.size(); + + // transfer + cudaCheck(cudaMemcpyAsync(product.values, + this->valuesEB_.data(), + this->valuesEB_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.values + offset, + this->valuesEE_.data(), + this->valuesEE_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalLaserAPDPNRatiosRefGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalLaserAlphasGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalLaserAlphasGPU.cc new file mode 100644 index 0000000000000..91de441bff683 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalLaserAlphasGPU.cc @@ -0,0 +1,40 @@ +#include 
"RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAlphasGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalLaserAlphasGPU::EcalLaserAlphasGPU(EcalLaserAlphas const& values) + : valuesEB_{values.barrelItems()}, valuesEE_{values.endcapItems()} {} + +EcalLaserAlphasGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(values)); +} + +EcalLaserAlphasGPU::Product const& EcalLaserAlphasGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalLaserAlphasGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck( + cudaMalloc((void**)&product.values, (this->valuesEB_.size() + this->valuesEE_.size()) * sizeof(float))); + + // offset in floats, not bytes + auto const offset = this->valuesEB_.size(); + + // transfer + cudaCheck(cudaMemcpyAsync(product.values, + this->valuesEB_.data(), + this->valuesEB_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.values + offset, + this->valuesEE_.data(), + this->valuesEE_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalLaserAlphasGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalLinearCorrectionsGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalLinearCorrectionsGPU.cc new file mode 100644 index 0000000000000..0af2a9044ab65 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalLinearCorrectionsGPU.cc @@ -0,0 +1,84 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLinearCorrectionsGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalLinearCorrectionsGPU::EcalLinearCorrectionsGPU(EcalLinearCorrections const& values) + : p1_(values.getValueMap().size()), + p2_(values.getValueMap().size()), + p3_(values.getValueMap().size()), + 
t1_(values.getTimeMap().size()), + t2_(values.getTimeMap().size()), + t3_(values.getTimeMap().size()) { + // fill in eb + for (unsigned int i = 0; i < values.getValueMap().barrelItems().size(); i++) { + p1_[i] = values.getValueMap().barrelItems()[i].p1; + p2_[i] = values.getValueMap().barrelItems()[i].p2; + p3_[i] = values.getValueMap().barrelItems()[i].p3; + } + + // fill in ee + auto const offset_laser = values.getValueMap().barrelItems().size(); + for (unsigned int i = 0; i < values.getValueMap().endcapItems().size(); i++) { + p1_[offset_laser + i] = values.getValueMap().endcapItems()[i].p1; + p2_[offset_laser + i] = values.getValueMap().endcapItems()[i].p2; + p3_[offset_laser + i] = values.getValueMap().endcapItems()[i].p3; + } + + // Time is a simple std::vector + // typedef std::vector EcalLaserTimeStampMap; + for (unsigned int i = 0; i < values.getTimeMap().size(); i++) { + t1_[i] = values.getTimeMap()[i].t1.value(); + t2_[i] = values.getTimeMap()[i].t2.value(); + t3_[i] = values.getTimeMap()[i].t3.value(); + } +} + +EcalLinearCorrectionsGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(p1)); + cudaCheck(cudaFree(p2)); + cudaCheck(cudaFree(p3)); + cudaCheck(cudaFree(t1)); + cudaCheck(cudaFree(t2)); + cudaCheck(cudaFree(t3)); +} + +EcalLinearCorrectionsGPU::Product const& EcalLinearCorrectionsGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalLinearCorrectionsGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck(cudaMalloc((void**)&product.p1, this->p1_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.p2, this->p2_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.p3, this->p3_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.t1, this->t1_.size() * sizeof(edm::TimeValue_t))); + cudaCheck(cudaMalloc((void**)&product.t2, this->t2_.size() * sizeof(edm::TimeValue_t))); + 
cudaCheck(cudaMalloc((void**)&product.t3, this->t3_.size() * sizeof(edm::TimeValue_t))); + // transfer + cudaCheck(cudaMemcpyAsync( + product.p1, this->p1_.data(), this->p1_.size() * sizeof(float), cudaMemcpyHostToDevice, cudaStream)); + cudaCheck(cudaMemcpyAsync( + product.p2, this->p2_.data(), this->p2_.size() * sizeof(float), cudaMemcpyHostToDevice, cudaStream)); + cudaCheck(cudaMemcpyAsync( + product.p3, this->p3_.data(), this->p3_.size() * sizeof(float), cudaMemcpyHostToDevice, cudaStream)); + cudaCheck(cudaMemcpyAsync(product.t1, + this->t1_.data(), + this->t1_.size() * sizeof(edm::TimeValue_t), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.t2, + this->t2_.data(), + this->t2_.size() * sizeof(edm::TimeValue_t), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.t3, + this->t3_.data(), + this->t3_.size() * sizeof(edm::TimeValue_t), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalLinearCorrectionsGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalMultifitParametersGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalMultifitParametersGPU.cc new file mode 100644 index 0000000000000..010da6444b614 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalMultifitParametersGPU.cc @@ -0,0 +1,66 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalMultifitParametersGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalMultifitParametersGPU::EcalMultifitParametersGPU(edm::ParameterSet const& ps) { + auto const& amplitudeFitParametersEB = ps.getParameter>("EBamplitudeFitParameters"); + auto const& amplitudeFitParametersEE = ps.getParameter>("EEamplitudeFitParameters"); + auto const& timeFitParametersEB = ps.getParameter>("EBtimeFitParameters"); + auto const& timeFitParametersEE = ps.getParameter>("EEtimeFitParameters"); + + amplitudeFitParametersEB_.resize(amplitudeFitParametersEB.size()); 
+ amplitudeFitParametersEE_.resize(amplitudeFitParametersEE.size()); + timeFitParametersEB_.resize(timeFitParametersEB.size()); + timeFitParametersEE_.resize(timeFitParametersEE.size()); + + std::copy(amplitudeFitParametersEB.begin(), amplitudeFitParametersEB.end(), amplitudeFitParametersEB_.begin()); + std::copy(amplitudeFitParametersEE.begin(), amplitudeFitParametersEE.end(), amplitudeFitParametersEE_.begin()); + std::copy(timeFitParametersEB.begin(), timeFitParametersEB.end(), timeFitParametersEB_.begin()); + std::copy(timeFitParametersEE.begin(), timeFitParametersEE.end(), timeFitParametersEE_.begin()); +} + +EcalMultifitParametersGPU::Product::~Product() { + cudaCheck(cudaFree(amplitudeFitParametersEB)); + cudaCheck(cudaFree(amplitudeFitParametersEE)); + cudaCheck(cudaFree(timeFitParametersEB)); + cudaCheck(cudaFree(timeFitParametersEE)); +} + +EcalMultifitParametersGPU::Product const& EcalMultifitParametersGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalMultifitParametersGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck(cudaMalloc((void**)&product.amplitudeFitParametersEB, + this->amplitudeFitParametersEB_.size() * sizeof(double))); + cudaCheck(cudaMalloc((void**)&product.amplitudeFitParametersEE, + this->amplitudeFitParametersEE_.size() * sizeof(double))); + cudaCheck(cudaMalloc((void**)&product.timeFitParametersEB, this->timeFitParametersEB_.size() * sizeof(double))); + cudaCheck(cudaMalloc((void**)&product.timeFitParametersEE, this->timeFitParametersEE_.size() * sizeof(double))); + + // transfer + cudaCheck(cudaMemcpyAsync(product.amplitudeFitParametersEB, + this->amplitudeFitParametersEB_.data(), + this->amplitudeFitParametersEB_.size() * sizeof(double), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.amplitudeFitParametersEE, + this->amplitudeFitParametersEE_.data(), + this->amplitudeFitParametersEE_.size() * 
sizeof(double), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.timeFitParametersEB, + this->timeFitParametersEB_.data(), + this->timeFitParametersEB_.size() * sizeof(double), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.timeFitParametersEE, + this->timeFitParametersEE_.data(), + this->timeFitParametersEE_.size() * sizeof(double), + cudaMemcpyHostToDevice, + cudaStream)); + }); + return product; +} + +TYPELOOKUP_DATA_REG(EcalMultifitParametersGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalPedestalsGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalPedestalsGPU.cc new file mode 100644 index 0000000000000..9e3284cd9c7c8 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalPedestalsGPU.cc @@ -0,0 +1,94 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalPedestalsGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalPedestalsGPU::EcalPedestalsGPU(EcalPedestals const& pedestals) + : mean_x12_(pedestals.size()), + rms_x12_(pedestals.size()), + mean_x6_(pedestals.size()), + rms_x6_(pedestals.size()), + mean_x1_(pedestals.size()), + rms_x1_(pedestals.size()) { + // fill in eb + auto const& barrelValues = pedestals.barrelItems(); + for (unsigned int i = 0; i < barrelValues.size(); i++) { + mean_x12_[i] = barrelValues[i].mean_x12; + rms_x12_[i] = barrelValues[i].rms_x12; + mean_x6_[i] = barrelValues[i].mean_x6; + rms_x6_[i] = barrelValues[i].rms_x6; + mean_x1_[i] = barrelValues[i].mean_x1; + rms_x1_[i] = barrelValues[i].rms_x1; + } + + // fill in ee + auto const& endcapValues = pedestals.endcapItems(); + auto const offset = barrelValues.size(); + for (unsigned int i = 0; i < endcapValues.size(); i++) { + mean_x12_[offset + i] = endcapValues[i].mean_x12; + rms_x12_[offset + i] = endcapValues[i].rms_x12; + mean_x6_[offset + i] = endcapValues[i].mean_x6; + rms_x6_[offset + i] = endcapValues[i].rms_x6; + mean_x1_[offset + i] = 
endcapValues[i].mean_x1; + rms_x1_[offset + i] = endcapValues[i].rms_x1; + } +} + +EcalPedestalsGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(mean_x12)); + cudaCheck(cudaFree(rms_x12)); + cudaCheck(cudaFree(mean_x6)); + cudaCheck(cudaFree(rms_x6)); + cudaCheck(cudaFree(mean_x1)); + cudaCheck(cudaFree(rms_x1)); +} + +EcalPedestalsGPU::Product const& EcalPedestalsGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalPedestalsGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck(cudaMalloc((void**)&product.mean_x12, this->mean_x12_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.rms_x12, this->mean_x12_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.mean_x6, this->mean_x12_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.rms_x6, this->mean_x12_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.mean_x1, this->mean_x12_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.rms_x1, this->mean_x12_.size() * sizeof(float))); + + // transfer + cudaCheck(cudaMemcpyAsync(product.mean_x12, + this->mean_x12_.data(), + this->mean_x12_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.rms_x12, + this->rms_x12_.data(), + this->rms_x12_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.mean_x6, + this->mean_x6_.data(), + this->mean_x6_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.rms_x6, + this->rms_x6_.data(), + this->rms_x6_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.mean_x1, + this->mean_x1_.data(), + this->mean_x1_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.rms_x1, + this->rms_x1_.data(), + 
this->rms_x1_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalPedestalsGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalPulseCovariancesGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalPulseCovariancesGPU.cc new file mode 100644 index 0000000000000..bbeda99652e22 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalPulseCovariancesGPU.cc @@ -0,0 +1,42 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalPulseCovariancesGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalPulseCovariancesGPU::EcalPulseCovariancesGPU(EcalPulseCovariances const& values) + : valuesEB_{values.barrelItems()}, valuesEE_{values.endcapItems()} {} + +EcalPulseCovariancesGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(values)); +} + +EcalPulseCovariancesGPU::Product const& EcalPulseCovariancesGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalPulseCovariancesGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck(cudaMalloc((void**)&product.values, + (this->valuesEE_.size() + this->valuesEB_.size()) * sizeof(EcalPulseCovariance))); + + // offset in terms of sizeof(EcalPulseCovariance) + uint32_t offset = this->valuesEB_.size(); + + // transfer eb + cudaCheck(cudaMemcpyAsync(product.values, + this->valuesEB_.data(), + this->valuesEB_.size() * sizeof(EcalPulseCovariance), + cudaMemcpyHostToDevice, + cudaStream)); + + // transfer ee starting at values + offset + cudaCheck(cudaMemcpyAsync(product.values + offset, + this->valuesEE_.data(), + this->valuesEE_.size() * sizeof(EcalPulseCovariance), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalPulseCovariancesGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalPulseShapesGPU.cc 
b/RecoLocalCalo/EcalRecAlgos/src/EcalPulseShapesGPU.cc new file mode 100644 index 0000000000000..aee122a01627d --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalPulseShapesGPU.cc @@ -0,0 +1,42 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalPulseShapesGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalPulseShapesGPU::EcalPulseShapesGPU(EcalPulseShapes const& values) + : valuesEB_{values.barrelItems()}, valuesEE_{values.endcapItems()} {} + +EcalPulseShapesGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(values)); +} + +EcalPulseShapesGPU::Product const& EcalPulseShapesGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalPulseShapesGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck(cudaMalloc((void**)&product.values, + (this->valuesEE_.size() + this->valuesEB_.size()) * sizeof(EcalPulseShape))); + + // offset in terms of sizeof(EcalPulseShape) - plain c array + uint32_t offset = this->valuesEB_.size(); + + // transfer eb + cudaCheck(cudaMemcpyAsync(product.values, + this->valuesEB_.data(), + this->valuesEB_.size() * sizeof(EcalPulseShape), + cudaMemcpyHostToDevice, + cudaStream)); + + // transfer ee starting at values + offset + cudaCheck(cudaMemcpyAsync(product.values + offset, + this->valuesEE_.data(), + this->valuesEE_.size() * sizeof(EcalPulseShape), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalPulseShapesGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalRecHitParametersGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalRecHitParametersGPU.cc new file mode 100644 index 0000000000000..0f6812d6d6ffe --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalRecHitParametersGPU.cc @@ -0,0 +1,82 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalRecHitParametersGPU.h" + +#include 
"FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "CommonTools/Utils/interface/StringToEnumValue.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalRechitChannelStatusGPU.h" +#include "DataFormats/EcalRecHit/interface/EcalRecHit.h" + +EcalRecHitParametersGPU::EcalRecHitParametersGPU(edm::ParameterSet const& ps) { + auto const& ChannelStatusToBeExcluded = StringToEnumValue( + ps.getParameter>("ChannelStatusToBeExcluded")); + + ChannelStatusToBeExcluded_.resize(ChannelStatusToBeExcluded.size()); + std::copy(ChannelStatusToBeExcluded.begin(), ChannelStatusToBeExcluded.end(), ChannelStatusToBeExcluded_.begin()); + + // https://github.com/cms-sw/cmssw/blob/266e21cfc9eb409b093e4cf064f4c0a24c6ac293/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitWorkerSimple.cc + + // Traslate string representation of flagsMapDBReco into enum values + const edm::ParameterSet& p = ps.getParameter("flagsMapDBReco"); + std::vector recoflagbitsStrings = p.getParameterNames(); + + for (unsigned int i = 0; i != recoflagbitsStrings.size(); ++i) { + EcalRecHit::Flags recoflagbit = (EcalRecHit::Flags)StringToEnumValue(recoflagbitsStrings[i]); + std::vector dbstatus_s = p.getParameter>(recoflagbitsStrings[i]); + // std::vector dbstatuses; + for (unsigned int j = 0; j != dbstatus_s.size(); ++j) { + EcalChannelStatusCode::Code dbstatus = + (EcalChannelStatusCode::Code)StringToEnumValue(dbstatus_s[j]); + expanded_v_DB_reco_flags_.push_back(dbstatus); + } + + expanded_Sizes_v_DB_reco_flags_.push_back(dbstatus_s.size()); + expanded_flagbit_v_DB_reco_flags_.push_back(recoflagbit); + } +} + +EcalRecHitParametersGPU::Product::~Product() { + cudaCheck(cudaFree(ChannelStatusToBeExcluded)); + cudaCheck(cudaFree(expanded_v_DB_reco_flags)); + cudaCheck(cudaFree(expanded_Sizes_v_DB_reco_flags)); + cudaCheck(cudaFree(expanded_flagbit_v_DB_reco_flags)); +} + +EcalRecHitParametersGPU::Product const& 
EcalRecHitParametersGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalRecHitParametersGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck(cudaMalloc((void**)&product.ChannelStatusToBeExcluded, + this->ChannelStatusToBeExcluded_.size() * sizeof(int))); + cudaCheck(cudaMalloc((void**)&product.expanded_v_DB_reco_flags, + this->expanded_v_DB_reco_flags_.size() * sizeof(int))); + cudaCheck(cudaMalloc((void**)&product.expanded_Sizes_v_DB_reco_flags, + this->expanded_Sizes_v_DB_reco_flags_.size() * sizeof(uint32_t))); + cudaCheck(cudaMalloc((void**)&product.expanded_flagbit_v_DB_reco_flags, + this->expanded_flagbit_v_DB_reco_flags_.size() * sizeof(uint32_t))); + + // transfer + cudaCheck(cudaMemcpyAsync(product.ChannelStatusToBeExcluded, + this->ChannelStatusToBeExcluded_.data(), + this->ChannelStatusToBeExcluded_.size() * sizeof(int), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.expanded_v_DB_reco_flags, + this->expanded_v_DB_reco_flags_.data(), + this->expanded_v_DB_reco_flags_.size() * sizeof(int), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.expanded_Sizes_v_DB_reco_flags, + this->expanded_Sizes_v_DB_reco_flags_.data(), + this->expanded_Sizes_v_DB_reco_flags_.size() * sizeof(uint32_t), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.expanded_flagbit_v_DB_reco_flags, + this->expanded_flagbit_v_DB_reco_flags_.data(), + this->expanded_flagbit_v_DB_reco_flags_.size() * sizeof(uint32_t), + cudaMemcpyHostToDevice, + cudaStream)); + }); + return product; +} + +TYPELOOKUP_DATA_REG(EcalRecHitParametersGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalRechitADCToGeVConstantGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalRechitADCToGeVConstantGPU.cc new file mode 100644 index 0000000000000..5f01068f95186 --- /dev/null +++ 
b/RecoLocalCalo/EcalRecAlgos/src/EcalRechitADCToGeVConstantGPU.cc @@ -0,0 +1,34 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalRechitADCToGeVConstantGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalRechitADCToGeVConstantGPU::EcalRechitADCToGeVConstantGPU(EcalADCToGeVConstant const& values) + : adc2gev_(2) // size is 2, one form EB and one for EE +{ + adc2gev_[0] = values.getEBValue(); + adc2gev_[1] = values.getEEValue(); +} + +EcalRechitADCToGeVConstantGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(adc2gev)); +} + +EcalRechitADCToGeVConstantGPU::Product const& EcalRechitADCToGeVConstantGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalRechitADCToGeVConstantGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck(cudaMalloc((void**)&product.adc2gev, this->adc2gev_.size() * sizeof(float))); + // transfer + cudaCheck(cudaMemcpyAsync(product.adc2gev, + this->adc2gev_.data(), + this->adc2gev_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalRechitADCToGeVConstantGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalRechitChannelStatusGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalRechitChannelStatusGPU.cc new file mode 100644 index 0000000000000..1e6801fbd326a --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalRechitChannelStatusGPU.cc @@ -0,0 +1,42 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalRechitChannelStatusGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalRechitChannelStatusGPU::EcalRechitChannelStatusGPU(EcalChannelStatus const& values) : status_(values.size()) { + // fill in eb + auto const& barrelValues = values.barrelItems(); + for (unsigned int i = 0; i < barrelValues.size(); i++) { + 
status_[i] = barrelValues[i].getEncodedStatusCode(); + } + + // fill in ee + auto const& endcapValues = values.endcapItems(); + auto const offset = barrelValues.size(); + for (unsigned int i = 0; i < endcapValues.size(); i++) { + status_[offset + i] = endcapValues[i].getEncodedStatusCode(); + } +} + +EcalRechitChannelStatusGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(status)); +} + +EcalRechitChannelStatusGPU::Product const& EcalRechitChannelStatusGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalRechitChannelStatusGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck(cudaMalloc((void**)&product.status, this->status_.size() * sizeof(uint16_t))); + // transfer + cudaCheck(cudaMemcpyAsync(product.status, + this->status_.data(), + this->status_.size() * sizeof(uint16_t), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalRechitChannelStatusGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalSamplesCorrelationGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalSamplesCorrelationGPU.cc new file mode 100644 index 0000000000000..2a98067f51d9e --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalSamplesCorrelationGPU.cc @@ -0,0 +1,76 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalSamplesCorrelationGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalSamplesCorrelationGPU::EcalSamplesCorrelationGPU(EcalSamplesCorrelation const& values) + : EBG12SamplesCorrelation_{values.EBG12SamplesCorrelation}, + EBG6SamplesCorrelation_{values.EBG6SamplesCorrelation}, + EBG1SamplesCorrelation_{values.EBG1SamplesCorrelation}, + EEG12SamplesCorrelation_{values.EEG12SamplesCorrelation}, + EEG6SamplesCorrelation_{values.EEG6SamplesCorrelation}, + EEG1SamplesCorrelation_{values.EEG1SamplesCorrelation} {} + 
+EcalSamplesCorrelationGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(EBG12SamplesCorrelation)); + cudaCheck(cudaFree(EBG6SamplesCorrelation)); + cudaCheck(cudaFree(EBG1SamplesCorrelation)); + cudaCheck(cudaFree(EEG12SamplesCorrelation)); + cudaCheck(cudaFree(EEG6SamplesCorrelation)); + cudaCheck(cudaFree(EEG1SamplesCorrelation)); +} + +EcalSamplesCorrelationGPU::Product const& EcalSamplesCorrelationGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalSamplesCorrelationGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck(cudaMalloc((void**)&product.EBG12SamplesCorrelation, + this->EBG12SamplesCorrelation_.size() * sizeof(double))); + cudaCheck( + cudaMalloc((void**)&product.EBG6SamplesCorrelation, this->EBG6SamplesCorrelation_.size() * sizeof(double))); + cudaCheck( + cudaMalloc((void**)&product.EBG1SamplesCorrelation, this->EBG1SamplesCorrelation_.size() * sizeof(double))); + cudaCheck(cudaMalloc((void**)&product.EEG12SamplesCorrelation, + this->EEG12SamplesCorrelation_.size() * sizeof(double))); + cudaCheck( + cudaMalloc((void**)&product.EEG6SamplesCorrelation, this->EEG6SamplesCorrelation_.size() * sizeof(double))); + cudaCheck( + cudaMalloc((void**)&product.EEG1SamplesCorrelation, this->EEG1SamplesCorrelation_.size() * sizeof(double))); + // transfer + cudaCheck(cudaMemcpyAsync(product.EBG12SamplesCorrelation, + this->EBG12SamplesCorrelation_.data(), + this->EBG12SamplesCorrelation_.size() * sizeof(double), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.EBG6SamplesCorrelation, + this->EBG6SamplesCorrelation_.data(), + this->EBG6SamplesCorrelation_.size() * sizeof(double), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.EBG1SamplesCorrelation, + this->EBG1SamplesCorrelation_.data(), + this->EBG1SamplesCorrelation_.size() * sizeof(double), + cudaMemcpyHostToDevice, + cudaStream)); + 
cudaCheck(cudaMemcpyAsync(product.EEG12SamplesCorrelation, + this->EEG12SamplesCorrelation_.data(), + this->EEG12SamplesCorrelation_.size() * sizeof(double), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.EEG6SamplesCorrelation, + this->EEG6SamplesCorrelation_.data(), + this->EEG6SamplesCorrelation_.size() * sizeof(double), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.EEG1SamplesCorrelation, + this->EEG1SamplesCorrelation_.data(), + this->EEG1SamplesCorrelation_.size() * sizeof(double), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalSamplesCorrelationGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalTimeBiasCorrectionsGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalTimeBiasCorrectionsGPU.cc new file mode 100644 index 0000000000000..9ab0a6302a9c4 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalTimeBiasCorrectionsGPU.cc @@ -0,0 +1,61 @@ +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalTimeBiasCorrectionsGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalTimeBiasCorrectionsGPU::EcalTimeBiasCorrectionsGPU(EcalTimeBiasCorrections const& values) + : EBTimeCorrAmplitudeBins_{values.EBTimeCorrAmplitudeBins}, + EBTimeCorrShiftBins_{values.EBTimeCorrShiftBins}, + EETimeCorrAmplitudeBins_{values.EETimeCorrAmplitudeBins}, + EETimeCorrShiftBins_{values.EETimeCorrShiftBins} {} + +EcalTimeBiasCorrectionsGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(EBTimeCorrAmplitudeBins)); + cudaCheck(cudaFree(EBTimeCorrShiftBins)); + cudaCheck(cudaFree(EETimeCorrAmplitudeBins)); + cudaCheck(cudaFree(EETimeCorrShiftBins)); +} + +EcalTimeBiasCorrectionsGPU::Product const& EcalTimeBiasCorrectionsGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalTimeBiasCorrectionsGPU::Product& product, 
cudaStream_t cudaStream) { + // to get the size of vectors later on + // should be removed and host conditions' objects used directly + product.EBTimeCorrAmplitudeBinsSize = this->EBTimeCorrAmplitudeBins_.size(); + product.EETimeCorrAmplitudeBinsSize = this->EETimeCorrAmplitudeBins_.size(); + + // malloc + cudaCheck(cudaMalloc((void**)&product.EBTimeCorrAmplitudeBins, + this->EBTimeCorrAmplitudeBins_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.EBTimeCorrShiftBins, this->EBTimeCorrShiftBins_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.EETimeCorrAmplitudeBins, + this->EETimeCorrAmplitudeBins_.size() * sizeof(float))); + cudaCheck(cudaMalloc((void**)&product.EETimeCorrShiftBins, this->EETimeCorrShiftBins_.size() * sizeof(float))); + // transfer + cudaCheck(cudaMemcpyAsync(product.EBTimeCorrAmplitudeBins, + this->EBTimeCorrAmplitudeBins_.data(), + this->EBTimeCorrAmplitudeBins_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.EBTimeCorrShiftBins, + this->EBTimeCorrShiftBins_.data(), + this->EBTimeCorrShiftBins_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.EETimeCorrAmplitudeBins, + this->EETimeCorrAmplitudeBins_.data(), + this->EETimeCorrAmplitudeBins_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.EETimeCorrShiftBins, + this->EETimeCorrShiftBins_.data(), + this->EETimeCorrShiftBins_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalTimeBiasCorrectionsGPU); diff --git a/RecoLocalCalo/EcalRecAlgos/src/EcalTimeCalibConstantsGPU.cc b/RecoLocalCalo/EcalRecAlgos/src/EcalTimeCalibConstantsGPU.cc new file mode 100644 index 0000000000000..d724a33f1d4e1 --- /dev/null +++ b/RecoLocalCalo/EcalRecAlgos/src/EcalTimeCalibConstantsGPU.cc @@ -0,0 +1,40 @@ +#include 
"RecoLocalCalo/EcalRecAlgos/interface/EcalTimeCalibConstantsGPU.h" + +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" + +EcalTimeCalibConstantsGPU::EcalTimeCalibConstantsGPU(EcalTimeCalibConstants const& values) + : valuesEB_{values.barrelItems()}, valuesEE_{values.endcapItems()} {} + +EcalTimeCalibConstantsGPU::Product::~Product() { + // deallocation + cudaCheck(cudaFree(values)); +} + +EcalTimeCalibConstantsGPU::Product const& EcalTimeCalibConstantsGPU::getProduct(cudaStream_t cudaStream) const { + auto const& product = product_.dataForCurrentDeviceAsync( + cudaStream, [this](EcalTimeCalibConstantsGPU::Product& product, cudaStream_t cudaStream) { + // malloc + cudaCheck( + cudaMalloc((void**)&product.values, (this->valuesEB_.size() + this->valuesEE_.size()) * sizeof(float))); + + // offset in floats, not bytes + auto const offset = this->valuesEB_.size(); + + // transfer + cudaCheck(cudaMemcpyAsync(product.values, + this->valuesEB_.data(), + this->valuesEB_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + cudaCheck(cudaMemcpyAsync(product.values + offset, + this->valuesEE_.data(), + this->valuesEE_.size() * sizeof(float), + cudaMemcpyHostToDevice, + cudaStream)); + }); + + return product; +} + +TYPELOOKUP_DATA_REG(EcalTimeCalibConstantsGPU); diff --git a/RecoLocalCalo/EcalRecProducers/BuildFile.xml b/RecoLocalCalo/EcalRecProducers/BuildFile.xml index 25939a2a69b8e..aa19516964fd9 100644 --- a/RecoLocalCalo/EcalRecProducers/BuildFile.xml +++ b/RecoLocalCalo/EcalRecProducers/BuildFile.xml @@ -1,5 +1,11 @@ - + + + + + + + diff --git a/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationCommonKernels.cu b/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationCommonKernels.cu new file mode 100644 index 0000000000000..f1b1a53a78a30 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationCommonKernels.cu @@ -0,0 +1,486 @@ +#include +#include + 
+#include + +#include "CondFormats/EcalObjects/interface/EcalPulseCovariances.h" +#include "CondFormats/EcalObjects/interface/EcalPulseShapes.h" +#include "CondFormats/EcalObjects/interface/EcalSamplesCorrelation.h" +#include "DataFormats/EcalDigi/interface/EcalDataFrame.h" +#include "DataFormats/EcalRecHit/interface/EcalUncalibratedRecHit.h" +#include "DataFormats/Math/interface/approx_exp.h" +#include "DataFormats/Math/interface/approx_log.h" +#include "FWCore/Utilities/interface/CMSUnrollLoop.h" + +#include "AmplitudeComputationCommonKernels.h" +#include "KernelHelpers.h" + +namespace ecal { + namespace multifit { + + /// + /// assume kernel launch configuration is + /// (MAXSAMPLES * nchannels, blocks) + /// + __global__ void kernel_prep_1d_and_initialize(EcalPulseShape const* shapes_in, + uint16_t const* digis_in_eb, + uint32_t const* dids_eb, + uint16_t const* digis_in_ee, + uint32_t const* dids_ee, + SampleVector* amplitudes, + SampleVector* amplitudesForMinimizationEB, + SampleVector* amplitudesForMinimizationEE, + SampleGainVector* gainsNoise, + float const* mean_x1, + float const* mean_x12, + float const* rms_x12, + float const* mean_x6, + float const* gain6Over1, + float const* gain12Over6, + bool* hasSwitchToGain6, + bool* hasSwitchToGain1, + bool* isSaturated, + ::ecal::reco::StorageScalarType* energiesEB, + ::ecal::reco::StorageScalarType* energiesEE, + ::ecal::reco::StorageScalarType* chi2EB, + ::ecal::reco::StorageScalarType* chi2EE, + ::ecal::reco::StorageScalarType* g_pedestalEB, + ::ecal::reco::StorageScalarType* g_pedestalEE, + uint32_t* dids_outEB, + uint32_t* dids_outEE, + uint32_t* flagsEB, + uint32_t* flagsEE, + char* acState, + BXVectorType* bxs, + uint32_t const offsetForHashes, + uint32_t const offsetForInputs, + bool const gainSwitchUseMaxSampleEB, + bool const gainSwitchUseMaxSampleEE, + int const nchannels) { + constexpr bool dynamicPedestal = false; //---- default to false, ok + constexpr int nsamples = EcalDataFrame::MAXSAMPLES; + 
constexpr int sample_max = 5; + constexpr int full_pulse_max = 9; + int const tx = threadIdx.x + blockIdx.x * blockDim.x; + int const nchannels_per_block = blockDim.x / nsamples; + int const ch = tx / nsamples; + // for accessing input arrays + int const inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch; + int const inputTx = ch >= offsetForInputs ? tx - offsetForInputs * 10 : tx; + // eb is first and then ee + auto const* digis_in = ch >= offsetForInputs ? digis_in_ee : digis_in_eb; + auto const* dids = ch >= offsetForInputs ? dids_ee : dids_eb; + int const sample = threadIdx.x % nsamples; + + // need to ref the right ptr + // macro is for clarity and safety +#define ARRANGE(var) auto* var = ch >= offsetForInputs ? var##EE : var##EB + ARRANGE(amplitudesForMinimization); + ARRANGE(energies); + ARRANGE(chi2); + ARRANGE(g_pedestal); + ARRANGE(dids_out); + ARRANGE(flags); +#undef ARRANGE + + if (ch < nchannels) { + // array of 10 x channels per block + // TODO: any other way of doing simple reduction + // assume bool is 1 byte, should be quite safe + extern __shared__ char shared_mem[]; + bool* shr_hasSwitchToGain6 = reinterpret_cast(shared_mem); + bool* shr_hasSwitchToGain1 = shr_hasSwitchToGain6 + nchannels_per_block * nsamples; + bool* shr_hasSwitchToGain0 = shr_hasSwitchToGain1 + nchannels_per_block * nsamples; + bool* shr_isSaturated = shr_hasSwitchToGain0 + nchannels_per_block * nsamples; + bool* shr_hasSwitchToGain0_tmp = shr_isSaturated + nchannels_per_block * nsamples; + char* shr_counts = reinterpret_cast(shr_hasSwitchToGain0_tmp) + nchannels_per_block * nsamples; + + // + // indices + // + auto const did = DetId{dids[inputCh]}; + auto const isBarrel = did.subdetId() == EcalBarrel; + // TODO offset for ee, 0 for eb + auto const hashedId = isBarrel ? 
ecal::reconstruction::hashedIndexEB(did.rawId()) + : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId()); + + // + // pulse shape template + + // will be used in the future for setting state + auto const rmsForChecking = rms_x12[hashedId]; + + // + // amplitudes + // + int const adc = ecal::mgpa::adc(digis_in[inputTx]); + int const gainId = ecal::mgpa::gainId(digis_in[inputTx]); + SampleVector::Scalar amplitude = 0.; + SampleVector::Scalar pedestal = 0.; + SampleVector::Scalar gainratio = 0.; + + // store into shared mem for initialization + shr_hasSwitchToGain6[threadIdx.x] = gainId == EcalMgpaBitwiseGain6; + shr_hasSwitchToGain1[threadIdx.x] = gainId == EcalMgpaBitwiseGain1; + shr_hasSwitchToGain0_tmp[threadIdx.x] = gainId == EcalMgpaBitwiseGain0; + shr_hasSwitchToGain0[threadIdx.x] = shr_hasSwitchToGain0_tmp[threadIdx.x]; + shr_counts[threadIdx.x] = 0; + __syncthreads(); + + // non-divergent branch (except for the last 4 threads) + if (threadIdx.x <= blockDim.x - 5) { + CMS_UNROLL_LOOP + for (int i = 0; i < 5; i++) + shr_counts[threadIdx.x] += shr_hasSwitchToGain0[threadIdx.x + i]; + } + shr_isSaturated[threadIdx.x] = shr_counts[threadIdx.x] == 5; + + // + // unrolled reductions + // + if (sample < 5) { + shr_hasSwitchToGain6[threadIdx.x] = + shr_hasSwitchToGain6[threadIdx.x] || shr_hasSwitchToGain6[threadIdx.x + 5]; + shr_hasSwitchToGain1[threadIdx.x] = + shr_hasSwitchToGain1[threadIdx.x] || shr_hasSwitchToGain1[threadIdx.x + 5]; + + // duplication of hasSwitchToGain0 in order not to + // introduce another syncthreads + shr_hasSwitchToGain0_tmp[threadIdx.x] = + shr_hasSwitchToGain0_tmp[threadIdx.x] || shr_hasSwitchToGain0_tmp[threadIdx.x + 5]; + } + __syncthreads(); + + if (sample < 2) { + // note, both threads per channel take value [3] twice to avoid another if + shr_hasSwitchToGain6[threadIdx.x] = shr_hasSwitchToGain6[threadIdx.x] || + shr_hasSwitchToGain6[threadIdx.x + 2] || + shr_hasSwitchToGain6[threadIdx.x + 3]; + 
shr_hasSwitchToGain1[threadIdx.x] = shr_hasSwitchToGain1[threadIdx.x] || + shr_hasSwitchToGain1[threadIdx.x + 2] || + shr_hasSwitchToGain1[threadIdx.x + 3]; + + shr_hasSwitchToGain0_tmp[threadIdx.x] = shr_hasSwitchToGain0_tmp[threadIdx.x] || + shr_hasSwitchToGain0_tmp[threadIdx.x + 2] || + shr_hasSwitchToGain0_tmp[threadIdx.x + 3]; + + // sample < 2 -> first 2 threads of each channel will be used here + // => 0 -> will compare 3 and 4 and put into 0 + // => 1 -> will compare 4 and 5 and put into 1 + shr_isSaturated[threadIdx.x] = shr_isSaturated[threadIdx.x + 3] || shr_isSaturated[threadIdx.x + 4]; + } + __syncthreads(); + + bool check_hasSwitchToGain0 = false; + + if (sample == 0) { + shr_hasSwitchToGain6[threadIdx.x] = + shr_hasSwitchToGain6[threadIdx.x] || shr_hasSwitchToGain6[threadIdx.x + 1]; + shr_hasSwitchToGain1[threadIdx.x] = + shr_hasSwitchToGain1[threadIdx.x] || shr_hasSwitchToGain1[threadIdx.x + 1]; + shr_hasSwitchToGain0_tmp[threadIdx.x] = + shr_hasSwitchToGain0_tmp[threadIdx.x] || shr_hasSwitchToGain0_tmp[threadIdx.x + 1]; + + hasSwitchToGain6[ch] = shr_hasSwitchToGain6[threadIdx.x]; + hasSwitchToGain1[ch] = shr_hasSwitchToGain1[threadIdx.x]; + + // set only for the threadIdx.x corresponding to sample==0 + check_hasSwitchToGain0 = shr_hasSwitchToGain0_tmp[threadIdx.x]; + + shr_isSaturated[threadIdx.x + 3] = shr_isSaturated[threadIdx.x] || shr_isSaturated[threadIdx.x + 1]; + isSaturated[ch] = shr_isSaturated[threadIdx.x + 3]; + } + + // TODO: w/o this sync, there is a race + // if (threadIdx == sample_max) below uses max sample thread, not for 0 sample + // check if we can remove it + __syncthreads(); + + // TODO: divergent branch + if (gainId == 0 || gainId == 3) { + pedestal = mean_x1[hashedId]; + gainratio = gain6Over1[hashedId] * gain12Over6[hashedId]; + gainsNoise[ch](sample) = 2; + } else if (gainId == 1) { + pedestal = mean_x12[hashedId]; + gainratio = 1.; + gainsNoise[ch](sample) = 0; + } else if (gainId == 2) { + pedestal = mean_x6[hashedId]; 
+ gainratio = gain12Over6[hashedId]; + gainsNoise[ch](sample) = 1; + } + + // TODO: compile time constant -> branch should be non-divergent + if (dynamicPedestal) + amplitude = static_cast(adc) * gainratio; + else + amplitude = (static_cast(adc) - pedestal) * gainratio; + amplitudes[ch][sample] = amplitude; + +#ifdef ECAL_RECO_CUDA_DEBUG + printf("%d %d %d %d %f %f %f\n", tx, ch, sample, adc, amplitude, pedestal, gainratio); + if (adc == 0) + printf("adc is zero\n"); +#endif + + // + // initialization + // + amplitudesForMinimization[inputCh](sample) = 0; + bxs[ch](sample) = sample - 5; + + // select the thread for the max sample + //---> hardcoded above to be 5th sample, ok + if (sample == sample_max) { + // + // initialization + // + acState[ch] = static_cast(MinimizationState::NotFinished); + energies[inputCh] = 0; + chi2[inputCh] = 0; + g_pedestal[inputCh] = 0; + uint32_t flag = 0; + dids_out[inputCh] = did.rawId(); + + // start of this channel in shared mem + int const chStart = threadIdx.x - sample_max; + // thread for the max sample in shared mem + int const threadMax = threadIdx.x; + auto const gainSwitchUseMaxSample = isBarrel ? 
gainSwitchUseMaxSampleEB : gainSwitchUseMaxSampleEE; + + // this flag setting is applied to all of the cases + if (shr_hasSwitchToGain6[chStart]) + flag |= 0x1 << EcalUncalibratedRecHit::kHasSwitchToGain6; + if (shr_hasSwitchToGain1[chStart]) + flag |= 0x1 << EcalUncalibratedRecHit::kHasSwitchToGain1; + + // this corresponds to cpu branching on lastSampleBeforeSaturation + // likely false + if (check_hasSwitchToGain0) { + // assign for the case some sample having gainId == 0 + //energies[inputCh] = amplitudes[ch][sample_max]; + energies[inputCh] = amplitude; + + // check if samples before sample_max have true + bool saturated_before_max = false; + CMS_UNROLL_LOOP + for (char ii = 0; ii < 5; ii++) + saturated_before_max = saturated_before_max || shr_hasSwitchToGain0[chStart + ii]; + + // if saturation is in the max sample and not in the first 5 + if (!saturated_before_max && shr_hasSwitchToGain0[threadMax]) + energies[inputCh] = 49140; // 4095 * 12 (maximum ADC range * MultiGainPreAmplifier (MGPA) gain) + // This is the actual maximum range that is set when we saturate. + //---- AM FIXME : no pedestal subtraction??? + //It should be "(4095. 
- pedestal) * gainratio" + + // set state flag to terminate further processing of this channel + acState[ch] = static_cast(MinimizationState::Precomputed); + flag |= 0x1 << EcalUncalibratedRecHit::kSaturated; + flags[inputCh] = flag; + return; + } + + // according to cpu version + // auto max_amplitude = amplitudes[ch][sample_max]; + auto const max_amplitude = amplitude; + // according to cpu version + auto shape_value = shapes_in[hashedId].pdfval[full_pulse_max - 7]; + // note, no syncing as the same thread will be accessing here + bool hasGainSwitch = + shr_hasSwitchToGain6[chStart] || shr_hasSwitchToGain1[chStart] || shr_isSaturated[chStart + 3]; + + // pedestal is final unconditionally + g_pedestal[inputCh] = pedestal; + if (hasGainSwitch && gainSwitchUseMaxSample) { + // thread for sample=0 will access the right guys + energies[inputCh] = max_amplitude / shape_value; + acState[ch] = static_cast(MinimizationState::Precomputed); + flags[inputCh] = flag; + return; + } + + // this happens cause sometimes rms_x12 is 0... 
+ // needs to be checkec why this is the case + // general case here is that noisecov is a Zero matrix + if (rmsForChecking == 0) { + acState[ch] = static_cast(MinimizationState::Precomputed); + flags[inputCh] = flag; + return; + } + + // for the case when no shortcuts were taken + flags[inputCh] = flag; + } + } + } + + /// + /// assume kernel launch configuration is + /// ([MAXSAMPLES, MAXSAMPLES], nchannels) + /// + __global__ void kernel_prep_2d(SampleGainVector const* gainNoise, + uint32_t const* dids_eb, + uint32_t const* dids_ee, + float const* rms_x12, + float const* rms_x6, + float const* rms_x1, + float const* gain12Over6, + float const* gain6Over1, + double const* G12SamplesCorrelationEB, + double const* G6SamplesCorrelationEB, + double const* G1SamplesCorrelationEB, + double const* G12SamplesCorrelationEE, + double const* G6SamplesCorrelationEE, + double const* G1SamplesCorrelationEE, + SampleMatrix* noisecov, + PulseMatrixType* pulse_matrix, + EcalPulseShape const* pulse_shape, + bool const* hasSwitchToGain6, + bool const* hasSwitchToGain1, + bool const* isSaturated, + uint32_t const offsetForHashes, + uint32_t const offsetForInputs) { + int const ch = blockIdx.x; + int const tx = threadIdx.x; + int const ty = threadIdx.y; + constexpr float addPedestalUncertainty = 0.f; + constexpr bool dynamicPedestal = false; + constexpr bool simplifiedNoiseModelForGainSwitch = true; //---- default is true + + // to access input arrays (ids and digis only) + int const inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch; + auto const* dids = ch >= offsetForInputs ? dids_ee : dids_eb; + + bool tmp0 = hasSwitchToGain6[ch]; + bool tmp1 = hasSwitchToGain1[ch]; + auto const did = DetId{dids[inputCh]}; + auto const isBarrel = did.subdetId() == EcalBarrel; + auto const hashedId = isBarrel ? ecal::reconstruction::hashedIndexEB(did.rawId()) + : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId()); + auto const G12SamplesCorrelation = isBarrel ? 
G12SamplesCorrelationEB : G12SamplesCorrelationEE; + auto const* G6SamplesCorrelation = isBarrel ? G6SamplesCorrelationEB : G6SamplesCorrelationEE; + auto const* G1SamplesCorrelation = isBarrel ? G1SamplesCorrelationEB : G1SamplesCorrelationEE; + bool tmp2 = isSaturated[ch]; + bool hasGainSwitch = tmp0 || tmp1 || tmp2; + auto const vidx = std::abs(ty - tx); + + // non-divergent branch for all threads per block + if (hasGainSwitch) { + // TODO: did not include simplified noise model + float noise_value = 0; + + // non-divergent branch - all threads per block + // TODO: all of these constants indicate that + // that these parts could be splitted into completely different + // kernels and run one of them only depending on the config + if (simplifiedNoiseModelForGainSwitch) { + int isample_max = 5; // according to cpu defs + int gainidx = gainNoise[ch][isample_max]; + + // non-divergent branches + if (gainidx == 0) + noise_value = rms_x12[hashedId] * rms_x12[hashedId] * G12SamplesCorrelation[vidx]; + if (gainidx == 1) + noise_value = gain12Over6[hashedId] * gain12Over6[hashedId] * rms_x6[hashedId] * rms_x6[hashedId] * + G6SamplesCorrelation[vidx]; + if (gainidx == 2) + noise_value = gain12Over6[hashedId] * gain12Over6[hashedId] * gain6Over1[hashedId] * gain6Over1[hashedId] * + rms_x1[hashedId] * rms_x1[hashedId] * G1SamplesCorrelation[vidx]; + if (!dynamicPedestal && addPedestalUncertainty > 0.f) + noise_value += addPedestalUncertainty * addPedestalUncertainty; + } else { + int gainidx = 0; + char mask = gainidx; + int pedestal = gainNoise[ch][ty] == mask ? 
1 : 0; + // NB: gainratio is 1, that is why it does not appear in the formula + noise_value += rms_x12[hashedId] * rms_x12[hashedId] * pedestal * G12SamplesCorrelation[vidx]; + // non-divergent branch + if (!dynamicPedestal && addPedestalUncertainty > 0.f) { + noise_value += addPedestalUncertainty * addPedestalUncertainty * pedestal; // gainratio is 1 + } + + // + gainidx = 1; + mask = gainidx; + pedestal = gainNoise[ch][ty] == mask ? 1 : 0; + noise_value += gain12Over6[hashedId] * gain12Over6[hashedId] * rms_x6[hashedId] * rms_x6[hashedId] * + pedestal * G6SamplesCorrelation[vidx]; + // non-divergent branch + if (!dynamicPedestal && addPedestalUncertainty > 0.f) { + noise_value += gain12Over6[hashedId] * gain12Over6[hashedId] * addPedestalUncertainty * + addPedestalUncertainty * pedestal; + } + + // + gainidx = 2; + mask = gainidx; + pedestal = gainNoise[ch][ty] == mask ? 1 : 0; + float tmp = gain6Over1[hashedId] * gain12Over6[hashedId]; + noise_value += tmp * tmp * rms_x1[hashedId] * rms_x1[hashedId] * pedestal * G1SamplesCorrelation[vidx]; + // non-divergent branch + if (!dynamicPedestal && addPedestalUncertainty > 0.f) { + noise_value += tmp * tmp * addPedestalUncertainty * addPedestalUncertainty * pedestal; + } + } + + noisecov[ch](ty, tx) = noise_value; + } else { + auto rms = rms_x12[hashedId]; + float noise_value = rms * rms * G12SamplesCorrelation[vidx]; + if (!dynamicPedestal && addPedestalUncertainty > 0.f) { + //---- add fully correlated component to noise covariance to inflate pedestal uncertainty + noise_value += addPedestalUncertainty * addPedestalUncertainty; + } + noisecov[ch](ty, tx) = noise_value; + } + + // pulse matrix + int const posToAccess = 9 - tx + ty; // see cpu for reference + float const value = posToAccess >= 7 ? 
pulse_shape[hashedId].pdfval[posToAccess - 7] : 0;
+      pulse_matrix[ch](ty, tx) = value;
+    }
+
+    ///
+    /// Reorder the fitted amplitudes of every channel in place: the value stored at position
+    /// `sampleidx` of amplitudes[ch] is moved to position activeBXs[ch](sampleidx) + 5, and the
+    /// value that lands on position 5 is additionally written to energies[ch].
+    /// Channels whose state is MinimizationState::Precomputed are left untouched.
+    ///
+    /// One thread handles one (channel, sample) pair; the dynamic shared memory must provide
+    /// blockDim.x values of SampleVector::Scalar.
+    /// NOTE(review): the in-place permutation is only race-free when all threads of a channel sit
+    /// in the same block, i.e. blockDim.x is a multiple of EcalDataFrame::MAXSAMPLES - confirm at
+    /// the launch site.
+    ///
+    __global__ void kernel_permute_results(SampleVector* amplitudes,
+                                           BXVectorType const* activeBXs,
+                                           ::ecal::reco::StorageScalarType* energies,
+                                           char const* acState,
+                                           int const nchannels) {
+      // constants
+      constexpr int nsamples = EcalDataFrame::MAXSAMPLES;
+
+      // indices
+      int const tx = threadIdx.x + blockIdx.x * blockDim.x;
+      int const ch = tx / nsamples;
+      int const sampleidx = tx % nsamples;  // this is to address activeBXs
+
+      // Threads past the last channel and channels with a precomputed amplitude have nothing to
+      // permute. They must not return before the barrier below: __syncthreads() has to be reached
+      // by every thread of the block. The short-circuit keeps acState[ch] from being read out of range.
+      bool const needsPermutation =
+          ch < nchannels && static_cast<MinimizationState>(acState[ch]) != MinimizationState::Precomputed;
+
+      // configure shared memory and cp into it
+      extern __shared__ char smem[];
+      auto* values = reinterpret_cast<SampleVector::Scalar*>(smem);
+      if (needsPermutation)
+        values[threadIdx.x] = amplitudes[ch](sampleidx);
+      // every read of amplitudes[] issued by this block completes before any write below
+      __syncthreads();
+
+      if (!needsPermutation)
+        return;
+
+      // get the sample for this bx
+      auto const sample = static_cast<int>(activeBXs[ch](sampleidx)) + 5;
+
+      // store back to global
+      amplitudes[ch](sample) = values[threadIdx.x];
+
+      // store sample 5 separately
+      // only for the case when minimization was performed
+      // not for cases with precomputed amplitudes
+      if (sample == 5)
+        energies[ch] = values[threadIdx.x];
+    }
+
+  }  // namespace multifit
+}  // namespace ecal
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationCommonKernels.h b/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationCommonKernels.h
new file mode 100644
index 0000000000000..479c623e83f62
--- /dev/null
+++ b/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationCommonKernels.h
@@ -0,0 +1,104 @@
+#ifndef RecoLocalCalo_EcalRecProducers_plugins_AmplitudeComputationCommonKernels_h
+#define RecoLocalCalo_EcalRecProducers_plugins_AmplitudeComputationCommonKernels_h
+
+#include "Common.h"
+#include "DeclsForKernels.h"
+#include "EigenMatrixTypes_gpu.h"
+
+class EcalPulseShape;
+// 
this flag setting is applied to all of the cases +class EcalPulseCovariance; +class EcalUncalibratedRecHit; + +namespace ecal { + namespace multifit { + + /// + /// assume kernel launch configuration is + /// (MAXSAMPLES * nchannels, blocks) + /// TODO: is there a point to split this kernel further to separate reductions + /// + __global__ void kernel_prep_1d_and_initialize(EcalPulseShape const* shapes_in, + uint16_t const* digis_in_eb, + uint32_t const* dids_eb, + uint16_t const* digis_in_ee, + uint32_t const* dids_ee, + SampleVector* amplitudes, + SampleVector* amplitudesForMinimizationEB, + SampleVector* amplitudesForMinimizationEE, + SampleGainVector* gainsNoise, + float const* mean_x1, + float const* mean_x12, + float const* rms_x12, + float const* mean_x6, + float const* gain6Over1, + float const* gain12Over6, + bool* hasSwitchToGain6, + bool* hasSwitchToGain1, + bool* isSaturated, + ::ecal::reco::StorageScalarType* energiesEB, + ::ecal::reco::StorageScalarType* energiesEE, + ::ecal::reco::StorageScalarType* chi2EB, + ::ecal::reco::StorageScalarType* chi2EE, + ::ecal::reco::StorageScalarType* pedestalEB, + ::ecal::reco::StorageScalarType* pedestalEE, + uint32_t* dids_outEB, + uint32_t* dids_outEE, + uint32_t* flagsEB, + uint32_t* flagsEE, + char* acState, + BXVectorType* bxs, + uint32_t const offsetForHashes, + uint32_t const offsetForInputs, + bool const gainSwitchUseMaxSampleEB, + bool const gainSwitchUseMaxSampleEE, + int const nchannels); + + /// + /// assume kernel launch configuration is + /// ([MAXSAMPLES, MAXSAMPLES], nchannels) + /// + __global__ void kernel_prep_2d(SampleGainVector const* gainNoise, + uint32_t const* dids_eb, + uint32_t const* dids_ee, + float const* rms_x12, + float const* rms_x6, + float const* rms_x1, + float const* gain12Over6, + float const* gain6Over1, + double const* G12SamplesCorrelationEB, + double const* G6SamplesCorrelationEB, + double const* G1SamplesCorrelationEB, + double const* G12SamplesCorrelationEE, + double const* 
G6SamplesCorrelationEE, + double const* G1SamplesCorrelationEE, + SampleMatrix* noisecov, + PulseMatrixType* pulse_matrix, + EcalPulseShape const* pulse_shape, + bool const* hasSwitchToGain6, + bool const* hasSwitchToGain1, + bool const* isSaturated, + uint32_t const offsetForHashes, + uint32_t const offsetForInputs); + + __global__ void kernel_permute_results(SampleVector* amplitudes, + BXVectorType const* activeBXs, + ::ecal::reco::StorageScalarType* energies, + char const* acState, + int const nchannels); + +/// +/// Build an Ecal RecHit. +/// TODO: Use SoA data structures on the host directly +/// the reason for removing this from minimize kernel is to isolate the minimize + +/// again, building an aos rec hit involves strides... -> bad memory access pattern +/// +#ifdef RUN_BUILD_AOS_RECHIT + __global__ void kernel_build_rechit( + float const* energies, float const* chi2s, uint32_t* dids, EcalUncalibratedRecHit* rechits, int nchannels); +#endif // RUN_BUILD_AOS_RECHIT + + } // namespace multifit +} // namespace ecal + +#endif // RecoLocalCalo_EcalRecProducers_plugins_AmplitudeComputationCommonKernels_h diff --git a/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationKernels.cu b/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationKernels.cu new file mode 100644 index 0000000000000..e5eff86d15ec7 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationKernels.cu @@ -0,0 +1,305 @@ +#include +#include + +#include + +#include "CondFormats/EcalObjects/interface/EcalPulseCovariances.h" +#include "CondFormats/EcalObjects/interface/EcalPulseShapes.h" +#include "DataFormats/EcalDigi/interface/EcalDataFrame.h" +#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h" +#include "DataFormats/Math/interface/approx_exp.h" +#include "DataFormats/Math/interface/approx_log.h" +#include "FWCore/Utilities/interface/CMSUnrollLoop.h" + +#include "AmplitudeComputationCommonKernels.h" +#include "AmplitudeComputationKernels.h" +#include 
"KernelHelpers.h"
+
+namespace ecal {
+  namespace multifit {
+
+    ///
+    /// Add the pulse-shape covariance contribution of the current amplitudes to `inverse_cov`.
+    ///
+    /// For every pulse with a non-zero amplitude, amplitude^2 times the shifted entries of
+    /// `pulse_covariance.covval` is accumulated into the lower triangle (row >= col) of
+    /// `inverse_cov`; entries above the diagonal are never touched.
+    ///
+    /// \param pulse_covariance  per-channel pulse covariance, read through __ldg
+    /// \param inverse_cov       matrix accumulated into; must provide operator()(row, col)
+    /// \param amplitudes        current solution vector, one entry per pulse
+    /// \return always true
+    ///
+    template <typename MatrixType>
+    __device__ __forceinline__ bool update_covariance(EcalPulseCovariance const& pulse_covariance,
+                                                      MatrixType& inverse_cov,
+                                                      SampleVector const& amplitudes) {
+      constexpr int nsamples = SampleVector::RowsAtCompileTime;
+      constexpr int npulses = BXVectorType::RowsAtCompileTime;
+
+      // signed index: it is compared against a signed bound and used in signed arithmetic below
+      CMS_UNROLL_LOOP
+      for (int ipulse = 0; ipulse < npulses; ipulse++) {
+        auto const amplitude = amplitudes.coeff(ipulse);
+        if (amplitude == 0)
+          continue;
+
+        // FIXME: ipulse - 5 -> ipulse - firstOffset
+        int const bx = ipulse - 5;
+        // first sample (and column) this pulse contributes to; never negative
+        int const first_sample_t = std::max(0, bx + 3);
+        // shift from sample index to covval index; col + offset >= 0 for every col >= first_sample_t
+        int const offset = -3 - bx;
+
+        auto const value_sq = amplitude * amplitude;
+
+        for (int col = first_sample_t; col < nsamples; col++) {
+          for (int row = col; row < nsamples; row++) {
+            inverse_cov(row, col) += value_sq * __ldg(&pulse_covariance.covval[row + offset][col + offset]);
+          }
+        }
+      }
+
+      return true;
+    }
+
+    ///
+    /// launch ctx parameters are (nchannels / block, blocks)
+    /// TODO: trivial impl for now, there must be a way to improve
+    ///
+    /// Conventions:
+    ///   - amplitudes -> solution vector, what we are fitting for
+    ///   - samples -> raw detector responses
+    ///   - passive constraint - satisfied constraint
+    ///   - active constraint - unsatisfied (yet) constraint
+    ///
+    __global__ void kernel_minimize(uint32_t const* dids_eb,
+                                    uint32_t const* dids_ee,
+                                    SampleMatrix const* __restrict__ noisecov,
+                                    EcalPulseCovariance const* __restrict__ pulse_covariance,
+                                    BXVectorType* bxs,
+                                    SampleVector const* __restrict__ samples,
+                                    SampleVector* amplitudesEB,
+                                    SampleVector* amplitudesEE,
+                                    PulseMatrixType const* __restrict__ pulse_matrix,
+                                    ::ecal::reco::StorageScalarType* chi2sEB,
+                                    ::ecal::reco::StorageScalarType* chi2sEE,
+                                    ::ecal::reco::StorageScalarType* energiesEB,
+                                    ::ecal::reco::StorageScalarType* energiesEE,
+                                    char* acState,
+                                    int nchannels,
+                                    int max_iterations,
+                                    uint32_t const offsetForHashes,
+                                    uint32_t const offsetForInputs) {
+ // FIXME: ecal has 10 samples and 10 pulses.... + // but this needs to be properly treated and renamed everywhere + constexpr auto NSAMPLES = SampleMatrix::RowsAtCompileTime; + constexpr auto NPULSES = SampleMatrix::ColsAtCompileTime; + static_assert(NSAMPLES == NPULSES); + + using DataType = SampleVector::Scalar; + + extern __shared__ char shrmem[]; + DataType* shrMatrixLForFnnlsStorage = + reinterpret_cast(shrmem) + calo::multifit::MapSymM::total * threadIdx.x; + DataType* shrAtAStorage = reinterpret_cast(shrmem) + + calo::multifit::MapSymM::total * (threadIdx.x + blockDim.x); + + // channel + int idx = threadIdx.x + blockDim.x * blockIdx.x; + +// ref the right ptr +#define ARRANGE(var) auto* var = idx >= offsetForInputs ? var##EE : var##EB + ARRANGE(amplitudes); + ARRANGE(chi2s); + ARRANGE(energies); +#undef ARRANGE + + if (idx < nchannels) { + if (static_cast(acState[idx]) == MinimizationState::Precomputed) + return; + + // get the hash + int const inputCh = idx >= offsetForInputs ? idx - offsetForInputs : idx; + auto const* dids = idx >= offsetForInputs ? dids_ee : dids_eb; + auto const did = DetId{dids[inputCh]}; + auto const isBarrel = did.subdetId() == EcalBarrel; + auto const hashedId = isBarrel ? 
ecal::reconstruction::hashedIndexEB(did.rawId()) + : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId()); + + // inits + int iter = 0; + int npassive = 0; + + calo::multifit::ColumnVector pulseOffsets; + CMS_UNROLL_LOOP + for (int i = 0; i < NPULSES; ++i) + pulseOffsets(i) = i; + + calo::multifit::ColumnVector resultAmplitudes; + CMS_UNROLL_LOOP + for (int counter = 0; counter < NPULSES; counter++) + resultAmplitudes(counter) = 0; + + // inits + //SampleDecompLLT covariance_decomposition; + //SampleMatrix inverse_cov; + // SampleVector::Scalar chi2 = 0, chi2_now = 0; + float chi2 = 0, chi2_now = 0; + + // loop until ocnverge + while (true) { + if (iter >= max_iterations) + break; + + //inverse_cov = noisecov[idx]; + //DataType covMatrixStorage[MapSymM::total]; + DataType* covMatrixStorage = shrMatrixLForFnnlsStorage; + calo::multifit::MapSymM covMatrix{covMatrixStorage}; + int counter = 0; + CMS_UNROLL_LOOP + for (int col = 0; col < NSAMPLES; col++) { + CMS_UNROLL_LOOP + for (int row = col; row < NSAMPLES; row++) + covMatrixStorage[counter++] = __ldg(&noisecov[idx].coeffRef(row, col)); + } + update_covariance(pulse_covariance[hashedId], covMatrix, resultAmplitudes); + + // compute actual covariance decomposition + //covariance_decomposition.compute(inverse_cov); + //auto const& matrixL = covariance_decomposition.matrixL(); + DataType matrixLStorage[calo::multifit::MapSymM::total]; + calo::multifit::MapSymM matrixL{matrixLStorage}; + calo::multifit::compute_decomposition_unrolled(matrixL, covMatrix); + + // L * A = P + calo::multifit::ColMajorMatrix A; + calo::multifit::solve_forward_subst_matrix(A, pulse_matrix[idx], matrixL); + + // L b = s + float reg_b[NSAMPLES]; + calo::multifit::solve_forward_subst_vector(reg_b, samples[idx], matrixL); + + // FIXME: shared mem + //DataType AtAStorage[MapSymM::total]; + calo::multifit::MapSymM AtA{shrAtAStorage}; + //SampleMatrix AtA; + SampleVector Atb; + CMS_UNROLL_LOOP + for (int icol = 0; icol < NPULSES; 
icol++) { + float reg_ai[NSAMPLES]; + + // load column icol + CMS_UNROLL_LOOP + for (int counter = 0; counter < NSAMPLES; counter++) + reg_ai[counter] = A(counter, icol); + + // compute diagoanl + float sum = 0.f; + CMS_UNROLL_LOOP + for (int counter = 0; counter < NSAMPLES; counter++) + sum += reg_ai[counter] * reg_ai[counter]; + + // store + AtA(icol, icol) = sum; + + // go thru the other columns + CMS_UNROLL_LOOP + for (int j = icol + 1; j < NPULSES; j++) { + // load column j + float reg_aj[NSAMPLES]; + CMS_UNROLL_LOOP + for (int counter = 0; counter < NSAMPLES; counter++) + reg_aj[counter] = A(counter, j); + + // accum + float sum = 0.f; + CMS_UNROLL_LOOP + for (int counter = 0; counter < NSAMPLES; counter++) + sum += reg_aj[counter] * reg_ai[counter]; + + // store + //AtA(icol, j) = sum; + AtA(j, icol) = sum; + } + + // Atb accum + float sum_atb = 0.f; + CMS_UNROLL_LOOP + for (int counter = 0; counter < NSAMPLES; counter++) + sum_atb += reg_ai[counter] * reg_b[counter]; + + // store atb + Atb(icol) = sum_atb; + } + + // FIXME: shared mem + //DataType matrixLForFnnlsStorage[MapSymM::total]; + calo::multifit::MapSymM matrixLForFnnls{shrMatrixLForFnnlsStorage}; + + calo::multifit::fnnls(AtA, + Atb, + //amplitudes[idx], + resultAmplitudes, + npassive, + pulseOffsets, + matrixLForFnnls, + 1e-11, + 500, + 16, + 2); + + calo::multifit::calculateChiSq(matrixL, pulse_matrix[idx], resultAmplitudes, samples[idx], chi2_now); + + auto deltachi2 = chi2_now - chi2; + chi2 = chi2_now; + + if (std::abs(deltachi2) < 1e-3) + break; + + //---- AM: TEST + //---- it was 3 lines above, now here as in the CPU version + ++iter; + } + + // store to global output values + // FIXME: amplitudes are used in global directly + chi2s[inputCh] = chi2; + energies[inputCh] = resultAmplitudes(5); + + CMS_UNROLL_LOOP + for (int counter = 0; counter < NPULSES; counter++) + amplitudes[inputCh](counter) = resultAmplitudes(counter); + } + } + + namespace v1 { + + void 
minimization_procedure(EventInputDataGPU const& eventInputGPU,
+                                  EventOutputDataGPU& eventOutputGPU,
+                                  EventDataForScratchGPU& scratch,
+                                  ConditionsProducts const& conditions,
+                                  ConfigurationParameters const& configParameters,
+                                  cudaStream_t cudaStream) {
+        // Enqueue kernel_minimize on cudaStream for all EB + EE channels of the event (one thread
+        // per channel) and check the launch for errors. Nothing is synchronized here.
+        using DataType = SampleVector::Scalar;
+        // upper bound on the fit iterations per channel, passed to the kernel as max_iterations
+        constexpr int maxIterations = 50;
+
+        unsigned int const totalChannels = eventInputGPU.ebDigis.size + eventInputGPU.eeDigis.size;
+        // unsigned int threads_min = conf.threads.x;
+        // TODO: configure from python
+        unsigned int const threads_min = configParameters.kernelMinimizeThreads[0];
+        // ceil(totalChannels / threads_min), but never fewer than one block
+        unsigned int const blocks_min =
+            threads_min > totalChannels ? 1 : (totalChannels + threads_min - 1) / threads_min;
+        uint32_t const offsetForHashes = conditions.offsetForHashes;
+        // EE channels follow the EB ones, so the EB size is the index of the first EE channel
+        uint32_t const offsetForInputs = eventInputGPU.ebDigis.size;
+        // kernel_minimize carves two per-thread scratch arrays of MapSymM::total elements out of
+        // the dynamic shared memory.
+        // NOTE(review): the MapSymM template arguments were reconstructed - confirm they match the
+        // ones used inside kernel_minimize.
+        auto const nbytesShared = 2 * threads_min *
+                                  calo::multifit::MapSymM<DataType, SampleVector::RowsAtCompileTime>::total *
+                                  sizeof(DataType);
+        kernel_minimize<<<blocks_min, threads_min, nbytesShared, cudaStream>>>(
+            eventInputGPU.ebDigis.ids.get(),
+            eventInputGPU.eeDigis.ids.get(),
+            reinterpret_cast<SampleMatrix*>(scratch.noisecov.get()),
+            conditions.pulseCovariances.values,
+            reinterpret_cast<BXVectorType*>(scratch.activeBXs.get()),
+            reinterpret_cast<SampleVector*>(scratch.samples.get()),
+            reinterpret_cast<SampleVector*>(eventOutputGPU.recHitsEB.amplitudesAll.get()),
+            reinterpret_cast<SampleVector*>(eventOutputGPU.recHitsEE.amplitudesAll.get()),
+            reinterpret_cast<PulseMatrixType*>(scratch.pulse_matrix.get()),
+            eventOutputGPU.recHitsEB.chi2.get(),
+            eventOutputGPU.recHitsEE.chi2.get(),
+            eventOutputGPU.recHitsEB.amplitude.get(),
+            eventOutputGPU.recHitsEE.amplitude.get(),
+            scratch.acState.get(),
+            totalChannels,
+            maxIterations,
+            offsetForHashes,
+            offsetForInputs);
+        cudaCheck(cudaGetLastError());
+      }
+
+    }  // namespace v1
+
+  }  // namespace multifit
+}  // namespace ecal
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationKernels.h b/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationKernels.h
new file mode 100644
index 0000000000000..b8202f75b653b
--- /dev/null
+++ b/RecoLocalCalo/EcalRecProducers/plugins/AmplitudeComputationKernels.h
@@ -0,0 +1,29 @@
+#ifndef 
RecoLocalCalo_EcalRecProducers_plugins_AmplitudeComputationKernels_h
+#define RecoLocalCalo_EcalRecProducers_plugins_AmplitudeComputationKernels_h
+
+#include "Common.h"
+#include "DeclsForKernels.h"
+#include "EigenMatrixTypes_gpu.h"
+
+class EcalPulseShape;
+class EcalPulseCovariance;
+class EcalUncalibratedRecHit;
+
+namespace ecal {
+  namespace multifit {
+
+    namespace v1 {
+
+      void minimization_procedure(EventInputDataGPU const& eventInputGPU,
+                                  EventOutputDataGPU& eventOutputGPU,
+                                  EventDataForScratchGPU& scratch,
+                                  ConditionsProducts const& conditions,
+                                  ConfigurationParameters const& configParameters,
+                                  cudaStream_t cudaStream);
+
+    }
+
+  }  // namespace multifit
+}  // namespace ecal
+
+#endif  // RecoLocalCalo_EcalRecProducers_plugins_AmplitudeComputationKernels_h
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/BuildFile.xml b/RecoLocalCalo/EcalRecProducers/plugins/BuildFile.xml
index 25fe21603e864..61eed4689fd20 100644
--- a/RecoLocalCalo/EcalRecProducers/plugins/BuildFile.xml
+++ b/RecoLocalCalo/EcalRecProducers/plugins/BuildFile.xml
@@ -1,18 +1,22 @@ - - - - - - + + + + + + - - - + + + + + + - + - + +
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/Common.h b/RecoLocalCalo/EcalRecProducers/plugins/Common.h
new file mode 100644
index 0000000000000..55f5f613ed356
--- /dev/null
+++ b/RecoLocalCalo/EcalRecProducers/plugins/Common.h
@@ -0,0 +1,24 @@
+#ifndef RecoLocalCalo_EcalRecProducers_plugins_Common_h
+#define RecoLocalCalo_EcalRecProducers_plugins_Common_h
+
+// uint16_t is used below; include its header so this file does not rely on its includers
+#include <cstdint>
+
+// Bit-field accessors for 16-bit digi sample words.
+// NOTE(review): this header was originally described as "a workaround for std::abs not being a
+// constexpr function", but it defines no abs helper - confirm that remark can be dropped.
+namespace ecal {
+
+  // temporary
+  namespace mgpa {
+
+    // ADC field: the low 12 bits (mask 0xfff) of the sample word
+    constexpr int adc(uint16_t sample) { return sample & 0xfff; }
+    // gain field: the two bits directly above the ADC field (bits 12 and 13)
+    constexpr int gainId(uint16_t sample) { return (sample >> 12) & 0x3; }
+
+  }  // namespace mgpa
+
+}  // namespace ecal
+
+#endif  // RecoLocalCalo_EcalRecProducers_plugins_Common_h
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h b/RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h
new
file mode 100644 index 0000000000000..cac63b6b30112 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/DeclsForKernels.h @@ -0,0 +1,325 @@ +#ifndef RecoLocalCalo_EcalRecProducers_plugins_DeclsForKernels_h +#define RecoLocalCalo_EcalRecProducers_plugins_DeclsForKernels_h + +#include + +#include +#include + +#include "CUDADataFormats/EcalDigi/interface/DigisCollection.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h" +#include "CondFormats/EcalObjects/interface/EcalChannelStatus.h" +#include "CondFormats/EcalObjects/interface/EcalChannelStatusCode.h" +#include "CondFormats/EcalObjects/interface/EcalGainRatios.h" +#include "CondFormats/EcalObjects/interface/EcalPedestals.h" +#include "CondFormats/EcalObjects/interface/EcalTimeBiasCorrections.h" +#include "CondFormats/EcalObjects/interface/EcalTimeOffsetConstant.h" +#include "CondFormats/EcalObjects/interface/EcalWeightSet.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalGainRatiosGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalIntercalibConstantsGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosRefGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAlphasGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLinearCorrectionsGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalMultifitParametersGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalPedestalsGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalPulseCovariancesGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalPulseShapesGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalRechitADCToGeVConstantGPU.h" +#include 
"RecoLocalCalo/EcalRecAlgos/interface/EcalRechitChannelStatusGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalSamplesCorrelationGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalTimeBiasCorrectionsGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalTimeCalibConstantsGPU.h" + +#include "EigenMatrixTypes_gpu.h" + +struct EcalPulseShape; +class EcalSampleMask; +class EcalTimeBiasCorrections; +struct EcalPulseCovariance; +class EcalDigiCollection; +class EcalXtalGroupId; +class EcalSamplesCorrelation; +class EBDigiCollection; +class EEDigiCollection; + +namespace ecal { + namespace multifit { + + enum class TimeComputationState : char { NotFinished = 0, Finished = 1 }; + enum class MinimizationState : char { + NotFinished = 0, + Finished = 1, + Precomputed = 2, + }; + + // + struct EventInputDataGPU { + ecal::DigisCollection const& ebDigis; + ecal::DigisCollection const& eeDigis; + }; + + // parameters have a fixed type + // Can we go by with single precision + struct ConfigurationParameters { + using type = double; + // device ptrs + type *amplitudeFitParametersEB = nullptr, *amplitudeFitParametersEE = nullptr; + + uint32_t timeFitParametersSizeEB, timeFitParametersSizeEE; + // device ptrs + type *timeFitParametersEB = nullptr, *timeFitParametersEE = nullptr; + + type timeFitLimitsFirstEB, timeFitLimitsFirstEE; + type timeFitLimitsSecondEB, timeFitLimitsSecondEE; + + type timeConstantTermEB, timeConstantTermEE; + + type timeNconstEB, timeNconstEE; + + type amplitudeThreshEE, amplitudeThreshEB; + + type outOfTimeThreshG12pEB, outOfTimeThreshG12mEB; + type outOfTimeThreshG12pEE, outOfTimeThreshG12mEE; + type outOfTimeThreshG61pEE, outOfTimeThreshG61mEE; + type outOfTimeThreshG61pEB, outOfTimeThreshG61mEB; + + std::array kernelMinimizeThreads; + + bool shouldRunTimingComputation; + + uint32_t maxNumberHitsEB; + uint32_t maxNumberHitsEE; + }; + + struct EventOutputDataGPU { + UncalibratedRecHit<::calo::common::DevStoragePolicy> recHitsEB, 
recHitsEE; + + void allocate(ConfigurationParameters const& configParameters, cudaStream_t cudaStream) { + auto const sizeEB = configParameters.maxNumberHitsEB; + recHitsEB.amplitudesAll = cms::cuda::make_device_unique( + sizeEB * EcalDataFrame::MAXSAMPLES, cudaStream); + recHitsEB.amplitude = cms::cuda::make_device_unique(sizeEB, cudaStream); + recHitsEB.chi2 = cms::cuda::make_device_unique(sizeEB, cudaStream); + recHitsEB.pedestal = cms::cuda::make_device_unique(sizeEB, cudaStream); + + if (configParameters.shouldRunTimingComputation) { + recHitsEB.jitter = cms::cuda::make_device_unique(sizeEB, cudaStream); + recHitsEB.jitterError = cms::cuda::make_device_unique(sizeEB, cudaStream); + } + + recHitsEB.did = cms::cuda::make_device_unique(sizeEB, cudaStream); + recHitsEB.flags = cms::cuda::make_device_unique(sizeEB, cudaStream); + + auto const sizeEE = configParameters.maxNumberHitsEE; + recHitsEE.amplitudesAll = cms::cuda::make_device_unique( + sizeEE * EcalDataFrame::MAXSAMPLES, cudaStream); + recHitsEE.amplitude = cms::cuda::make_device_unique(sizeEE, cudaStream); + recHitsEE.chi2 = cms::cuda::make_device_unique(sizeEE, cudaStream); + recHitsEE.pedestal = cms::cuda::make_device_unique(sizeEE, cudaStream); + + if (configParameters.shouldRunTimingComputation) { + recHitsEE.jitter = cms::cuda::make_device_unique(sizeEE, cudaStream); + recHitsEE.jitterError = cms::cuda::make_device_unique(sizeEE, cudaStream); + } + + recHitsEE.did = cms::cuda::make_device_unique(sizeEE, cudaStream); + recHitsEE.flags = cms::cuda::make_device_unique(sizeEE, cudaStream); + } + }; + + template + constexpr auto getLength() -> uint32_t { + return EigenM::RowsAtCompileTime * EigenM::ColsAtCompileTime; + } + + struct EventDataForScratchGPU { + using SVT = SampleVector::Scalar; + using SGVT = SampleGainVector::Scalar; + using SMT = SampleMatrix::Scalar; + using PMT = PulseMatrixType::Scalar; + using BXVT = BXVectorType::Scalar; + + cms::cuda::device::unique_ptr samples; + 
cms::cuda::device::unique_ptr gainsNoise; + + cms::cuda::device::unique_ptr noisecov; + cms::cuda::device::unique_ptr pulse_matrix; + cms::cuda::device::unique_ptr activeBXs; + cms::cuda::device::unique_ptr acState; + + cms::cuda::device::unique_ptr hasSwitchToGain6, hasSwitchToGain1, isSaturated; + + cms::cuda::device::unique_ptr sample_values, sample_value_errors; + cms::cuda::device::unique_ptr useless_sample_values; + cms::cuda::device::unique_ptr chi2sNullHypot; + cms::cuda::device::unique_ptr sum0sNullHypot; + cms::cuda::device::unique_ptr sumAAsNullHypot; + cms::cuda::device::unique_ptr pedestal_nums; + cms::cuda::device::unique_ptr tMaxAlphaBetas, tMaxErrorAlphaBetas; + cms::cuda::device::unique_ptr accTimeMax, accTimeWgt; + cms::cuda::device::unique_ptr ampMaxAlphaBeta, ampMaxError; + cms::cuda::device::unique_ptr timeMax, timeError; + cms::cuda::device::unique_ptr tcState; + + void allocate(ConfigurationParameters const& configParameters, cudaStream_t cudaStream) { + constexpr auto svlength = getLength(); + constexpr auto sgvlength = getLength(); + constexpr auto smlength = getLength(); + constexpr auto pmlength = getLength(); + constexpr auto bxvlength = getLength(); + auto const size = configParameters.maxNumberHitsEB + configParameters.maxNumberHitsEE; + + auto alloc = [cudaStream](auto& var, uint32_t size) { + using element_type = typename std::remove_reference_t::element_type; + var = cms::cuda::make_device_unique(size, cudaStream); + }; + + alloc(samples, size * svlength); + alloc(gainsNoise, size * sgvlength); + + alloc(noisecov, size * smlength); + alloc(pulse_matrix, size * pmlength); + alloc(activeBXs, size * bxvlength); + alloc(acState, size); + + alloc(hasSwitchToGain6, size); + alloc(hasSwitchToGain1, size); + alloc(isSaturated, size); + + if (configParameters.shouldRunTimingComputation) { + alloc(sample_values, size * svlength); + alloc(sample_value_errors, size * svlength); + alloc(useless_sample_values, size * EcalDataFrame::MAXSAMPLES); + 
alloc(chi2sNullHypot, size); + alloc(sum0sNullHypot, size); + alloc(sumAAsNullHypot, size); + alloc(pedestal_nums, size); + + alloc(tMaxAlphaBetas, size); + alloc(tMaxErrorAlphaBetas, size); + alloc(accTimeMax, size); + alloc(accTimeWgt, size); + alloc(ampMaxAlphaBeta, size); + alloc(ampMaxError, size); + alloc(timeMax, size); + alloc(timeError, size); + alloc(tcState, size); + } + } + }; + + // const refs products to conditions + struct ConditionsProducts { + EcalPedestalsGPU::Product const& pedestals; + EcalGainRatiosGPU::Product const& gainRatios; + EcalPulseShapesGPU::Product const& pulseShapes; + EcalPulseCovariancesGPU::Product const& pulseCovariances; + EcalSamplesCorrelationGPU::Product const& samplesCorrelation; + EcalTimeBiasCorrectionsGPU::Product const& timeBiasCorrections; + EcalTimeCalibConstantsGPU::Product const& timeCalibConstants; + EcalSampleMask const& sampleMask; + EcalTimeOffsetConstant const& timeOffsetConstant; + uint32_t offsetForHashes; + EcalMultifitParametersGPU::Product const& multifitParameters; + }; + + struct xyz { + int x, y, z; + }; + + struct conf_data { + xyz threads; + bool runV1; + cudaStream_t cuStream; + }; + + } // namespace multifit +} // namespace ecal + +// +// ECAL Rechit producer +// + +namespace ecal { + namespace rechit { + + // parameters that are read in the configuration file for rechit producer + struct ConfigurationParameters { + // device ptrs + int* ChannelStatusToBeExcluded = nullptr; + uint32_t ChannelStatusToBeExcludedSize; + + bool killDeadChannels; + + bool recoverEBIsolatedChannels; + bool recoverEEIsolatedChannels; + bool recoverEBVFE; + bool recoverEEVFE; + bool recoverEBFE; + bool recoverEEFE; + + float EBLaserMIN; + float EELaserMIN; + float EBLaserMAX; + float EELaserMAX; + + int* expanded_v_DB_reco_flags; + uint32_t* expanded_Sizes_v_DB_reco_flags; + uint32_t* expanded_flagbit_v_DB_reco_flags; + uint32_t expanded_v_DB_reco_flagsSize; + + uint32_t flagmask; + uint32_t maxNumberHitsEB; + uint32_t 
maxNumberHitsEE;
+    };
+
+    // Device-side output buffers of the rechit step: one RecHit collection for EB and one for EE.
+    struct EventOutputDataGPU {
+      RecHit<::calo::common::DevStoragePolicy> recHitsEB, recHitsEE;
+
+      // Allocate every member buffer of both collections, passing cudaStream to each allocation.
+      // Each buffer gets one element per hit, up to the configured capacity of its collection.
+      void allocate(ConfigurationParameters const& configParameters, cudaStream_t cudaStream) {
+        // configParameters supplies the two capacities: maxNumberHitsEB and maxNumberHitsEE
+        auto const sizeEB = configParameters.maxNumberHitsEB;
+        recHitsEB.energy = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEB, cudaStream);
+        recHitsEB.time = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEB, cudaStream);
+        recHitsEB.chi2 = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEB, cudaStream);
+        recHitsEB.flagBits = cms::cuda::make_device_unique(sizeEB, cudaStream);
+        recHitsEB.extra = cms::cuda::make_device_unique(sizeEB, cudaStream);
+        recHitsEB.did = cms::cuda::make_device_unique(sizeEB, cudaStream);
+
+        // same set of buffers for the EE collection
+        auto const sizeEE = configParameters.maxNumberHitsEE;
+        recHitsEE.energy = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEE, cudaStream);
+        recHitsEE.time = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEE, cudaStream);
+        recHitsEE.chi2 = cms::cuda::make_device_unique<::ecal::reco::StorageScalarType[]>(sizeEE, cudaStream);
+        recHitsEE.flagBits = cms::cuda::make_device_unique(sizeEE, cudaStream);
+        recHitsEE.extra = cms::cuda::make_device_unique(sizeEE, cudaStream);
+        recHitsEE.did = cms::cuda::make_device_unique(sizeEE, cudaStream);
+      }
+    };
+
+    // Non-owning references to the EB and EE uncalibrated rechit collections.
+    struct EventInputDataGPU {
+      ecal::UncalibratedRecHit const& ebUncalibRecHits;
+      ecal::UncalibratedRecHit const& eeUncalibRecHits;
+    };
+
+    // const refs products to conditions
+    struct ConditionsProducts {
+      EcalRechitADCToGeVConstantGPU::Product const& ADCToGeV;
+      EcalIntercalibConstantsGPU::Product const& Intercalib;
+      EcalRechitChannelStatusGPU::Product const& ChannelStatus;
+
+      EcalLaserAPDPNRatiosGPU::Product const& LaserAPDPNRatios;
+      EcalLaserAPDPNRatiosRefGPU::Product const& 
LaserAPDPNRatiosRef; + EcalLaserAlphasGPU::Product const& LaserAlphas; + EcalLinearCorrectionsGPU::Product const& LinearCorrections; + + uint32_t offsetForHashes; + }; + + } // namespace rechit +} // namespace ecal + +#endif // RecoLocalCalo_EcalRecProducers_plugins_DeclsForKernels_h diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalCPURecHitProducer.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalCPURecHitProducer.cc new file mode 100644 index 0000000000000..3de6b62898925 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalCPURecHitProducer.cc @@ -0,0 +1,168 @@ +//#define ECAL_RECO_CUDA_DEBUG + +#ifdef ECAL_RECO_CUDA_DEBUG +#include +#endif + +// framework +#include "FWCore/Framework/interface/stream/EDProducer.h" + +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" + +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" + +// algorithm specific + +#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" + +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h" + +class EcalCPURecHitProducer : public edm::stream::EDProducer { +public: + explicit EcalCPURecHitProducer(edm::ParameterSet const& ps); + ~EcalCPURecHitProducer() override = default; + static void fillDescriptions(edm::ConfigurationDescriptions&); + +private: + void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override; + void produce(edm::Event&, edm::EventSetup const&) override; + +private: + using InputProduct = cms::cuda::Product>; + edm::EDGetTokenT recHitsInEBToken_, recHitsInEEToken_; + using OutputProduct = ecal::RecHit>; + edm::EDPutTokenT recHitsOutEBToken_, recHitsOutEEToken_; + + OutputProduct recHitsEB_, recHitsEE_; + bool containsTimingInformation_; +}; + +void 
EcalCPURecHitProducer::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { + edm::ParameterSetDescription desc; + + desc.add("recHitsInLabelEB", edm::InputTag{"ecalRecHitProducerGPU", "EcalRecHitsEB"}); + desc.add("recHitsInLabelEE", edm::InputTag{"ecalRecHitProducerGPU", "EcalRecHitsEE"}); + desc.add("recHitsOutLabelEB", "EcalRecHitsEB"); + desc.add("recHitsOutLabelEE", "EcalRecHitsEE"); + desc.add("containsTimingInformation", false); + + confDesc.addWithDefaultLabel(desc); +} + +EcalCPURecHitProducer::EcalCPURecHitProducer(const edm::ParameterSet& ps) + : recHitsInEBToken_{consumes(ps.getParameter("recHitsInLabelEB"))}, + recHitsInEEToken_{consumes(ps.getParameter("recHitsInLabelEE"))}, + recHitsOutEBToken_{produces(ps.getParameter("recHitsOutLabelEB"))}, + recHitsOutEEToken_{produces(ps.getParameter("recHitsOutLabelEE"))}, + containsTimingInformation_{ps.getParameter("containsTimingInformation")} {} + +void EcalCPURecHitProducer::acquire(edm::Event const& event, + edm::EventSetup const& setup, + edm::WaitingTaskWithArenaHolder taskHolder) { + // retrieve data/ctx + auto const& ebRecHitsProduct = event.get(recHitsInEBToken_); + auto const& eeRecHitsProduct = event.get(recHitsInEEToken_); + cms::cuda::ScopedContextAcquire ctx{ebRecHitsProduct, std::move(taskHolder)}; + auto const& ebRecHits = ctx.get(ebRecHitsProduct); + auto const& eeRecHits = ctx.get(eeRecHitsProduct); + + // resize the output buffers + recHitsEB_.resize(ebRecHits.size); + recHitsEE_.resize(eeRecHits.size); + +#ifdef ECAL_RECO_CUDA_DEBUG + std::cout << " [EcalCPURecHitProducer::acquire] ebRecHits.size = " << ebRecHits.size << std::endl; + std::cout << " [EcalCPURecHitProducer::acquire] eeRecHits.size = " << eeRecHits.size << std::endl; +#endif + + // enqeue transfers + cudaCheck(cudaMemcpyAsync(recHitsEB_.did.data(), + ebRecHits.did.get(), + recHitsEB_.did.size() * sizeof(uint32_t), + cudaMemcpyDeviceToHost, + ctx.stream())); + cudaCheck(cudaMemcpyAsync(recHitsEE_.did.data(), + 
eeRecHits.did.get(), + recHitsEE_.did.size() * sizeof(uint32_t), + cudaMemcpyDeviceToHost, + ctx.stream())); + // + // ./CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h:using StorageScalarType = float; + // + + cudaCheck(cudaMemcpyAsync(recHitsEB_.energy.data(), + ebRecHits.energy.get(), + recHitsEB_.energy.size() * sizeof(::ecal::reco::StorageScalarType), + cudaMemcpyDeviceToHost, + ctx.stream())); + cudaCheck(cudaMemcpyAsync(recHitsEE_.energy.data(), + eeRecHits.energy.get(), + recHitsEE_.energy.size() * sizeof(::ecal::reco::StorageScalarType), + cudaMemcpyDeviceToHost, + ctx.stream())); + + cudaCheck(cudaMemcpyAsync(recHitsEB_.chi2.data(), + ebRecHits.chi2.get(), + recHitsEB_.chi2.size() * sizeof(::ecal::reco::StorageScalarType), + cudaMemcpyDeviceToHost, + ctx.stream())); + cudaCheck(cudaMemcpyAsync(recHitsEE_.chi2.data(), + eeRecHits.chi2.get(), + recHitsEE_.chi2.size() * sizeof(::ecal::reco::StorageScalarType), + cudaMemcpyDeviceToHost, + ctx.stream())); + + cudaCheck(cudaMemcpyAsync(recHitsEB_.extra.data(), + ebRecHits.extra.get(), + recHitsEB_.extra.size() * sizeof(uint32_t), + cudaMemcpyDeviceToHost, + ctx.stream())); + cudaCheck(cudaMemcpyAsync(recHitsEE_.extra.data(), + eeRecHits.extra.get(), + recHitsEE_.extra.size() * sizeof(uint32_t), + cudaMemcpyDeviceToHost, + ctx.stream())); + + cudaCheck(cudaMemcpyAsync(recHitsEB_.flagBits.data(), + ebRecHits.flagBits.get(), + recHitsEB_.flagBits.size() * sizeof(uint32_t), + cudaMemcpyDeviceToHost, + ctx.stream())); + cudaCheck(cudaMemcpyAsync(recHitsEE_.flagBits.data(), + eeRecHits.flagBits.get(), + recHitsEE_.flagBits.size() * sizeof(uint32_t), + cudaMemcpyDeviceToHost, + ctx.stream())); + +#ifdef ECAL_RECO_CUDA_DEBUG + for (unsigned int ieb = 0; ieb < ebRecHits.size; ieb++) { + if (recHitsEB_.extra[ieb] != 0) + std::cout << " [ " << ieb << " :: " << ebRecHits.size << " ] [ " << recHitsEB_.did[ieb] + << " ] eb extra = " << recHitsEB_.extra[ieb] << std::endl; + } + + for (unsigned int ieb = 0; ieb < 
ebRecHits.size; ieb++) { + if (recHitsEB_.energy[ieb] != 0) + std::cout << " [ " << ieb << " :: " << ebRecHits.size << " ] [ " << recHitsEB_.did[ieb] + << " ] eb energy = " << recHitsEB_.energy[ieb] << std::endl; + } + + for (unsigned int iee = 0; iee < eeRecHits.size; iee++) { + if (recHitsEE_.energy[iee] != 0) + std::cout << " [ " << iee << " :: " << eeRecHits.size << " ] [ " << recHitsEE_.did[iee] + << " ] ee energy = " << recHitsEE_.energy[iee] << std::endl; + } +#endif +} + +void EcalCPURecHitProducer::produce(edm::Event& event, edm::EventSetup const& setup) { + // put into event + event.emplace(recHitsOutEBToken_, std::move(recHitsEB_)); + event.emplace(recHitsOutEEToken_, std::move(recHitsEE_)); +} + +DEFINE_FWK_MODULE(EcalCPURecHitProducer); diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalCPUUncalibRecHitProducer.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalCPUUncalibRecHitProducer.cc new file mode 100644 index 0000000000000..801d378c7c391 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalCPUUncalibRecHitProducer.cc @@ -0,0 +1,120 @@ +#include + +// framework +#include "FWCore/Framework/interface/stream/EDProducer.h" + +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" + +// algorithm specific + +#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" + +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h" + +class EcalCPUUncalibRecHitProducer : public edm::stream::EDProducer { +public: + explicit EcalCPUUncalibRecHitProducer(edm::ParameterSet const& ps); + ~EcalCPUUncalibRecHitProducer() override; + static void fillDescriptions(edm::ConfigurationDescriptions&); + +private: + void acquire(edm::Event const&, 
edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override; + void produce(edm::Event&, edm::EventSetup const&) override; + +private: + using InputProduct = cms::cuda::Product>; + edm::EDGetTokenT recHitsInEBToken_, recHitsInEEToken_; + using OutputProduct = ecal::UncalibratedRecHit>; + edm::EDPutTokenT recHitsOutEBToken_, recHitsOutEEToken_; + + OutputProduct recHitsEB_, recHitsEE_; + bool containsTimingInformation_; +}; + +void EcalCPUUncalibRecHitProducer::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { + edm::ParameterSetDescription desc; + + desc.add("recHitsInLabelEB", edm::InputTag{"ecalUncalibRecHitProducerGPU", "EcalUncalibRecHitsEB"}); + desc.add("recHitsInLabelEE", edm::InputTag{"ecalUncalibRecHitProducerGPU", "EcalUncalibRecHitsEE"}); + desc.add("recHitsOutLabelEB", "EcalUncalibRecHitsEB"); + desc.add("recHitsOutLabelEE", "EcalUncalibRecHitsEE"); + desc.add("containsTimingInformation", false); + + confDesc.add("ecalCPUUncalibRecHitProducer", desc); +} + +EcalCPUUncalibRecHitProducer::EcalCPUUncalibRecHitProducer(const edm::ParameterSet& ps) + : recHitsInEBToken_{consumes(ps.getParameter("recHitsInLabelEB"))}, + recHitsInEEToken_{consumes(ps.getParameter("recHitsInLabelEE"))}, + recHitsOutEBToken_{produces(ps.getParameter("recHitsOutLabelEB"))}, + recHitsOutEEToken_{produces(ps.getParameter("recHitsOutLabelEE"))}, + containsTimingInformation_{ps.getParameter("containsTimingInformation")} {} + +EcalCPUUncalibRecHitProducer::~EcalCPUUncalibRecHitProducer() {} + +void EcalCPUUncalibRecHitProducer::acquire(edm::Event const& event, + edm::EventSetup const& setup, + edm::WaitingTaskWithArenaHolder taskHolder) { + // retrieve data/ctx + auto const& ebRecHitsProduct = event.get(recHitsInEBToken_); + auto const& eeRecHitsProduct = event.get(recHitsInEEToken_); + cms::cuda::ScopedContextAcquire ctx{ebRecHitsProduct, std::move(taskHolder)}; + auto const& ebRecHits = ctx.get(ebRecHitsProduct); + auto const& eeRecHits = ctx.get(eeRecHitsProduct); 
+ + // resize the output buffers + recHitsEB_.resize(ebRecHits.size); + recHitsEE_.resize(eeRecHits.size); + + auto lambdaToTransfer = [&ctx](auto& dest, auto* src) { + using vector_type = typename std::remove_reference::type; + using type = typename vector_type::value_type; + using src_type = typename std::remove_pointer::type; + static_assert(std::is_same::value && "dst and src data types do not match"); + cudaCheck(cudaMemcpyAsync(dest.data(), src, dest.size() * sizeof(type), cudaMemcpyDeviceToHost, ctx.stream())); + }; + + // enqeue transfers + lambdaToTransfer(recHitsEB_.did, ebRecHits.did.get()); + lambdaToTransfer(recHitsEE_.did, eeRecHits.did.get()); + + lambdaToTransfer(recHitsEB_.amplitudesAll, ebRecHits.amplitudesAll.get()); + lambdaToTransfer(recHitsEE_.amplitudesAll, eeRecHits.amplitudesAll.get()); + + lambdaToTransfer(recHitsEB_.amplitude, ebRecHits.amplitude.get()); + lambdaToTransfer(recHitsEE_.amplitude, eeRecHits.amplitude.get()); + + lambdaToTransfer(recHitsEB_.chi2, ebRecHits.chi2.get()); + lambdaToTransfer(recHitsEE_.chi2, eeRecHits.chi2.get()); + + lambdaToTransfer(recHitsEB_.pedestal, ebRecHits.pedestal.get()); + lambdaToTransfer(recHitsEE_.pedestal, eeRecHits.pedestal.get()); + + lambdaToTransfer(recHitsEB_.flags, ebRecHits.flags.get()); + lambdaToTransfer(recHitsEE_.flags, eeRecHits.flags.get()); + + if (containsTimingInformation_) { + lambdaToTransfer(recHitsEB_.jitter, ebRecHits.jitter.get()); + lambdaToTransfer(recHitsEE_.jitter, eeRecHits.jitter.get()); + + lambdaToTransfer(recHitsEB_.jitterError, ebRecHits.jitterError.get()); + lambdaToTransfer(recHitsEE_.jitterError, eeRecHits.jitterError.get()); + } +} + +void EcalCPUUncalibRecHitProducer::produce(edm::Event& event, edm::EventSetup const& setup) { + // tmp vectors + auto recHitsOutEB = std::make_unique(std::move(recHitsEB_)); + auto recHitsOutEE = std::make_unique(std::move(recHitsEE_)); + + // put into event + event.put(recHitsOutEBToken_, std::move(recHitsOutEB)); + 
event.put(recHitsOutEEToken_, std::move(recHitsOutEE)); +} + +DEFINE_FWK_MODULE(EcalCPUUncalibRecHitProducer); diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalESProducersGPUDefs.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalESProducersGPUDefs.cc new file mode 100644 index 0000000000000..3118d54c6a7e9 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalESProducersGPUDefs.cc @@ -0,0 +1,88 @@ +#include "CondFormats/DataRecord/interface/EcalADCToGeVConstantRcd.h" +#include "CondFormats/DataRecord/interface/EcalChannelStatusRcd.h" +#include "CondFormats/DataRecord/interface/EcalGainRatiosRcd.h" +#include "CondFormats/DataRecord/interface/EcalIntercalibConstantsRcd.h" +#include "CondFormats/DataRecord/interface/EcalLaserAPDPNRatiosRcd.h" +#include "CondFormats/DataRecord/interface/EcalLaserAPDPNRatiosRefRcd.h" +#include "CondFormats/DataRecord/interface/EcalLaserAlphasRcd.h" +#include "CondFormats/DataRecord/interface/EcalLinearCorrectionsRcd.h" +#include "CondFormats/DataRecord/interface/EcalPedestalsRcd.h" +#include "CondFormats/DataRecord/interface/EcalPulseCovariancesRcd.h" +#include "CondFormats/DataRecord/interface/EcalPulseShapesRcd.h" +#include "CondFormats/DataRecord/interface/EcalSamplesCorrelationRcd.h" +#include "CondFormats/DataRecord/interface/EcalTimeBiasCorrectionsRcd.h" +#include "CondFormats/DataRecord/interface/EcalTimeCalibConstantsRcd.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "HeterogeneousCore/CUDACore/interface/ConvertingESProducerT.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalGainRatiosGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalIntercalibConstantsGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosRefGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAlphasGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLinearCorrectionsGPU.h" +#include 
"RecoLocalCalo/EcalRecAlgos/interface/EcalPedestalsGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalPulseCovariancesGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalPulseShapesGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalRechitADCToGeVConstantGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalRechitChannelStatusGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalSamplesCorrelationGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalTimeBiasCorrectionsGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalTimeCalibConstantsGPU.h" + +using EcalPedestalsGPUESProducer = ConvertingESProducerT; + +using EcalGainRatiosGPUESProducer = ConvertingESProducerT; + +using EcalPulseShapesGPUESProducer = ConvertingESProducerT; + +using EcalPulseCovariancesGPUESProducer = + ConvertingESProducerT; + +using EcalSamplesCorrelationGPUESProducer = + ConvertingESProducerT; + +using EcalTimeBiasCorrectionsGPUESProducer = + ConvertingESProducerT; + +using EcalTimeCalibConstantsGPUESProducer = + ConvertingESProducerT; + +using EcalRechitADCToGeVConstantGPUESProducer = + ConvertingESProducerT; + +using EcalIntercalibConstantsGPUESProducer = + ConvertingESProducerT; + +using EcalRechitChannelStatusGPUESProducer = + ConvertingESProducerT; + +using EcalLaserAPDPNRatiosGPUESProducer = + ConvertingESProducerT; + +using EcalLaserAPDPNRatiosRefGPUESProducer = + ConvertingESProducerT; + +using EcalLaserAlphasGPUESProducer = ConvertingESProducerT; + +using EcalLinearCorrectionsGPUESProducer = + ConvertingESProducerT; + +// +// This below also creates the .py config files, as described in HeterogeneousCore/CUDACore/interface/ConvertingESProducerT.h +// + +DEFINE_FWK_EVENTSETUP_MODULE(EcalPedestalsGPUESProducer); +DEFINE_FWK_EVENTSETUP_MODULE(EcalGainRatiosGPUESProducer); +DEFINE_FWK_EVENTSETUP_MODULE(EcalPulseShapesGPUESProducer); +DEFINE_FWK_EVENTSETUP_MODULE(EcalPulseCovariancesGPUESProducer); 
+DEFINE_FWK_EVENTSETUP_MODULE(EcalSamplesCorrelationGPUESProducer); +DEFINE_FWK_EVENTSETUP_MODULE(EcalTimeBiasCorrectionsGPUESProducer); +DEFINE_FWK_EVENTSETUP_MODULE(EcalTimeCalibConstantsGPUESProducer); + +DEFINE_FWK_EVENTSETUP_MODULE(EcalRechitADCToGeVConstantGPUESProducer); +DEFINE_FWK_EVENTSETUP_MODULE(EcalIntercalibConstantsGPUESProducer); +DEFINE_FWK_EVENTSETUP_MODULE(EcalRechitChannelStatusGPUESProducer); +DEFINE_FWK_EVENTSETUP_MODULE(EcalLaserAPDPNRatiosGPUESProducer); +DEFINE_FWK_EVENTSETUP_MODULE(EcalLaserAPDPNRatiosRefGPUESProducer); +DEFINE_FWK_EVENTSETUP_MODULE(EcalLaserAlphasGPUESProducer); +DEFINE_FWK_EVENTSETUP_MODULE(EcalLinearCorrectionsGPUESProducer); diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalMultifitParametersGPUESProducer.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalMultifitParametersGPUESProducer.cc new file mode 100644 index 0000000000000..1743df5aa945d --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalMultifitParametersGPUESProducer.cc @@ -0,0 +1,78 @@ +#include +#include +#include + +#include "FWCore/Framework/interface/ESProducer.h" +#include "FWCore/Framework/interface/ESProductHost.h" +#include "FWCore/Framework/interface/ESTransientHandle.h" +#include "FWCore/Framework/interface/EventSetupRecordIntervalFinder.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/ModuleFactory.h" +#include "FWCore/Framework/interface/SourceFactory.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/ReusableObjectHolder.h" +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalMultifitParametersGPU.h" + +class EcalMultifitParametersGPUESProducer : public edm::ESProducer, public edm::EventSetupRecordIntervalFinder { +public: + 
EcalMultifitParametersGPUESProducer(edm::ParameterSet const&); + ~EcalMultifitParametersGPUESProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions&); + std::unique_ptr produce(JobConfigurationGPURecord const&); + +protected: + void setIntervalFor(const edm::eventsetup::EventSetupRecordKey&, + const edm::IOVSyncValue&, + edm::ValidityInterval&) override; + +private: + edm::ParameterSet const pset_; +}; + +EcalMultifitParametersGPUESProducer::EcalMultifitParametersGPUESProducer(edm::ParameterSet const& pset) : pset_{pset} { + setWhatProduced(this); + findingRecord(); +} + +void EcalMultifitParametersGPUESProducer::setIntervalFor(const edm::eventsetup::EventSetupRecordKey& iKey, + const edm::IOVSyncValue& iTime, + edm::ValidityInterval& oInterval) { + oInterval = edm::ValidityInterval(edm::IOVSyncValue::beginOfTime(), edm::IOVSyncValue::endOfTime()); +} + +void EcalMultifitParametersGPUESProducer::fillDescriptions(edm::ConfigurationDescriptions& desc) { + edm::ParameterSetDescription d; + d.add>("pulseOffsets", {-3, -2, -1, 0, 1, 2, 3, 4}); + d.add>("EBtimeFitParameters", + {-2.015452e+00, + 3.130702e+00, + -1.234730e+01, + 4.188921e+01, + -8.283944e+01, + 9.101147e+01, + -5.035761e+01, + 1.105621e+01}); + d.add>("EEtimeFitParameters", + {-2.390548e+00, + 3.553628e+00, + -1.762341e+01, + 6.767538e+01, + -1.332130e+02, + 1.407432e+02, + -7.541106e+01, + 1.620277e+01}); + d.add>("EBamplitudeFitParameters", {1.138, 1.652}); + d.add>("EEamplitudeFitParameters", {1.890, 1.400}); + desc.addWithDefaultLabel(d); +} + +std::unique_ptr EcalMultifitParametersGPUESProducer::produce( + JobConfigurationGPURecord const&) { + return std::make_unique(pset_); +} + +DEFINE_FWK_EVENTSETUP_SOURCE(EcalMultifitParametersGPUESProducer); diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitBuilderKernels.cu b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitBuilderKernels.cu new file mode 100644 index 0000000000000..6e1b2a66c2507 --- /dev/null 
+++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitBuilderKernels.cu @@ -0,0 +1,676 @@ +#include + +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h" + +#include "EcalRecHitBuilderKernels.h" +#include "KernelHelpers.h" + +namespace ecal { + namespace rechit { + + // uncalibrecHit flags + enum UncalibRecHitFlags { + kGood = -1, // channel is good (mutually exclusive with other states) setFlagBit(kGood) reset flags_ to zero + kPoorReco, // channel has been badly reconstructed (e.g. bad shape, bad chi2 etc.) + kSaturated, // saturated channel + kOutOfTime, // channel out of time + kLeadingEdgeRecovered, // saturated channel: energy estimated from the leading edge before saturation + kHasSwitchToGain6, // at least one data frame is in G6 + kHasSwitchToGain1 // at least one data frame is in G1 + }; + + // recHit flags + enum RecHitFlags { + RecHitFlags_kGood = 0, // channel ok, the energy and time measurement are reliable + RecHitFlags_kPoorReco, // the energy is available from the UncalibRecHit, but approximate (bad shape, large chi2) + RecHitFlags_kOutOfTime, // the energy is available from the UncalibRecHit (sync reco), but the event is out of time + RecHitFlags_kFaultyHardware, // The energy is available from the UncalibRecHit, channel is faulty at some hardware level (e.g. 
noisy) + RecHitFlags_kNoisy, // the channel is very noisy + RecHitFlags_kPoorCalib, // the energy is available from the UncalibRecHit, but the calibration of the channel is poor + RecHitFlags_kSaturated, // saturated channel (recovery not tried) + RecHitFlags_kLeadingEdgeRecovered, // saturated channel: energy estimated from the leading edge before saturation + RecHitFlags_kNeighboursRecovered, // saturated/isolated dead: energy estimated from neighbours + RecHitFlags_kTowerRecovered, // channel in TT with no data link, info retrieved from Trigger Primitive + RecHitFlags_kDead, // channel is dead and any recovery fails + RecHitFlags_kKilled, // MC only flag: the channel is killed in the real detector + RecHitFlags_kTPSaturated, // the channel is in a region with saturated TP + RecHitFlags_kL1SpikeFlag, // the channel is in a region with TP with sFGVB = 0 + RecHitFlags_kWeird, // the signal is believed to originate from an anomalous deposit (spike) + RecHitFlags_kDiWeird, // the signal is anomalous, and neighbors another anomalous signal + RecHitFlags_kHasSwitchToGain6, // at least one data frame is in G6 + RecHitFlags_kHasSwitchToGain1, // at least one data frame is in G1 + // + RecHitFlags_kUnknown // to ease the interface with functions returning flags. 
+ }; + + // status code + enum EcalChannelStatusCode_Code { + kOk = 0, + kDAC, + kNoLaser, + kNoisy, + kNNoisy, + kNNNoisy, + kNNNNoisy, + kNNNNNoisy, + kFixedG6, + kFixedG1, + kFixedG0, + kNonRespondingIsolated, + kDeadVFE, + kDeadFE, + kNoDataNoTP + }; + + __global__ void kernel_create_ecal_rehit( + // configuration + int const* ChannelStatusToBeExcluded, + uint32_t ChannelStatusToBeExcludedSize, + bool const killDeadChannels, + bool const recoverEBIsolatedChannels, + bool const recoverEEIsolatedChannels, + bool const recoverEBVFE, + bool const recoverEEVFE, + bool const recoverEBFE, + bool const recoverEEFE, + float const EBLaserMIN, + float const EELaserMIN, + float const EBLaserMAX, + float const EELaserMAX, + // for flags setting + int const* expanded_v_DB_reco_flags, // FIXME AM: to be checked + uint32_t const* expanded_Sizes_v_DB_reco_flags, + uint32_t const* expanded_flagbit_v_DB_reco_flags, + uint32_t expanded_v_DB_reco_flagsSize, + uint32_t flagmask, + // conditions + float const* adc2gev, + float const* intercalib, + uint16_t const* status, + float const* apdpnrefs, + float const* alphas, + // input for transparency corrections + float const* p1, + float const* p2, + float const* p3, + edm::TimeValue_t const* t1, + edm::TimeValue_t const* t2, + edm::TimeValue_t const* t3, + // input for linear corrections + float const* lp1, + float const* lp2, + float const* lp3, + edm::TimeValue_t const* lt1, + edm::TimeValue_t const* lt2, + edm::TimeValue_t const* lt3, + // time, used for time dependent corrections + edm::TimeValue_t const event_time, + // input + uint32_t const* did_eb, + uint32_t const* did_ee, + ::ecal::reco::StorageScalarType const* amplitude_eb, // in adc counts + ::ecal::reco::StorageScalarType const* amplitude_ee, // in adc counts + ::ecal::reco::StorageScalarType const* time_eb, + ::ecal::reco::StorageScalarType const* time_ee, + ::ecal::reco::StorageScalarType const* chi2_eb, + ::ecal::reco::StorageScalarType const* chi2_ee, + uint32_t 
const* flags_eb, + uint32_t const* flags_ee, + // output + uint32_t* didEB, + uint32_t* didEE, + ::ecal::reco::StorageScalarType* energyEB, // in energy [GeV] + ::ecal::reco::StorageScalarType* energyEE, // in energy [GeV] + ::ecal::reco::StorageScalarType* timeEB, + ::ecal::reco::StorageScalarType* timeEE, + ::ecal::reco::StorageScalarType* chi2EB, + ::ecal::reco::StorageScalarType* chi2EE, + uint32_t* flagBitsEB, + uint32_t* flagBitsEE, + uint32_t* extraEB, + uint32_t* extraEE, + // other + int const nchannels, + uint32_t const nChannelsBarrel, + uint32_t const offsetForHashes) { + // + // NB: energy "type_wrapper::type" most likely std::vector + // + + for (int ch = threadIdx.x + blockDim.x * blockIdx.x; ch < nchannels; ch += blockDim.x * gridDim.x) { + bool isEndcap = (ch >= nChannelsBarrel); + + int const inputCh = isEndcap ? ch - nChannelsBarrel : ch; + + uint32_t const* didCh = isEndcap ? did_ee : did_eb; + + // arrange to access the right ptrs +#define ARRANGE(var) auto* var = isEndcap ? var##EE : var##EB + ARRANGE(did); + ARRANGE(energy); + ARRANGE(chi2); + ARRANGE(flagBits); + ARRANGE(extra); +#undef ARRANGE + + // only two values, EB or EE + // AM : FIXME : why not using "isBarrel" ? isBarrel ? adc2gev[0] : adc2gev[1] + float adc2gev_to_use = isEndcap ? adc2gev[1] // ee + : adc2gev[0]; // eb + + // first EB and then EE + + ::ecal::reco::StorageScalarType const* amplitude = isEndcap ? amplitude_ee : amplitude_eb; + + ::ecal::reco::StorageScalarType const* chi2_in = isEndcap ? chi2_ee : chi2_eb; + + uint32_t const* flags_in = isEndcap ? flags_ee : flags_eb; + + // simple copy + did[inputCh] = didCh[inputCh]; + + auto const did_to_use = DetId{didCh[inputCh]}; + + auto const isBarrel = did_to_use.subdetId() == EcalBarrel; + auto const hashedId = isBarrel ? 
ecal::reconstruction::hashedIndexEB(did_to_use.rawId()) + : offsetForHashes + ecal::reconstruction::hashedIndexEE(did_to_use.rawId()); + + float const intercalib_to_use = intercalib[hashedId]; + + // get laser coefficient + float lasercalib = 1.; + + // + // AM: ideas + // + // One possibility is to create the map of laser corrections once on CPU + // for all crystals and push them on GPU. + // Then only if the LS is different, update the laser correction + // The variation within a LS is not worth pursuing (<< 0.1% !!) + // and below the precision we can claim on the laser corrections (right?). + // This will save quite some time (also for the CPU version?) + // + + int iLM = 1; + + if (isBarrel) { + iLM = ecal::reconstruction::laser_monitoring_region_EB(did_to_use.rawId()); + } else { + iLM = ecal::reconstruction::laser_monitoring_region_EE(did_to_use.rawId()); + } + + long long t_i = 0, t_f = 0; + float p_i = 0, p_f = 0; + long long lt_i = 0, lt_f = 0; + float lp_i = 0, lp_f = 0; + + // laser + if (event_time >= t1[iLM - 1] && event_time < t2[iLM - 1]) { + t_i = t1[iLM - 1]; + t_f = t2[iLM - 1]; + p_i = p1[hashedId]; + p_f = p2[hashedId]; + } else if (event_time >= t2[iLM - 1] && event_time <= t3[iLM - 1]) { + t_i = t2[iLM - 1]; + t_f = t3[iLM - 1]; + p_i = p2[hashedId]; + p_f = p3[hashedId]; + } else if (event_time < t1[iLM - 1]) { + t_i = t1[iLM - 1]; + t_f = t2[iLM - 1]; + p_i = p1[hashedId]; + p_f = p2[hashedId]; + + } else if (event_time > t3[iLM - 1]) { + t_i = t2[iLM - 1]; + t_f = t3[iLM - 1]; + p_i = p2[hashedId]; + p_f = p3[hashedId]; + } + + // linear corrections + if (event_time >= lt1[iLM - 1] && event_time < lt2[iLM - 1]) { + lt_i = lt1[iLM - 1]; + lt_f = lt2[iLM - 1]; + lp_i = lp1[hashedId]; + lp_f = lp2[hashedId]; + } else if (event_time >= lt2[iLM - 1] && event_time <= lt3[iLM - 1]) { + lt_i = lt2[iLM - 1]; + lt_f = lt3[iLM - 1]; + lp_i = lp2[hashedId]; + lp_f = lp3[hashedId]; + } else if (event_time < lt1[iLM - 1]) { + lt_i = lt1[iLM - 1]; + 
lt_f = lt2[iLM - 1]; + lp_i = lp1[hashedId]; + lp_f = lp2[hashedId]; + + } else if (event_time > lt3[iLM - 1]) { + lt_i = lt2[iLM - 1]; + lt_f = lt3[iLM - 1]; + lp_i = lp2[hashedId]; + lp_f = lp3[hashedId]; + } + + // apdpnref and alpha + float apdpnref = apdpnrefs[hashedId]; + float alpha = alphas[hashedId]; + + // now calculate transparency correction + if (apdpnref != 0 && (t_i - t_f) != 0 && (lt_i - lt_f) != 0) { + long long tt = event_time; // never subtract two unsigned! + float interpolatedLaserResponse = + p_i / apdpnref + float(tt - t_i) * (p_f - p_i) / (apdpnref * float(t_f - t_i)); + + float interpolatedLinearResponse = + lp_i / apdpnref + float(tt - lt_i) * (lp_f - lp_i) / (apdpnref * float(lt_f - lt_i)); // FIXED BY FC + + if (interpolatedLinearResponse > 2.f || interpolatedLinearResponse < 0.1f) { + interpolatedLinearResponse = 1.f; + } + if (interpolatedLaserResponse <= 0.) { + // AM : how the heck is it possible? + // interpolatedLaserResponse = 0.0001; + lasercalib = 1.; + + } else { + float interpolatedTransparencyResponse = interpolatedLaserResponse / interpolatedLinearResponse; + + // ... and now this: + lasercalib = 1.f / (std::pow(interpolatedTransparencyResponse, alpha) * interpolatedLinearResponse); + } + } + + // + // Check for channels to be excluded from reconstruction + // + // + // Default energy? 
Not to be updated if "ChannelStatusToBeExcluded" + // Exploited later by the module "EcalRecHitConvertGPU2CPUFormat" + // + energy[inputCh] = -1; //---- AM: default, un-physical, ok + + // truncate the chi2 + if (chi2_in[inputCh] > 64) + chi2[inputCh] = 64; + else + chi2[inputCh] = chi2_in[inputCh]; + + // default values for the flags + flagBits[inputCh] = 0; + extra[inputCh] = 0; + + static const int chStatusMask = 0x1f; + // ChannelStatusToBeExcluded is a "int" then I put "dbstatus" to be the same + int dbstatus = EcalChannelStatusCode_Code((status[hashedId]) & chStatusMask); + if (ChannelStatusToBeExcludedSize != 0) { + bool skip_this_channel = false; + for (int ich_to_check = 0; ich_to_check < ChannelStatusToBeExcludedSize; ich_to_check++) { + if (ChannelStatusToBeExcluded[ich_to_check] == dbstatus) { + skip_this_channel = true; + break; + } + } + if (skip_this_channel) { + // skip this channel + continue; + } + } + + // Take our association map of dbstatuses-> recHit flagbits and return the apporpriate flagbit word + + // + // AM: get the smaller "flagbit_counter" with match + // + + uint32_t temporary_flagBits = 0; + + int iterator_flags = 0; + bool need_to_exit = false; + int flagbit_counter = 0; + while (!need_to_exit) { + iterator_flags = 0; + for (unsigned int i = 0; i != expanded_v_DB_reco_flagsSize; ++i) { + // check the correct "flagbit" + if (expanded_flagbit_v_DB_reco_flags[i] == flagbit_counter) { + for (unsigned int j = 0; j < expanded_Sizes_v_DB_reco_flags[i]; j++) { + if (expanded_v_DB_reco_flags[iterator_flags] == dbstatus) { + temporary_flagBits = 0x1 << expanded_flagbit_v_DB_reco_flags[i]; + need_to_exit = true; + break; // also from the big loop!!! 
+ } + iterator_flags++; + } + } else { + // if not, got to the next bunch directly + iterator_flags += expanded_Sizes_v_DB_reco_flags[i]; + } + + if (need_to_exit) { + break; + } + } + flagbit_counter += 1; + } + + flagBits[inputCh] = temporary_flagBits; + + if ((flagmask & temporary_flagBits) && killDeadChannels) { + // skip this channel + continue; + } + + // + // multiply the adc counts with factors to get GeV + // + + // energy[ch] = amplitude[inputCh] * adc2gev_to_use * intercalib_to_use ; + energy[inputCh] = amplitude[inputCh] * adc2gev_to_use * intercalib_to_use * lasercalib; + + // Time is not saved so far, FIXME + // time[ch] = time_in[inputCh]; + + // NB: calculate the "flagBits extra" --> not really "flags", but actually an encoded version of energy uncertainty, time unc., ... + + // + // extra packing ... + // + + uint32_t offset; + uint32_t width; + uint32_t value; + + float chi2_temp = chi2[inputCh]; + if (chi2_temp > 64) + chi2_temp = 64; + // use 7 bits + uint32_t rawChi2 = lround(chi2_temp / 64. * ((1 << 7) - 1)); + + offset = 0; + width = 7; + value = 0; + + uint32_t mask = ((1 << width) - 1) << offset; + value &= ~mask; + value |= (rawChi2 & ((1U << width) - 1)) << offset; + + // rawEnergy is actually "error" !!! + uint32_t rawEnergy = 0; + + // AM: FIXME: this is not propagated currently to the uncalibrecHit collection SOA + // if you want to store this in "extra", we need first to add it to the uncalibrecHit results + // then it will be something like the following + // amplitudeError[inputCh] * adc2gev_to_use * intercalib_to_use * lasercalib + // + // + + float amplitudeError_ch = 0.; // amplitudeError[ch]; + + if (amplitudeError_ch > 0.001) { + static constexpr float p10[] = {1.e-2f, 1.e-1f, 1.f, 1.e1f, 1.e2f, 1.e3f, 1.e4f, 1.e5f, 1.e6f}; + int b = amplitudeError_ch < p10[4] ? 
0 : 5; + for (; b < 9; ++b) + if (amplitudeError_ch < p10[b]) + break; + + uint16_t exponent = b; + + static constexpr float ip10[] = {1.e5f, 1.e4f, 1.e3f, 1.e2f, 1.e1f, 1.e0f, 1.e-1f, 1.e-2f, 1.e-3f, 1.e-4}; + uint16_t significand = lround(amplitudeError_ch * ip10[exponent]); + // use 13 bits (3 exponent, 10 significand) + rawEnergy = exponent << 10 | significand; + } + + offset = 8; + width = 13; + // value from last change, ok + + mask = ((1 << width) - 1) << offset; + value &= ~mask; + value |= (rawEnergy & ((1U << width) - 1)) << offset; + + uint32_t jitterErrorBits = 0; + jitterErrorBits = jitterErrorBits & 0xFF; + + offset = 24; + width = 8; + // value from last change, ok + + mask = ((1 << width) - 1) << offset; + value &= ~mask; + value |= (jitterErrorBits & ((1U << width) - 1)) << offset; + + // + // now finally set "extra[ch]" + // + extra[inputCh] = value; + + // + // additional flags setting + // + // using correctly the flags as calculated at the UncalibRecHit stage + // + // Now fill flags + + bool good = true; + + if (flags_in[inputCh] & (0x1 << (UncalibRecHitFlags::kLeadingEdgeRecovered))) { + flagBits[inputCh] |= (0x1 << (RecHitFlags::RecHitFlags_kLeadingEdgeRecovered)); + good = false; + } + + if (flags_in[inputCh] & (0x1 << (UncalibRecHitFlags::kSaturated))) { + // leading edge recovery failed - still keep the information + // about the saturation and do not flag as dead + flagBits[inputCh] |= (0x1 << (RecHitFlags::RecHitFlags_kSaturated)); + good = false; + } + + // + // AM: why do we have two tests one after the other checking almost the same thing??? + // Please clean up the code, ... also the original one! 
+        //
+        // uncalibRH.isSaturated() --->
+        //
+        //   bool EcalUncalibratedRecHit::isSaturated() const {
+        //     return EcalUncalibratedRecHit::checkFlag(kSaturated);
+        //   }
+        //
+        //
+
+        if (flags_in[inputCh] & (0x1 << (UncalibRecHitFlags::kSaturated))) {
+          flagBits[inputCh] |= (0x1 << (RecHitFlags::RecHitFlags_kSaturated));
+          good = false;
+        }
+
+        // propagate the remaining uncalibrated-rechit flags; out-of-time and
+        // poor-reco hits are not "good", gain-switch information alone is harmless
+        if (flags_in[inputCh] & (0x1 << (UncalibRecHitFlags::kOutOfTime))) {
+          flagBits[inputCh] |= (0x1 << (RecHitFlags::RecHitFlags_kOutOfTime));
+          good = false;
+        }
+        if (flags_in[inputCh] & (0x1 << (UncalibRecHitFlags::kPoorReco))) {
+          flagBits[inputCh] |= (0x1 << (RecHitFlags::RecHitFlags_kPoorReco));
+          good = false;
+        }
+        if (flags_in[inputCh] & (0x1 << (UncalibRecHitFlags::kHasSwitchToGain6))) {
+          flagBits[inputCh] |= (0x1 << (RecHitFlags::RecHitFlags_kHasSwitchToGain6));
+        }
+        if (flags_in[inputCh] & (0x1 << (UncalibRecHitFlags::kHasSwitchToGain1))) {
+          flagBits[inputCh] |= (0x1 << (RecHitFlags::RecHitFlags_kHasSwitchToGain1));
+        }
+
+        if (good) {
+          flagBits[inputCh] |= (0x1 << (RecHitFlags::RecHitFlags_kGood));
+        }
+
+        // a laser correction outside the configured [MIN, MAX] window marks the hit as poorly calibrated
+        if ((isBarrel && (lasercalib < EBLaserMIN || lasercalib > EBLaserMAX)) ||
+            (!isBarrel && (lasercalib < EELaserMIN || lasercalib > EELaserMAX))) {
+          flagBits[inputCh] |= (0x1 << (RecHitFlags::RecHitFlags_kPoorCalib));
+        }
+
+        // recover, killing, and other stuff
+
+        //
+        // Structure:
+        //  EB
+        //  EE
+        //
+        //
+        //  - single MVA
+        //  - democratic sharing
+        //  - kill all the other cases
+        //
+
+        bool is_Single = false;
+        bool is_FE = false;
+        bool is_VFE = false;
+
+        bool is_recoverable = false;  // DetIdToBeRecovered
+
+        // NOTE(review): 10/11/12 are raw status codes; presumably they are meant to be the
+        // isolated / dead-VFE / dead-FE codes of EcalChannelStatusCode_Code -- confirm against
+        // the enum and replace them by the enumerators
+        if (dbstatus == 10 || dbstatus == 11 || dbstatus == 12) {
+          is_recoverable = true;
+        }
+
+        if (is_recoverable) {
+          // classify the dead channel: very-front-end, front-end (tower), or isolated
+          if (dbstatus == EcalChannelStatusCode_Code::kDeadVFE) {
+            is_VFE = true;
+          } else if (dbstatus == EcalChannelStatusCode_Code::kDeadFE) {
+            // was a second test of kDeadVFE, which made this branch unreachable
+            is_FE = true;
+          } else {
+            is_Single = true;
+          }
+
+          // EB
+          if (isBarrel) {
+            if (is_Single || is_FE || is_VFE) {
+              // single MVA
+              if
(is_Single && (recoverEBIsolatedChannels || !killDeadChannels)) { + } + // decmocratic sharing + else if (is_FE && (recoverEBFE || !killDeadChannels)) { + } + // kill all the other cases + else { + energy[inputCh] = 0.; // Need to set also the flags ... + } + } + } + // EE + else { + if (is_Single || is_FE || is_VFE) { + // single MVA + if (is_Single && (recoverEBIsolatedChannels || !killDeadChannels)) { + } + // decmocratic sharing + else if (is_FE && (recoverEBFE || !killDeadChannels)) { + // + // Code is definitely too long ... + // + + } + // kill all the other cases + else { + energy[inputCh] = 0.; // Need to set also the flags ... + } + } + } + } + + } // end channel + } + + // host version, to be called by the plugin + void create_ecal_rehit(EventInputDataGPU const& eventInputGPU, + EventOutputDataGPU& eventOutputGPU, + // eventDataForScratchGPU_, + ConditionsProducts const& conditions, + ConfigurationParameters const& configParameters, + uint32_t const nChannelsBarrel, + edm::TimeValue_t const event_time, + cudaStream_t cudaStream) { + int nchannels = eventInputGPU.ebUncalibRecHits.size + eventInputGPU.eeUncalibRecHits.size; + + unsigned int nchannels_per_block = 16; + unsigned int threads_min = nchannels_per_block; + unsigned int blocks_min = (nchannels + threads_min - 1) / threads_min; // TEST : to be optimized (AM) + + // + // kernel create rechit + // + + kernel_create_ecal_rehit<<>>( + // configuration + configParameters.ChannelStatusToBeExcluded, + configParameters.ChannelStatusToBeExcludedSize, + configParameters.killDeadChannels, + configParameters.recoverEBIsolatedChannels, + configParameters.recoverEEIsolatedChannels, + configParameters.recoverEBVFE, + configParameters.recoverEEVFE, + configParameters.recoverEBFE, + configParameters.recoverEEFE, + configParameters.EBLaserMIN, + configParameters.EELaserMIN, + configParameters.EBLaserMAX, + configParameters.EELaserMAX, + // for flags setting + configParameters.expanded_v_DB_reco_flags, + 
configParameters.expanded_Sizes_v_DB_reco_flags, + configParameters.expanded_flagbit_v_DB_reco_flags, + configParameters.expanded_v_DB_reco_flagsSize, + configParameters.flagmask, + // conditions + conditions.ADCToGeV.adc2gev, + conditions.Intercalib.values, + conditions.ChannelStatus.status, + conditions.LaserAPDPNRatiosRef.values, + conditions.LaserAlphas.values, + // input for transparency corrections + conditions.LaserAPDPNRatios.p1, + conditions.LaserAPDPNRatios.p2, + conditions.LaserAPDPNRatios.p3, + conditions.LaserAPDPNRatios.t1, + conditions.LaserAPDPNRatios.t2, + conditions.LaserAPDPNRatios.t3, + // input for linear corrections + conditions.LinearCorrections.p1, + conditions.LinearCorrections.p2, + conditions.LinearCorrections.p3, + conditions.LinearCorrections.t1, + conditions.LinearCorrections.t2, + conditions.LinearCorrections.t3, + // time, used for time dependent corrections + event_time, + // input + eventInputGPU.ebUncalibRecHits.did.get(), + eventInputGPU.eeUncalibRecHits.did.get(), + eventInputGPU.ebUncalibRecHits.amplitude.get(), + eventInputGPU.eeUncalibRecHits.amplitude.get(), + eventInputGPU.ebUncalibRecHits.jitter.get(), + eventInputGPU.eeUncalibRecHits.jitter.get(), + eventInputGPU.ebUncalibRecHits.chi2.get(), + eventInputGPU.eeUncalibRecHits.chi2.get(), + eventInputGPU.ebUncalibRecHits.flags.get(), + eventInputGPU.eeUncalibRecHits.flags.get(), + // output + eventOutputGPU.recHitsEB.did.get(), + eventOutputGPU.recHitsEE.did.get(), + eventOutputGPU.recHitsEB.energy.get(), + eventOutputGPU.recHitsEE.energy.get(), + eventOutputGPU.recHitsEB.time.get(), + eventOutputGPU.recHitsEE.time.get(), + eventOutputGPU.recHitsEB.chi2.get(), + eventOutputGPU.recHitsEE.chi2.get(), + eventOutputGPU.recHitsEB.flagBits.get(), + eventOutputGPU.recHitsEE.flagBits.get(), + eventOutputGPU.recHitsEB.extra.get(), + eventOutputGPU.recHitsEE.extra.get(), + // other + nchannels, + nChannelsBarrel, + conditions.offsetForHashes); + } + + } // namespace rechit + +} // 
namespace ecal diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitBuilderKernels.h b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitBuilderKernels.h new file mode 100644 index 0000000000000..cb9c7f435d7b3 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitBuilderKernels.h @@ -0,0 +1,93 @@ +#ifndef RecoLocalCalo_EcalRecProducers_plugins_EcalRecHitBuilderKernels_h +#define RecoLocalCalo_EcalRecProducers_plugins_EcalRecHitBuilderKernels_h + +// +// Builder of ECAL RecHits on GPU +// + +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h" +#include "DataFormats/Provenance/interface/Timestamp.h" + +#include "Common.h" +#include "DeclsForKernels.h" + +namespace ecal { + namespace rechit { + + __global__ void kernel_create_ecal_rehit( + // configuration + int const* ChannelStatusToBeExcluded, + uint32_t ChannelStatusToBeExcludedSize, + bool killDeadChannels, + bool const recoverEBIsolatedChannels, + bool const recoverEEIsolatedChannels, + bool const recoverEBVFE, + bool const recoverEEVFE, + bool const recoverEBFE, + bool const recoverEEFE, + // for flags setting + int const* expanded_v_DB_reco_flags, + uint32_t const* expanded_Sizes_v_DB_reco_flags, + uint32_t const* expanded_flagbit_v_DB_reco_flags, + uint32_t expanded_v_DB_reco_flagsSize, + uint32_t flagmask, + // conditions + float const* adc2gev, + float const* intercalib, + uint16_t const* status, + float const* apdpnrefs, + float const* alphas, + // input for transparency corrections + float const* p1, + float const* p2, + float const* p3, + edm::TimeValue_t const* t1, + edm::TimeValue_t const* t2, + edm::TimeValue_t const* t3, + // input for linear corrections + float const* lp1, + float const* lp2, + float const* lp3, + edm::TimeValue_t const* lt1, + edm::TimeValue_t const* lt2, + edm::TimeValue_t const* lt3, + // time, used for time dependent corrections + edm::TimeValue_t const event_time, + // 
input + uint32_t const* did_eb, + uint32_t const* did_ee, + ::ecal::reco::StorageScalarType const* amplitude_eb, // in adc counts + ::ecal::reco::StorageScalarType const* amplitude_ee, // in adc counts + ::ecal::reco::StorageScalarType const* time_eb, + ::ecal::reco::StorageScalarType const* time_ee, + ::ecal::reco::StorageScalarType const* chi2_eb, + ::ecal::reco::StorageScalarType const* chi2_ee, + uint32_t const* flags_eb, + uint32_t const* flags_ee, + // output + uint32_t* did, + ::ecal::reco::StorageScalarType* energy, // in energy [GeV] + ::ecal::reco::StorageScalarType* time, + ::ecal::reco::StorageScalarType* chi2, + uint32_t* flagBits, + uint32_t* extra, + int const nchannels, + uint32_t const nChannelsBarrel, + uint32_t const offsetForHashes); + + // host version, to be called by the plugin + + void create_ecal_rehit(EventInputDataGPU const& eventInputGPU, + EventOutputDataGPU& eventOutputGPU, + ConditionsProducts const& conditions, + ConfigurationParameters const& configParameters, + uint32_t const nChannelsBarrel, + edm::TimeValue_t const event_time, + cudaStream_t cudaStream); + + } // namespace rechit + +} // namespace ecal + +#endif // RecoLocalCalo_EcalRecProducers_plugins_EcalRecHitBuilderKernels_h diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitConvertGPU2CPUFormat.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitConvertGPU2CPUFormat.cc new file mode 100644 index 0000000000000..6df36f4a8b592 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitConvertGPU2CPUFormat.cc @@ -0,0 +1,98 @@ +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h" +#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h" +#include "DataFormats/EcalRecHit/interface/EcalRecHit.h" +#include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include 
"FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "Common.h" + +class EcalRecHitConvertGPU2CPUFormat : public edm::stream::EDProducer<> { +public: + explicit EcalRecHitConvertGPU2CPUFormat(edm::ParameterSet const& ps); + ~EcalRecHitConvertGPU2CPUFormat() override; + static void fillDescriptions(edm::ConfigurationDescriptions&); + +private: + using InputProduct = ecal::RecHit>; + void produce(edm::Event&, edm::EventSetup const&) override; + +private: + const edm::EDGetTokenT recHitsGPUEB_; + const edm::EDGetTokenT recHitsGPUEE_; + + const std::string recHitsLabelCPUEB_, recHitsLabelCPUEE_; +}; + +void EcalRecHitConvertGPU2CPUFormat::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { + edm::ParameterSetDescription desc; + + desc.add("recHitsLabelGPUEB", edm::InputTag("ecalRecHitProducerGPU", "EcalRecHitsGPUEB")); + desc.add("recHitsLabelGPUEE", edm::InputTag("ecalRecHitProducerGPU", "EcalRecHitsGPUEE")); + + desc.add("recHitsLabelCPUEB", "EcalRecHitsEB"); + desc.add("recHitsLabelCPUEE", "EcalRecHitsEE"); + + confDesc.addWithDefaultLabel(desc); +} + +EcalRecHitConvertGPU2CPUFormat::EcalRecHitConvertGPU2CPUFormat(const edm::ParameterSet& ps) + : recHitsGPUEB_{consumes(ps.getParameter("recHitsLabelGPUEB"))}, + recHitsGPUEE_{consumes(ps.getParameter("recHitsLabelGPUEE"))}, + recHitsLabelCPUEB_{ps.getParameter("recHitsLabelCPUEB")}, + recHitsLabelCPUEE_{ps.getParameter("recHitsLabelCPUEE")} { + produces(recHitsLabelCPUEB_); + produces(recHitsLabelCPUEE_); +} + +EcalRecHitConvertGPU2CPUFormat::~EcalRecHitConvertGPU2CPUFormat() {} + +void EcalRecHitConvertGPU2CPUFormat::produce(edm::Event& event, edm::EventSetup const& setup) { + auto const& hRecHitsGPUEB = event.get(recHitsGPUEB_); + auto const& hRecHitsGPUEE = event.get(recHitsGPUEE_); + + auto recHitsCPUEB = std::make_unique(); + auto recHitsCPUEE = std::make_unique(); + recHitsCPUEB->reserve(hRecHitsGPUEB.energy.size()); + 
recHitsCPUEE->reserve(hRecHitsGPUEE.energy.size()); + + for (uint32_t i = 0; i < hRecHitsGPUEB.energy.size(); ++i) { + // + // Save only if energy is >= 0 ! + // This is extremely important because the channels that were supposed + // to be excluded get "-1" as energy + // + + if (hRecHitsGPUEB.energy[i] >= 0) { + recHitsCPUEB->emplace_back(DetId{hRecHitsGPUEB.did[i]}, + hRecHitsGPUEB.energy[i], + hRecHitsGPUEB.time[i], + hRecHitsGPUEB.extra[i], + hRecHitsGPUEB.flagBits[i]); + } + } + + for (uint32_t i = 0; i < hRecHitsGPUEE.energy.size(); ++i) { + // + // Save only if energy is >= 0 ! + // This is extremely important because the channels that were supposed + // to be excluded get "-1" as energy + // + + if (hRecHitsGPUEE.energy[i] >= 0) { + recHitsCPUEE->emplace_back(DetId{hRecHitsGPUEE.did[i]}, + hRecHitsGPUEE.energy[i], + hRecHitsGPUEE.time[i], + hRecHitsGPUEE.extra[i], + hRecHitsGPUEE.flagBits[i]); + } + } + + event.put(std::move(recHitsCPUEB), recHitsLabelCPUEB_); + event.put(std::move(recHitsCPUEE), recHitsLabelCPUEE_); +} + +DEFINE_FWK_MODULE(EcalRecHitConvertGPU2CPUFormat); diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitParametersGPUESProducer.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitParametersGPUESProducer.cc new file mode 100644 index 0000000000000..a63ed42cb2b70 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitParametersGPUESProducer.cc @@ -0,0 +1,83 @@ +#include +#include +#include + +#include "FWCore/Framework/interface/ESProducer.h" +#include "FWCore/Framework/interface/ESProductHost.h" +#include "FWCore/Framework/interface/ESTransientHandle.h" +#include "FWCore/Framework/interface/EventSetupRecordIntervalFinder.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/ModuleFactory.h" +#include "FWCore/Framework/interface/SourceFactory.h" +#include "FWCore/Framework/interface/eventsetuprecord_registration_macro.h" +#include 
"FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/ReusableObjectHolder.h" +#include "FWCore/Utilities/interface/typelookup.h" +#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalRecHitParametersGPU.h" + +class EcalRecHitParametersGPUESProducer : public edm::ESProducer, public edm::EventSetupRecordIntervalFinder { +public: + EcalRecHitParametersGPUESProducer(edm::ParameterSet const&); + ~EcalRecHitParametersGPUESProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions&); + std::unique_ptr produce(JobConfigurationGPURecord const&); + +protected: + void setIntervalFor(const edm::eventsetup::EventSetupRecordKey&, + const edm::IOVSyncValue&, + edm::ValidityInterval&) override; + +private: + edm::ParameterSet const pset_; +}; + +EcalRecHitParametersGPUESProducer::EcalRecHitParametersGPUESProducer(edm::ParameterSet const& pset) : pset_{pset} { + setWhatProduced(this); + findingRecord(); +} + +void EcalRecHitParametersGPUESProducer::setIntervalFor(const edm::eventsetup::EventSetupRecordKey& iKey, + const edm::IOVSyncValue& iTime, + edm::ValidityInterval& oInterval) { + oInterval = edm::ValidityInterval(edm::IOVSyncValue::beginOfTime(), edm::IOVSyncValue::endOfTime()); +} + +void EcalRecHitParametersGPUESProducer::fillDescriptions(edm::ConfigurationDescriptions& desc) { + edm::ParameterSetDescription d; + + //---- db statuses to be exluded from reconstruction + d.add>("ChannelStatusToBeExcluded", + { + "kDAC", + "kNoisy", + "kNNoisy", + "kFixedG6", + "kFixedG1", + "kFixedG0", + "kNonRespondingIsolated", + "kDeadVFE", + "kDeadFE", + "kNoDataNoTP", + }); + + // reco flags association to DB flag + edm::ParameterSetDescription desc_list_flagsMapDBReco; + desc_list_flagsMapDBReco.add>("kGood", {"kOk", "kDAC", "kNoLaser", "kNoisy"}); + desc_list_flagsMapDBReco.add>("kNoisy", {"kNNoisy", "kFixedG6", "kFixedG1"}); + 
desc_list_flagsMapDBReco.add>("kNeighboursRecovered", + {"kFixedG0", "kNonRespondingIsolated", "kDeadVFE"}); + desc_list_flagsMapDBReco.add>("kTowerRecovered", {"kDeadFE"}); + desc_list_flagsMapDBReco.add>("kDead", {"kNoDataNoTP"}); + + d.add("flagsMapDBReco", desc_list_flagsMapDBReco); + + desc.addWithDefaultLabel(d); +} + +std::unique_ptr EcalRecHitParametersGPUESProducer::produce(JobConfigurationGPURecord const&) { + return std::make_unique(pset_); +} + +DEFINE_FWK_EVENTSETUP_SOURCE(EcalRecHitParametersGPUESProducer); diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc new file mode 100644 index 0000000000000..a6dabd37f8439 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalRecHitProducerGPU.cc @@ -0,0 +1,244 @@ +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h" +#include "CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h" +#include "CommonTools/Utils/interface/StringToEnumValue.h" +#include "CondFormats/DataRecord/interface/EcalADCToGeVConstantRcd.h" +#include "CondFormats/DataRecord/interface/EcalChannelStatusRcd.h" +#include "CondFormats/DataRecord/interface/EcalIntercalibConstantsRcd.h" +#include "CondFormats/DataRecord/interface/EcalLaserAPDPNRatiosRcd.h" +#include "CondFormats/DataRecord/interface/EcalLaserAPDPNRatiosRefRcd.h" +#include "CondFormats/DataRecord/interface/EcalLaserAlphasRcd.h" +#include "CondFormats/DataRecord/interface/EcalLinearCorrectionsRcd.h" +#include "DataFormats/EcalRecHit/interface/EcalRecHit.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include 
"HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" +#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalIntercalibConstantsGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAPDPNRatiosRefGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLaserAlphasGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalLinearCorrectionsGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalRecHitParametersGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalRechitADCToGeVConstantGPU.h" +#include "RecoLocalCalo/EcalRecAlgos/interface/EcalRechitChannelStatusGPU.h" + +#include "EcalRecHitBuilderKernels.h" + +class EcalRecHitProducerGPU : public edm::stream::EDProducer { +public: + explicit EcalRecHitProducerGPU(edm::ParameterSet const& ps); + ~EcalRecHitProducerGPU() override; + static void fillDescriptions(edm::ConfigurationDescriptions&); + +private: + void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override; + void produce(edm::Event&, edm::EventSetup const&) override; + +private: + // data + uint32_t neb_, nee_; // extremely important, in particular neb_ + + // gpu input + using InputProduct = cms::cuda::Product>; + edm::EDGetTokenT uncalibRecHitsInEBToken_; + edm::EDGetTokenT uncalibRecHitsInEEToken_; + + // event data + ecal::rechit::EventOutputDataGPU eventOutputDataGPU_; + + cms::cuda::ContextState cudaState_; + + // gpu output + using OutputProduct = cms::cuda::Product>; + edm::EDPutTokenT recHitsTokenEB_, recHitsTokenEE_; + + // configuration parameters + ecal::rechit::ConfigurationParameters configParameters_; + + // conditions handles + edm::ESHandle ADCToGeVConstantHandle_; + edm::ESHandle 
IntercalibConstantsHandle_; + edm::ESHandle ChannelStatusHandle_; + + edm::ESHandle LaserAPDPNRatiosHandle_; + edm::ESHandle LaserAPDPNRatiosRefHandle_; + edm::ESHandle LaserAlphasHandle_; + edm::ESHandle LinearCorrectionsHandle_; + edm::ESHandle recHitParametersHandle_; + + // Associate reco flagbit (outer vector) to many db status flags (inner vector) + std::vector + expanded_v_DB_reco_flags_; // Transform a map in a vector // FIXME AM: int or uint32 to be checked + std::vector expanded_Sizes_v_DB_reco_flags_; // Saving the size for each piece + std::vector expanded_flagbit_v_DB_reco_flags_; // And the "key" for each key + + uint32_t flagmask_; // do not propagate channels with these flags on +}; + +void EcalRecHitProducerGPU::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { + edm::ParameterSetDescription desc; + + desc.add("uncalibrecHitsInLabelEB", + edm::InputTag("ecalUncalibRecHitProducerGPU", "EcalUncalibRecHitsEB")); + desc.add("uncalibrecHitsInLabelEE", + edm::InputTag("ecalUncalibRecHitProducerGPU", "EcalUncalibRecHitsEE")); + + desc.add("recHitsLabelEB", "EcalRecHitsGPUEB"); + desc.add("recHitsLabelEE", "EcalRecHitsGPUEE"); + + desc.add("killDeadChannels", true); + + desc.add("EBLaserMIN", 0.01); + desc.add("EELaserMIN", 0.01); + desc.add("EBLaserMAX", 30.0); + desc.add("EELaserMAX", 30.0); + + desc.add("maxNumberHitsEB", 61200); + desc.add("maxNumberHitsEE", 14648); +} + +EcalRecHitProducerGPU::EcalRecHitProducerGPU(const edm::ParameterSet& ps) { + //---- input + uncalibRecHitsInEBToken_ = consumes(ps.getParameter("uncalibrecHitsInLabelEB")); + uncalibRecHitsInEEToken_ = consumes(ps.getParameter("uncalibrecHitsInLabelEE")); + + //---- output + recHitsTokenEB_ = produces(ps.getParameter("recHitsLabelEB")); + recHitsTokenEE_ = produces(ps.getParameter("recHitsLabelEE")); + + bool killDeadChannels = ps.getParameter("killDeadChannels"); + configParameters_.killDeadChannels = killDeadChannels; + + configParameters_.EBLaserMIN = 
ps.getParameter("EBLaserMIN"); + configParameters_.EELaserMIN = ps.getParameter("EELaserMIN"); + configParameters_.EBLaserMAX = ps.getParameter("EBLaserMAX"); + configParameters_.EELaserMAX = ps.getParameter("EELaserMAX"); + + // max number of digis to allocate for + configParameters_.maxNumberHitsEB = ps.getParameter("maxNumberHitsEB"); + configParameters_.maxNumberHitsEE = ps.getParameter("maxNumberHitsEE"); + + flagmask_ = 0; + flagmask_ |= 0x1 << EcalRecHit::kNeighboursRecovered; + flagmask_ |= 0x1 << EcalRecHit::kTowerRecovered; + flagmask_ |= 0x1 << EcalRecHit::kDead; + flagmask_ |= 0x1 << EcalRecHit::kKilled; + flagmask_ |= 0x1 << EcalRecHit::kTPSaturated; + flagmask_ |= 0x1 << EcalRecHit::kL1SpikeFlag; + + configParameters_.flagmask = flagmask_; + + // for recovery and killing + + configParameters_.recoverEBIsolatedChannels = ps.getParameter("recoverEBIsolatedChannels"); + configParameters_.recoverEEIsolatedChannels = ps.getParameter("recoverEEIsolatedChannels"); + configParameters_.recoverEBVFE = ps.getParameter("recoverEBVFE"); + configParameters_.recoverEEVFE = ps.getParameter("recoverEEVFE"); + configParameters_.recoverEBFE = ps.getParameter("recoverEBFE"); + configParameters_.recoverEEFE = ps.getParameter("recoverEEFE"); +} + +EcalRecHitProducerGPU::~EcalRecHitProducerGPU() {} + +void EcalRecHitProducerGPU::acquire(edm::Event const& event, + edm::EventSetup const& setup, + edm::WaitingTaskWithArenaHolder holder) { + // cuda products + auto const& ebUncalibRecHitsProduct = event.get(uncalibRecHitsInEBToken_); + auto const& eeUncalibRecHitsProduct = event.get(uncalibRecHitsInEEToken_); + // raii + cms::cuda::ScopedContextAcquire ctx{ebUncalibRecHitsProduct, std::move(holder), cudaState_}; + // get actual object + auto const& ebUncalibRecHits = ctx.get(ebUncalibRecHitsProduct); + auto const& eeUncalibRecHits = ctx.get(eeUncalibRecHitsProduct); + + ecal::rechit::EventInputDataGPU inputDataGPU{ebUncalibRecHits, eeUncalibRecHits}; + + neb_ = 
ebUncalibRecHits.size; + nee_ = eeUncalibRecHits.size; + + if ((neb_ > configParameters_.maxNumberHitsEB) || (nee_ > configParameters_.maxNumberHitsEE)) { + edm::LogError("EcalRecHitProducerGPU") + << "max number of channels exceeded. See options 'maxNumberHitsEB and maxNumberHitsEE' "; + } + + int nchannelsEB = ebUncalibRecHits.size; // --> offsetForInput, first EB and then EE + + // conditions + // - laser correction + // - IC + // - adt2gev + + // + setup.get().get(ADCToGeVConstantHandle_); + setup.get().get(IntercalibConstantsHandle_); + setup.get().get(ChannelStatusHandle_); + + setup.get().get(LaserAPDPNRatiosHandle_); + setup.get().get(LaserAPDPNRatiosRefHandle_); + setup.get().get(LaserAlphasHandle_); + setup.get().get(LinearCorrectionsHandle_); + setup.get().get(recHitParametersHandle_); + + auto const& ADCToGeVConstantProduct = ADCToGeVConstantHandle_->getProduct(ctx.stream()); + auto const& IntercalibConstantsProduct = IntercalibConstantsHandle_->getProduct(ctx.stream()); + auto const& ChannelStatusProduct = ChannelStatusHandle_->getProduct(ctx.stream()); + + auto const& LaserAPDPNRatiosProduct = LaserAPDPNRatiosHandle_->getProduct(ctx.stream()); + auto const& LaserAPDPNRatiosRefProduct = LaserAPDPNRatiosRefHandle_->getProduct(ctx.stream()); + auto const& LaserAlphasProduct = LaserAlphasHandle_->getProduct(ctx.stream()); + auto const& LinearCorrectionsProduct = LinearCorrectionsHandle_->getProduct(ctx.stream()); + auto const& recHitParametersProduct = recHitParametersHandle_->getProduct(ctx.stream()); + + // set config ptrs : this is done to avoid changing things downstream + configParameters_.ChannelStatusToBeExcluded = recHitParametersProduct.ChannelStatusToBeExcluded; + configParameters_.ChannelStatusToBeExcludedSize = std::get<0>(recHitParametersHandle_->getValues()).get().size(); + configParameters_.expanded_v_DB_reco_flags = recHitParametersProduct.expanded_v_DB_reco_flags; + configParameters_.expanded_Sizes_v_DB_reco_flags = 
recHitParametersProduct.expanded_Sizes_v_DB_reco_flags; + configParameters_.expanded_flagbit_v_DB_reco_flags = recHitParametersProduct.expanded_flagbit_v_DB_reco_flags; + configParameters_.expanded_v_DB_reco_flagsSize = std::get<3>(recHitParametersHandle_->getValues()).get().size(); + + // bundle up conditions + ecal::rechit::ConditionsProducts conditions{ADCToGeVConstantProduct, + IntercalibConstantsProduct, + ChannelStatusProduct, + LaserAPDPNRatiosProduct, + LaserAPDPNRatiosRefProduct, + LaserAlphasProduct, + LinearCorrectionsProduct, + IntercalibConstantsHandle_->getOffset()}; + + // dev mem + eventOutputDataGPU_.allocate(configParameters_, ctx.stream()); + + // + // schedule algorithms + // + + edm::TimeValue_t event_time = event.time().value(); + + ecal::rechit::create_ecal_rehit( + inputDataGPU, eventOutputDataGPU_, conditions, configParameters_, nchannelsEB, event_time, ctx.stream()); + + cudaCheck(cudaGetLastError()); +} + +void EcalRecHitProducerGPU::produce(edm::Event& event, edm::EventSetup const& setup) { + cms::cuda::ScopedContextProduce ctx{cudaState_}; + + eventOutputDataGPU_.recHitsEB.size = neb_; + eventOutputDataGPU_.recHitsEE.size = nee_; + + // put into the event + ctx.emplace(event, recHitsTokenEB_, std::move(eventOutputDataGPU_.recHitsEB)); + ctx.emplace(event, recHitsTokenEE_, std::move(eventOutputDataGPU_.recHitsEE)); +} + +DEFINE_FWK_MODULE(EcalRecHitProducerGPU); diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitConvertGPU2CPUFormat.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitConvertGPU2CPUFormat.cc new file mode 100644 index 0000000000000..f7e57a61fdd96 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitConvertGPU2CPUFormat.cc @@ -0,0 +1,93 @@ +#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h" +#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h" +#include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h" +#include 
"DataFormats/EcalRecHit/interface/EcalUncalibratedRecHit.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "Common.h" + +class EcalUncalibRecHitConvertGPU2CPUFormat : public edm::stream::EDProducer<> { +public: + explicit EcalUncalibRecHitConvertGPU2CPUFormat(edm::ParameterSet const& ps); + ~EcalUncalibRecHitConvertGPU2CPUFormat() override; + static void fillDescriptions(edm::ConfigurationDescriptions&); + +private: + using InputProduct = ecal::UncalibratedRecHit>; + void produce(edm::Event&, edm::EventSetup const&) override; + +private: + const edm::EDGetTokenT recHitsGPUEB_; + const edm::EDGetTokenT recHitsGPUEE_; + + const std::string recHitsLabelCPUEB_, recHitsLabelCPUEE_; +}; + +void EcalUncalibRecHitConvertGPU2CPUFormat::fillDescriptions(edm::ConfigurationDescriptions& confDesc) { + edm::ParameterSetDescription desc; + + desc.add("recHitsLabelGPUEB", edm::InputTag("ecalUncalibRecHitProducerGPU", "EcalUncalibRecHitsEB")); + desc.add("recHitsLabelGPUEE", edm::InputTag("ecalUncalibRecHitProducerGPU", "EcalUncalibRecHitsEE")); + + desc.add("recHitsLabelCPUEB", "EcalUncalibRecHitsEB"); + desc.add("recHitsLabelCPUEE", "EcalUncalibRecHitsEE"); + + confDesc.add("ecalUncalibRecHitConvertGPU2CPUFormat", desc); +} + +EcalUncalibRecHitConvertGPU2CPUFormat::EcalUncalibRecHitConvertGPU2CPUFormat(const edm::ParameterSet& ps) + : recHitsGPUEB_{consumes(ps.getParameter("recHitsLabelGPUEB"))}, + recHitsGPUEE_{consumes(ps.getParameter("recHitsLabelGPUEE"))}, + recHitsLabelCPUEB_{ps.getParameter("recHitsLabelCPUEB")}, + recHitsLabelCPUEE_{ps.getParameter("recHitsLabelCPUEE")} { + produces(recHitsLabelCPUEB_); + produces(recHitsLabelCPUEE_); +} + +EcalUncalibRecHitConvertGPU2CPUFormat::~EcalUncalibRecHitConvertGPU2CPUFormat() {} + +void 
EcalUncalibRecHitConvertGPU2CPUFormat::produce(edm::Event& event, edm::EventSetup const& setup) { + edm::Handle hRecHitsGPUEB, hRecHitsGPUEE; + event.getByToken(recHitsGPUEB_, hRecHitsGPUEB); + event.getByToken(recHitsGPUEE_, hRecHitsGPUEE); + + auto recHitsCPUEB = std::make_unique(); + auto recHitsCPUEE = std::make_unique(); + recHitsCPUEB->reserve(hRecHitsGPUEB->amplitude.size()); + recHitsCPUEE->reserve(hRecHitsGPUEE->amplitude.size()); + + for (uint32_t i = 0; i < hRecHitsGPUEB->amplitude.size(); ++i) { + recHitsCPUEB->emplace_back(DetId{hRecHitsGPUEB->did[i]}, + hRecHitsGPUEB->amplitude[i], + hRecHitsGPUEB->pedestal[i], + hRecHitsGPUEB->jitter[i], + hRecHitsGPUEB->chi2[i], + hRecHitsGPUEB->flags[i]); + (*recHitsCPUEB)[i].setJitterError(hRecHitsGPUEB->jitterError[i]); + auto const offset = i * EcalDataFrame::MAXSAMPLES; + for (uint32_t sample = 0; sample < EcalDataFrame::MAXSAMPLES; ++sample) + (*recHitsCPUEB)[i].setOutOfTimeAmplitude(sample, hRecHitsGPUEB->amplitudesAll[offset + sample]); + } + + for (uint32_t i = 0; i < hRecHitsGPUEE->amplitude.size(); ++i) { + recHitsCPUEE->emplace_back(DetId{hRecHitsGPUEE->did[i]}, + hRecHitsGPUEE->amplitude[i], + hRecHitsGPUEE->pedestal[i], + hRecHitsGPUEE->jitter[i], + hRecHitsGPUEE->chi2[i], + hRecHitsGPUEE->flags[i]); + (*recHitsCPUEE)[i].setJitterError(hRecHitsGPUEE->jitterError[i]); + auto const offset = i * EcalDataFrame::MAXSAMPLES; + for (uint32_t sample = 0; sample < EcalDataFrame::MAXSAMPLES; ++sample) + (*recHitsCPUEE)[i].setOutOfTimeAmplitude(sample, hRecHitsGPUEE->amplitudesAll[offset + sample]); + } + + event.put(std::move(recHitsCPUEB), recHitsLabelCPUEB_); + event.put(std::move(recHitsCPUEE), recHitsLabelCPUEE_); +} + +DEFINE_FWK_MODULE(EcalUncalibRecHitConvertGPU2CPUFormat); diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitMultiFitAlgoGPU.cu b/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitMultiFitAlgoGPU.cu new file mode 100644 index 0000000000000..9d5a8a2ad1bd3 --- /dev/null 
+++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitMultiFitAlgoGPU.cu
@@ -0,0 +1,305 @@
+#include
+#include
+
+#include
+
+#include "CondFormats/EcalObjects/interface/EcalMGPAGainRatio.h"
+#include "CondFormats/EcalObjects/interface/EcalPedestals.h"
+#include "CondFormats/EcalObjects/interface/EcalPulseCovariances.h"
+#include "CondFormats/EcalObjects/interface/EcalPulseShapes.h"
+#include "CondFormats/EcalObjects/interface/EcalSampleMask.h"
+#include "CondFormats/EcalObjects/interface/EcalSamplesCorrelation.h"
+#include "CondFormats/EcalObjects/interface/EcalXtalGroupId.h"
+#include "DataFormats/EcalDigi/interface/EcalDataFrame.h"
+#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h"
+
+#include "AmplitudeComputationCommonKernels.h"
+#include "AmplitudeComputationKernels.h"
+#include "Common.h"
+#include "EcalUncalibRecHitMultiFitAlgoGPU.h"
+#include "TimeComputationKernels.h"
+
+//#define DEBUG
+
+//#define ECAL_RECO_CUDA_DEBUG
+
+namespace ecal {
+  namespace multifit {
+
+    // Enqueues the ECAL multifit reconstruction on `cudaStream`.
+    //
+    // Launch order: 1d preparation/initialization, 2d preparation, the
+    // minimization procedure and, only when
+    // `configParameters.shouldRunTimingComputation` is set, the timing chain
+    // (init, fixMGPAslew, nullhypot, makeratio, findamplchi2_and_finish,
+    // correction_and_finalize). Every launch is followed by
+    // cudaCheck(cudaGetLastError()). The function does not synchronize the
+    // stream itself.
+    //
+    //  eventInputGPU    EB and EE digis (data, ids, size); EB channels come first,
+    //                   so the EB size is used as the offset of the EE inputs
+    //  eventOutputGPU   EB/EE rec hit buffers handed to the kernels
+    //  scratch          per-event intermediate buffers handed to the kernels
+    //  conditions       device-side conditions and the offset used for hashes
+    //  configParameters fit parameters, thresholds and the timing switch
+    //  cudaStream       stream on which all kernels are enqueued
+    //
+    // Returns without launching anything when there are no EB and no EE digis.
+    void entryPoint(EventInputDataGPU const& eventInputGPU,
+                    EventOutputDataGPU& eventOutputGPU,
+                    EventDataForScratchGPU& scratch,
+                    ConditionsProducts const& conditions,
+                    ConfigurationParameters const& configParameters,
+                    cudaStream_t cudaStream) {
+      // according to the cpu setup  //----> hardcoded
+      bool const gainSwitchUseMaxSampleEB = true;
+      // according to the cpu setup  //----> hardcoded
+      bool const gainSwitchUseMaxSampleEE = false;
+
+      uint32_t const offsetForHashes = conditions.offsetForHashes;
+      uint32_t const offsetForInputs = eventInputGPU.ebDigis.size;
+      unsigned int const totalChannels = eventInputGPU.ebDigis.size + eventInputGPU.eeDigis.size;
+
+      // Nothing to reconstruct. kernel_prep_2d below uses one block per channel,
+      // and a grid with zero blocks is rejected by the CUDA runtime as an
+      // invalid launch configuration, so stop before scheduling anything.
+      if (totalChannels == 0)
+        return;
+
+      // number of blocks of `threadsPerBlock` threads needed to provide
+      // `threadsNeeded` threads; never less than one block
+      auto const blocksFor = [](unsigned int threadsNeeded, unsigned int threadsPerBlock) -> unsigned int {
+        return threadsPerBlock > threadsNeeded ? 1 : (threadsNeeded + threadsPerBlock - 1) / threadsPerBlock;
+      };
+
+      //
+      // 1d preparation kernel
+      //
+      // 10 threads per channel, 32 channels per block
+      unsigned int nchannels_per_block = 32;
+      unsigned int threads_1d = 10 * nchannels_per_block;
+      unsigned int blocks_1d = blocksFor(10 * totalChannels, threads_1d);
+      int shared_bytes = nchannels_per_block * EcalDataFrame::MAXSAMPLES *
+                         (sizeof(bool) + sizeof(bool) + sizeof(bool) + sizeof(bool) + sizeof(char) + sizeof(bool));
+      kernel_prep_1d_and_initialize<<<blocks_1d, threads_1d, shared_bytes, cudaStream>>>(
+          conditions.pulseShapes.values,
+          eventInputGPU.ebDigis.data.get(),
+          eventInputGPU.ebDigis.ids.get(),
+          eventInputGPU.eeDigis.data.get(),
+          eventInputGPU.eeDigis.ids.get(),
+          (SampleVector*)scratch.samples.get(),
+          (SampleVector*)eventOutputGPU.recHitsEB.amplitudesAll.get(),
+          (SampleVector*)eventOutputGPU.recHitsEE.amplitudesAll.get(),
+          (SampleGainVector*)scratch.gainsNoise.get(),
+          conditions.pedestals.mean_x1,
+          conditions.pedestals.mean_x12,
+          conditions.pedestals.rms_x12,
+          conditions.pedestals.mean_x6,
+          conditions.gainRatios.gain6Over1,
+          conditions.gainRatios.gain12Over6,
+          scratch.hasSwitchToGain6.get(),
+          scratch.hasSwitchToGain1.get(),
+          scratch.isSaturated.get(),
+          eventOutputGPU.recHitsEB.amplitude.get(),
+          eventOutputGPU.recHitsEE.amplitude.get(),
+          eventOutputGPU.recHitsEB.chi2.get(),
+          eventOutputGPU.recHitsEE.chi2.get(),
+          eventOutputGPU.recHitsEB.pedestal.get(),
+          eventOutputGPU.recHitsEE.pedestal.get(),
+          eventOutputGPU.recHitsEB.did.get(),
+          eventOutputGPU.recHitsEE.did.get(),
+          eventOutputGPU.recHitsEB.flags.get(),
+          eventOutputGPU.recHitsEE.flags.get(),
+          scratch.acState.get(),
+          (BXVectorType*)scratch.activeBXs.get(),
+          offsetForHashes,
+          offsetForInputs,
+          gainSwitchUseMaxSampleEB,
+          gainSwitchUseMaxSampleEE,
+          totalChannels);
+      cudaCheck(cudaGetLastError());
+
+      //
+      // 2d preparation kernel
+      //
+      // one block per channel, 10 x 10 threads per block
+      int blocks_2d = totalChannels;
+      dim3 threads_2d{10, 10};
+      kernel_prep_2d<<<blocks_2d, threads_2d, 0, cudaStream>>>((SampleGainVector*)scratch.gainsNoise.get(),
+                                                               eventInputGPU.ebDigis.ids.get(),
+                                                               eventInputGPU.eeDigis.ids.get(),
+                                                               conditions.pedestals.rms_x12,
+                                                               conditions.pedestals.rms_x6,
+                                                               conditions.pedestals.rms_x1,
+                                                               conditions.gainRatios.gain12Over6,
+                                                               conditions.gainRatios.gain6Over1,
+                                                               conditions.samplesCorrelation.EBG12SamplesCorrelation,
+                                                               conditions.samplesCorrelation.EBG6SamplesCorrelation,
+                                                               conditions.samplesCorrelation.EBG1SamplesCorrelation,
+                                                               conditions.samplesCorrelation.EEG12SamplesCorrelation,
+                                                               conditions.samplesCorrelation.EEG6SamplesCorrelation,
+                                                               conditions.samplesCorrelation.EEG1SamplesCorrelation,
+                                                               (SampleMatrix*)scratch.noisecov.get(),
+                                                               (PulseMatrixType*)scratch.pulse_matrix.get(),
+                                                               conditions.pulseShapes.values,
+                                                               scratch.hasSwitchToGain6.get(),
+                                                               scratch.hasSwitchToGain1.get(),
+                                                               scratch.isSaturated.get(),
+                                                               offsetForHashes,
+                                                               offsetForInputs);
+      cudaCheck(cudaGetLastError());
+
+      // run minimization kernels
+      v1::minimization_procedure(eventInputGPU, eventOutputGPU, scratch, conditions, configParameters, cudaStream);
+
+      if (configParameters.shouldRunTimingComputation) {
+        //
+        // TODO: this guy can run concurrently with other kernels,
+        // there is no dependence on the order of execution
+        //
+        unsigned int threads_time_init = threads_1d;
+        unsigned int blocks_time_init = blocks_1d;
+        int sharedBytesInit = 2 * threads_time_init * sizeof(SampleVector::Scalar);
+        kernel_time_computation_init<<<blocks_time_init, threads_time_init, sharedBytesInit, cudaStream>>>(
+            eventInputGPU.ebDigis.data.get(),
+            eventInputGPU.ebDigis.ids.get(),
+            eventInputGPU.eeDigis.data.get(),
+            eventInputGPU.eeDigis.ids.get(),
+            conditions.pedestals.rms_x12,
+            conditions.pedestals.rms_x6,
+            conditions.pedestals.rms_x1,
+            conditions.pedestals.mean_x12,
+            conditions.pedestals.mean_x6,
+            conditions.pedestals.mean_x1,
+            conditions.gainRatios.gain12Over6,
+            conditions.gainRatios.gain6Over1,
+            scratch.sample_values.get(),
+            scratch.sample_value_errors.get(),
+            scratch.ampMaxError.get(),
+            scratch.useless_sample_values.get(),
+            scratch.pedestal_nums.get(),
+            offsetForHashes,
+            offsetForInputs,
+            conditions.sampleMask.getEcalSampleMaskRecordEB(),
+            conditions.sampleMask.getEcalSampleMaskRecordEE(),
+            totalChannels);
+        cudaCheck(cudaGetLastError());
+
+        //
+        // TODO: small kernel only for EB. It needs to be checked if
+        /// fusing such small kernels is beneficial in here
+        //
+        // we are running only over EB digis
+        // therefore we need to create threads/blocks only for that
+        // (still one block when there are no EB digis at all)
+        unsigned int const threadsFixMGPA = threads_1d;
+        unsigned int const blocksFixMGPA = blocksFor(10 * eventInputGPU.ebDigis.size, threadsFixMGPA);
+        kernel_time_compute_fixMGPAslew<<<blocksFixMGPA, threadsFixMGPA, 0, cudaStream>>>(
+            eventInputGPU.ebDigis.data.get(),
+            eventInputGPU.eeDigis.data.get(),
+            scratch.sample_values.get(),
+            scratch.sample_value_errors.get(),
+            scratch.useless_sample_values.get(),
+            conditions.sampleMask.getEcalSampleMaskRecordEB(),
+            totalChannels,
+            offsetForInputs);
+        cudaCheck(cudaGetLastError());
+
+        int sharedBytes = EcalDataFrame::MAXSAMPLES * nchannels_per_block * 4 * sizeof(SampleVector::Scalar);
+        auto const threads_nullhypot = threads_1d;
+        auto const blocks_nullhypot = blocks_1d;
+        kernel_time_compute_nullhypot<<<blocks_nullhypot, threads_nullhypot, sharedBytes, cudaStream>>>(
+            scratch.sample_values.get(),
+            scratch.sample_value_errors.get(),
+            scratch.useless_sample_values.get(),
+            scratch.chi2sNullHypot.get(),
+            scratch.sum0sNullHypot.get(),
+            scratch.sumAAsNullHypot.get(),
+            totalChannels);
+        cudaCheck(cudaGetLastError());
+
+        // 45 threads per channel, 10 channels per block
+        unsigned int nchannels_per_block_makeratio = 10;
+        unsigned int threads_makeratio = 45 * nchannels_per_block_makeratio;
+        unsigned int blocks_makeratio = blocksFor(45 * totalChannels, threads_makeratio);
+        int sharedBytesMakeRatio = 5 * threads_makeratio * sizeof(SampleVector::Scalar);
+        kernel_time_compute_makeratio<<<blocks_makeratio, threads_makeratio, sharedBytesMakeRatio, cudaStream>>>(
+            scratch.sample_values.get(),
+            scratch.sample_value_errors.get(),
+            eventInputGPU.ebDigis.ids.get(),
+            eventInputGPU.eeDigis.ids.get(),
+            scratch.useless_sample_values.get(),
+            scratch.pedestal_nums.get(),
+            configParameters.amplitudeFitParametersEB,
+            configParameters.amplitudeFitParametersEE,
+            configParameters.timeFitParametersEB,
+            configParameters.timeFitParametersEE,
+            scratch.sumAAsNullHypot.get(),
+            scratch.sum0sNullHypot.get(),
+            scratch.tMaxAlphaBetas.get(),
+            scratch.tMaxErrorAlphaBetas.get(),
+            scratch.accTimeMax.get(),
+            scratch.accTimeWgt.get(),
+            scratch.tcState.get(),
+            configParameters.timeFitParametersSizeEB,
+            configParameters.timeFitParametersSizeEE,
+            configParameters.timeFitLimitsFirstEB,
+            configParameters.timeFitLimitsFirstEE,
+            configParameters.timeFitLimitsSecondEB,
+            configParameters.timeFitLimitsSecondEE,
+            totalChannels,
+            offsetForInputs);
+        cudaCheck(cudaGetLastError());
+
+        auto const threads_findamplchi2 = threads_1d;
+        auto const blocks_findamplchi2 = blocks_1d;
+        int const sharedBytesFindAmplChi2 = 2 * threads_findamplchi2 * sizeof(SampleVector::Scalar);
+        kernel_time_compute_findamplchi2_and_finish<<<blocks_findamplchi2,
+                                                      threads_findamplchi2,
+                                                      sharedBytesFindAmplChi2,
+                                                      cudaStream>>>(scratch.sample_values.get(),
+                                                                    scratch.sample_value_errors.get(),
+                                                                    eventInputGPU.ebDigis.ids.get(),
+                                                                    eventInputGPU.eeDigis.ids.get(),
+                                                                    scratch.useless_sample_values.get(),
+                                                                    scratch.tMaxAlphaBetas.get(),
+                                                                    scratch.tMaxErrorAlphaBetas.get(),
+                                                                    scratch.accTimeMax.get(),
+                                                                    scratch.accTimeWgt.get(),
+                                                                    configParameters.amplitudeFitParametersEB,
+                                                                    configParameters.amplitudeFitParametersEE,
+                                                                    scratch.sumAAsNullHypot.get(),
+                                                                    scratch.sum0sNullHypot.get(),
+                                                                    scratch.chi2sNullHypot.get(),
+                                                                    scratch.tcState.get(),
+                                                                    scratch.ampMaxAlphaBeta.get(),
+                                                                    scratch.ampMaxError.get(),
+                                                                    scratch.timeMax.get(),
+                                                                    scratch.timeError.get(),
+                                                                    totalChannels,
+                                                                    offsetForInputs);
+        cudaCheck(cudaGetLastError());
+
+        // one thread per channel
+        auto const threads_timecorr = 32;
+        auto const blocks_timecorr = blocksFor(totalChannels, threads_timecorr);
+        kernel_time_correction_and_finalize<<<blocks_timecorr, threads_timecorr, 0, cudaStream>>>(
+            eventOutputGPU.recHitsEB.amplitude.get(),
+            eventOutputGPU.recHitsEE.amplitude.get(),
+            eventInputGPU.ebDigis.data.get(),
+            eventInputGPU.ebDigis.ids.get(),
+            eventInputGPU.eeDigis.data.get(),
+            eventInputGPU.eeDigis.ids.get(),
+            conditions.timeBiasCorrections.EBTimeCorrAmplitudeBins,
+            conditions.timeBiasCorrections.EETimeCorrAmplitudeBins,
+            conditions.timeBiasCorrections.EBTimeCorrShiftBins,
+            conditions.timeBiasCorrections.EETimeCorrShiftBins,
+            scratch.timeMax.get(),
+            scratch.timeError.get(),
+            conditions.pedestals.rms_x12,
+            conditions.timeCalibConstants.values,
+            eventOutputGPU.recHitsEB.jitter.get(),
+            eventOutputGPU.recHitsEE.jitter.get(),
+            eventOutputGPU.recHitsEB.jitterError.get(),
+            eventOutputGPU.recHitsEE.jitterError.get(),
+            eventOutputGPU.recHitsEB.flags.get(),
+            eventOutputGPU.recHitsEE.flags.get(),
+            conditions.timeBiasCorrections.EBTimeCorrAmplitudeBinsSize,
+            conditions.timeBiasCorrections.EETimeCorrAmplitudeBinsSize,
+            configParameters.timeConstantTermEB,
+            configParameters.timeConstantTermEE,
+            conditions.timeOffsetConstant.getEBValue(),
+            conditions.timeOffsetConstant.getEEValue(),
+            configParameters.timeNconstEB,
+            configParameters.timeNconstEE,
+            configParameters.amplitudeThreshEB,
+            configParameters.amplitudeThreshEE,
+            configParameters.outOfTimeThreshG12pEB,
+            configParameters.outOfTimeThreshG12pEE,
+            configParameters.outOfTimeThreshG12mEB,
+            configParameters.outOfTimeThreshG12mEE,
+            configParameters.outOfTimeThreshG61pEB,
+            configParameters.outOfTimeThreshG61pEE,
+            configParameters.outOfTimeThreshG61mEB,
+            configParameters.outOfTimeThreshG61mEE,
+            offsetForHashes,
+            offsetForInputs,
+            totalChannels);
+        cudaCheck(cudaGetLastError());
+      }
+    }
+
+  }  // namespace multifit
+}  //
namespace ecal
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitMultiFitAlgoGPU.h b/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitMultiFitAlgoGPU.h
new file mode 100644
index 0000000000000..c84047a8bf8e7
--- /dev/null
+++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitMultiFitAlgoGPU.h
@@ -0,0 +1,23 @@
+#ifndef RecoLocalCalo_EcalRecProducers_plugins_EcalUncalibRecHitMultiFitAlgoGPU_h
+#define RecoLocalCalo_EcalRecProducers_plugins_EcalUncalibRecHitMultiFitAlgoGPU_h
+
+#include
+
+#include
+
+#include "DeclsForKernels.h"
+
+namespace ecal {
+  namespace multifit {
+
+    // Enqueues the multifit kernels for one event on the given CUDA stream.
+    // Arguments, in order: event input (EB/EE digis), event output (EB/EE rec
+    // hits), per-event scratch buffers, conditions, configuration parameters,
+    // and the stream to launch on.
+    void entryPoint(EventInputDataGPU const&,
+                    EventOutputDataGPU&,
+                    EventDataForScratchGPU&,
+                    ConditionsProducts const&,
+                    ConfigurationParameters const&,
+                    cudaStream_t);
+
+  }  // namespace multifit
+}  // namespace ecal
+
+#endif  // RecoLocalCalo_EcalRecProducers_plugins_EcalUncalibRecHitMultiFitAlgoGPU_h
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitProducerGPU.cc b/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitProducerGPU.cc
new file mode 100644
index 0000000000000..a321f35144c39
--- /dev/null
+++ b/RecoLocalCalo/EcalRecProducers/plugins/EcalUncalibRecHitProducerGPU.cc
@@ -0,0 +1,279 @@
+#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h"
+#include "CondFormats/DataRecord/interface/EcalGainRatiosRcd.h"
+#include "CondFormats/DataRecord/interface/EcalPedestalsRcd.h"
+#include "CondFormats/DataRecord/interface/EcalPulseCovariancesRcd.h"
+#include "CondFormats/DataRecord/interface/EcalPulseShapesRcd.h"
+#include "CondFormats/DataRecord/interface/EcalSampleMaskRcd.h"
+#include "CondFormats/DataRecord/interface/EcalSamplesCorrelationRcd.h"
+#include "CondFormats/DataRecord/interface/EcalTimeBiasCorrectionsRcd.h"
+#include "CondFormats/DataRecord/interface/EcalTimeCalibConstantsRcd.h"
+#include "CondFormats/DataRecord/interface/EcalTimeOffsetConstantRcd.h"
+#include "CondFormats/EcalObjects/interface/EcalTimeOffsetConstant.h"
+#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h"
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/EventSetup.h"
+#include "FWCore/Framework/interface/MakerMacros.h"
+#include "FWCore/Framework/interface/stream/EDProducer.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "HeterogeneousCore/CUDACore/interface/JobConfigurationGPURecord.h"
+#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+#include "RecoLocalCalo/EcalRecAlgos/interface/EcalGainRatiosGPU.h"
+#include "RecoLocalCalo/EcalRecAlgos/interface/EcalMultifitParametersGPU.h"
+#include "RecoLocalCalo/EcalRecAlgos/interface/EcalPedestalsGPU.h"
+#include "RecoLocalCalo/EcalRecAlgos/interface/EcalPulseCovariancesGPU.h"
+#include "RecoLocalCalo/EcalRecAlgos/interface/EcalPulseShapesGPU.h"
+#include "RecoLocalCalo/EcalRecAlgos/interface/EcalSamplesCorrelationGPU.h"
+#include "RecoLocalCalo/EcalRecAlgos/interface/EcalTimeBiasCorrectionsGPU.h"
+#include "RecoLocalCalo/EcalRecAlgos/interface/EcalTimeCalibConstantsGPU.h"
+
+#include "Common.h"
+#include "DeclsForKernels.h"
+#include "EcalUncalibRecHitMultiFitAlgoGPU.h"
+
+// Stream producer that takes the EB and EE digi products, schedules
+// ecal::multifit::entryPoint on them in acquire(), and puts the resulting EB
+// and EE uncalibrated rec hit collections into the event in produce().
+//
+// NOTE(review): template argument lists look like they are missing throughout
+// this listing (e.g. after EDProducer, Product, EDGetTokenT, ESGetToken) --
+// confirm the exact types against the repository.
+class EcalUncalibRecHitProducerGPU : public edm::stream::EDProducer {
+public:
+  explicit EcalUncalibRecHitProducerGPU(edm::ParameterSet const& ps);
+  ~EcalUncalibRecHitProducerGPU() override;
+  static void fillDescriptions(edm::ConfigurationDescriptions&);
+
+private:
+  void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override;
+  void produce(edm::Event&, edm::EventSetup const&) override;
+
+private:
+  // input digis (EB, EE) and output rec hits (EB, EE)
+  using InputProduct = cms::cuda::Product>;
+  const edm::EDGetTokenT digisTokenEB_, digisTokenEE_;
+  using OutputProduct = cms::cuda::Product>;
+  const edm::EDPutTokenT recHitsTokenEB_, recHitsTokenEE_;
+
+  // conditions tokens
+  const edm::ESGetToken pedestalsToken_;
+  const edm::ESGetToken gainRatiosToken_;
+  const edm::ESGetToken pulseShapesToken_;
+  const edm::ESGetToken pulseCovariancesToken_;
+  const edm::ESGetToken samplesCorrelationToken_;
+  const edm::ESGetToken timeBiasCorrectionsToken_;
+  const edm::ESGetToken timeCalibConstantsToken_;
+  const edm::ESGetToken sampleMaskToken_;
+  const edm::ESGetToken timeOffsetConstantToken_;
+  const edm::ESGetToken multifitParametersToken_;
+
+  // configuration parameters
+  // (filled in the constructor; the fit-parameter pointers and sizes are
+  // refreshed in acquire())
+  ecal::multifit::ConfigurationParameters configParameters_;
+
+  // event data
+  // (allocated in acquire(), moved into the event in produce())
+  ecal::multifit::EventOutputDataGPU eventOutputDataGPU_;
+
+  // carries the CUDA context state from acquire() to produce()
+  cms::cuda::ContextState cudaState_;
+
+  // number of EB / EE digis seen in acquire(); used as output sizes in produce()
+  uint32_t neb_, nee_;
+};
+
+// Declares the module's configuration parameters together with their default
+// values and registers the description under the default label.
+void EcalUncalibRecHitProducerGPU::fillDescriptions(edm::ConfigurationDescriptions& confDesc) {
+  edm::ParameterSetDescription desc;
+
+  desc.add("digisLabelEB", edm::InputTag("ecalRawToDigiGPU", "ebDigis"));
+  desc.add("digisLabelEE", edm::InputTag("ecalRawToDigiGPU", "eeDigis"));
+
+  desc.add("recHitsLabelEB", "EcalUncalibRecHitsEB");
+  desc.add("recHitsLabelEE", "EcalUncalibRecHitsEE");
+
+  desc.add("EBtimeFitLimits_Lower", 0.2);
+  desc.add("EBtimeFitLimits_Upper", 1.4);
+  desc.add("EEtimeFitLimits_Lower", 0.2);
+  desc.add("EEtimeFitLimits_Upper", 1.4);
+  desc.add("EBtimeConstantTerm", .6);
+  desc.add("EEtimeConstantTerm", 1.0);
+  desc.add("EBtimeNconst", 28.5);
+  desc.add("EEtimeNconst", 31.8);
+  desc.add("outOfTimeThresholdGain12pEB", 5);
+  desc.add("outOfTimeThresholdGain12mEB", 5);
+  desc.add("outOfTimeThresholdGain61pEB", 5);
+  desc.add("outOfTimeThresholdGain61mEB", 5);
+  desc.add("outOfTimeThresholdGain12pEE", 1000);
+  desc.add("outOfTimeThresholdGain12mEE", 1000);
+  desc.add("outOfTimeThresholdGain61pEE", 1000);
+  desc.add("outOfTimeThresholdGain61mEE", 1000);
+  desc.add("amplitudeThresholdEB", 10);
+  desc.add("amplitudeThresholdEE", 10);
+  desc.add("maxNumberHitsEB", 61200);
+  desc.add("maxNumberHitsEE", 14648);
+  desc.addUntracked>("kernelMinimizeThreads", {32, 1, 1});
+  desc.add("shouldRunTimingComputation", true);
+  confDesc.addWithDefaultLabel(desc);
+}
+
+// Registers the consumed/produced products and the conditions tokens, then
+// copies the configured values from `ps` into configParameters_.
+EcalUncalibRecHitProducerGPU::EcalUncalibRecHitProducerGPU(const edm::ParameterSet& ps)
+    : digisTokenEB_{consumes(ps.getParameter("digisLabelEB"))},
+      digisTokenEE_{consumes(ps.getParameter("digisLabelEE"))},
+      recHitsTokenEB_{produces(ps.getParameter("recHitsLabelEB"))},
+      recHitsTokenEE_{produces(ps.getParameter("recHitsLabelEE"))},
+      pedestalsToken_{esConsumes()},
+      gainRatiosToken_{esConsumes()},
+      pulseShapesToken_{esConsumes()},
+      pulseCovariancesToken_{esConsumes()},
+      samplesCorrelationToken_{esConsumes()},
+      timeBiasCorrectionsToken_{esConsumes()},
+      timeCalibConstantsToken_{esConsumes()},
+      sampleMaskToken_{esConsumes()},
+      timeOffsetConstantToken_{esConsumes()},
+      multifitParametersToken_{esConsumes()} {
+  std::pair EBtimeFitLimits, EEtimeFitLimits;
+  EBtimeFitLimits.first = ps.getParameter("EBtimeFitLimits_Lower");
+  EBtimeFitLimits.second = ps.getParameter("EBtimeFitLimits_Upper");
+  EEtimeFitLimits.first = ps.getParameter("EEtimeFitLimits_Lower");
+  EEtimeFitLimits.second = ps.getParameter("EEtimeFitLimits_Upper");
+
+  auto EBtimeConstantTerm = ps.getParameter("EBtimeConstantTerm");
+  auto EEtimeConstantTerm = ps.getParameter("EEtimeConstantTerm");
+  auto EBtimeNconst = ps.getParameter("EBtimeNconst");
+  auto EEtimeNconst = ps.getParameter("EEtimeNconst");
+
+  auto outOfTimeThreshG12pEB = ps.getParameter("outOfTimeThresholdGain12pEB");
+  auto outOfTimeThreshG12mEB = ps.getParameter("outOfTimeThresholdGain12mEB");
+  auto outOfTimeThreshG61pEB = ps.getParameter("outOfTimeThresholdGain61pEB");
+  auto outOfTimeThreshG61mEB = ps.getParameter("outOfTimeThresholdGain61mEB");
+  auto outOfTimeThreshG12pEE = ps.getParameter("outOfTimeThresholdGain12pEE");
+  auto outOfTimeThreshG12mEE = ps.getParameter("outOfTimeThresholdGain12mEE");
+  auto outOfTimeThreshG61pEE = ps.getParameter("outOfTimeThresholdGain61pEE");
+  auto outOfTimeThreshG61mEE = ps.getParameter("outOfTimeThresholdGain61mEE");
+  auto amplitudeThreshEB = ps.getParameter("amplitudeThresholdEB");
+  auto amplitudeThreshEE = ps.getParameter("amplitudeThresholdEE");
+
+  // max number of digis to allocate for
+  configParameters_.maxNumberHitsEB = ps.getParameter("maxNumberHitsEB");
+  configParameters_.maxNumberHitsEE = ps.getParameter("maxNumberHitsEE");
+
+  // switch to run timing computation kernels
+  configParameters_.shouldRunTimingComputation = ps.getParameter("shouldRunTimingComputation");
+
+  // minimize kernel launch conf
+  auto threadsMinimize = ps.getUntrackedParameter>("kernelMinimizeThreads");
+  configParameters_.kernelMinimizeThreads[0] = threadsMinimize[0];
+  configParameters_.kernelMinimizeThreads[1] = threadsMinimize[1];
+  configParameters_.kernelMinimizeThreads[2] = threadsMinimize[2];
+
+  //
+  // configuration and physics parameters: done once
+  // assume there is a single device
+  // use sync copying
+  //
+
+  // time fit parameters and limits
+  configParameters_.timeFitLimitsFirstEB = EBtimeFitLimits.first;
+  configParameters_.timeFitLimitsSecondEB = EBtimeFitLimits.second;
+  configParameters_.timeFitLimitsFirstEE = EEtimeFitLimits.first;
+  configParameters_.timeFitLimitsSecondEE = EEtimeFitLimits.second;
+
+  // time constant terms
+  configParameters_.timeConstantTermEB = EBtimeConstantTerm;
+  configParameters_.timeConstantTermEE = EEtimeConstantTerm;
+
+  // time N const
+  configParameters_.timeNconstEB = EBtimeNconst;
+  configParameters_.timeNconstEE = EEtimeNconst;
+
+  // amplitude threshold for time flags
+  configParameters_.amplitudeThreshEB = amplitudeThreshEB;
+  configParameters_.amplitudeThreshEE = amplitudeThreshEE;
+
+  // out of time thresholds gain-dependent
+  configParameters_.outOfTimeThreshG12pEB = outOfTimeThreshG12pEB;
+  configParameters_.outOfTimeThreshG12pEE = outOfTimeThreshG12pEE;
+  configParameters_.outOfTimeThreshG61pEB = outOfTimeThreshG61pEB;
+  configParameters_.outOfTimeThreshG61pEE = outOfTimeThreshG61pEE;
+  configParameters_.outOfTimeThreshG12mEB = outOfTimeThreshG12mEB;
+  configParameters_.outOfTimeThreshG12mEE = outOfTimeThreshG12mEE;
+  configParameters_.outOfTimeThreshG61mEB = outOfTimeThreshG61mEB;
+  configParameters_.outOfTimeThreshG61mEE = outOfTimeThreshG61mEE;
+}
+
+EcalUncalibRecHitProducerGPU::~EcalUncalibRecHitProducerGPU() {}
+
+// Reads the EB/EE digi products, fetches the conditions for the stream of the
+// acquired context, allocates the output and scratch buffers and schedules
+// ecal::multifit::entryPoint on that stream.
+void EcalUncalibRecHitProducerGPU::acquire(edm::Event const& event,
+                                           edm::EventSetup const& setup,
+                                           edm::WaitingTaskWithArenaHolder holder) {
+  // cuda products
+  auto const& ebDigisProduct = event.get(digisTokenEB_);
+  auto const& eeDigisProduct = event.get(digisTokenEE_);
+
+  // raii
+  cms::cuda::ScopedContextAcquire ctx{ebDigisProduct, std::move(holder), cudaState_};
+
+  // get actual obj
+  auto const& ebDigis = ctx.get(ebDigisProduct);
+  auto const& eeDigis = ctx.get(eeDigisProduct);
+  ecal::multifit::EventInputDataGPU inputDataGPU{ebDigis, eeDigis};
+  neb_ = ebDigis.size;
+  nee_ = eeDigis.size;
+
+  // NOTE(review): the overflow is only logged and processing continues;
+  // presumably the buffers allocated below are sized from maxNumberHitsEB/EE,
+  // so confirm that larger inputs cannot write past them.
+  if ((neb_ > configParameters_.maxNumberHitsEB) || (nee_ > configParameters_.maxNumberHitsEE)) {
+    edm::LogError("EcalUncalibRecHitProducerGPU")
+        << "max number of channels exceeded. See options 'maxNumberHitsEB and maxNumberHitsEE' ";
+  }
+
+  // conditions
+  auto const& timeCalibConstantsData = setup.getData(timeCalibConstantsToken_);
+  auto const& sampleMaskData = setup.getData(sampleMaskToken_);
+  auto const& timeOffsetConstantData = setup.getData(timeOffsetConstantToken_);
+  auto const& multifitParametersData = setup.getData(multifitParametersToken_);
+
+  auto const& pedestals = setup.getData(pedestalsToken_).getProduct(ctx.stream());
+  auto const& gainRatios = setup.getData(gainRatiosToken_).getProduct(ctx.stream());
+  auto const& pulseShapes = setup.getData(pulseShapesToken_).getProduct(ctx.stream());
+  auto const& pulseCovariances = setup.getData(pulseCovariancesToken_).getProduct(ctx.stream());
+  auto const& samplesCorrelation = setup.getData(samplesCorrelationToken_).getProduct(ctx.stream());
+  auto const& timeBiasCorrections = setup.getData(timeBiasCorrectionsToken_).getProduct(ctx.stream());
+  auto const& timeCalibConstants = timeCalibConstantsData.getProduct(ctx.stream());
+  auto const& multifitParameters = multifitParametersData.getProduct(ctx.stream());
+
+  // assign ptrs/values: this is done not to change how things look downstream
+  configParameters_.amplitudeFitParametersEB = multifitParameters.amplitudeFitParametersEB;
+  configParameters_.amplitudeFitParametersEE = multifitParameters.amplitudeFitParametersEE;
+  configParameters_.timeFitParametersEB = multifitParameters.timeFitParametersEB;
+  configParameters_.timeFitParametersEE = multifitParameters.timeFitParametersEE;
+  // entries 2 and 3 of getValues() supply the EB and EE time-fit parameter counts
+  configParameters_.timeFitParametersSizeEB = multifitParametersData.getValues()[2].get().size();
+  configParameters_.timeFitParametersSizeEE = multifitParametersData.getValues()[3].get().size();
+
+  // bundle up conditions
+  ecal::multifit::ConditionsProducts conditions{pedestals,
+                                                gainRatios,
+                                                pulseShapes,
+                                                pulseCovariances,
+                                                samplesCorrelation,
+                                                timeBiasCorrections,
+                                                timeCalibConstants,
+                                                sampleMaskData,
+                                                timeOffsetConstantData,
+                                                timeCalibConstantsData.getOffset(),
+                                                multifitParameters};
+
+  // dev mem
+  eventOutputDataGPU_.allocate(configParameters_, ctx.stream());
+
+  // scratch mem
+  ecal::multifit::EventDataForScratchGPU eventDataForScratchGPU;
+  eventDataForScratchGPU.allocate(configParameters_, ctx.stream());
+
+  //
+  // schedule algorithms
+  //
+  ecal::multifit::entryPoint(
+      inputDataGPU, eventOutputDataGPU_, eventDataForScratchGPU, conditions, configParameters_, ctx.stream());
+}
+
+// Stamps the EB/EE sizes recorded in acquire() onto the output collections and
+// moves both collections into the event.
+void EcalUncalibRecHitProducerGPU::produce(edm::Event& event, edm::EventSetup const& setup) {
+  //DurationMeasurer timer{std::string{"produce duration"}};
+  cms::cuda::ScopedContextProduce ctx{cudaState_};
+
+  // set the size of eb and ee
+  eventOutputDataGPU_.recHitsEB.size = neb_;
+  eventOutputDataGPU_.recHitsEE.size = nee_;
+
+  // put into the event
+  ctx.emplace(event, recHitsTokenEB_, std::move(eventOutputDataGPU_.recHitsEB));
+  ctx.emplace(event, recHitsTokenEE_, std::move(eventOutputDataGPU_.recHitsEE));
+}
+
+DEFINE_FWK_MODULE(EcalUncalibRecHitProducerGPU);
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/EigenMatrixTypes_gpu.h b/RecoLocalCalo/EcalRecProducers/plugins/EigenMatrixTypes_gpu.h
new file mode 100644
index 0000000000000..bbf9cb0dbb5c9
--- /dev/null
+++ b/RecoLocalCalo/EcalRecProducers/plugins/EigenMatrixTypes_gpu.h
@@ -0,0 +1,49 @@
+#ifndef RecoLocalCalo_EcalRecProducers_plugins_EigenMatrixTypes_gpu_h
+#define RecoLocalCalo_EcalRecProducers_plugins_EigenMatrixTypes_gpu_h
+
+#include
+
+#include
+
+#include "CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h"
+
+namespace ecal {
+  namespace multifit {
+
+    constexpr int SampleVectorSize = 10;
+    constexpr int FullSampleVectorSize = 19;
+    constexpr int PulseVectorSize = 12;
+    constexpr int NGains = 3;
+
+    using data_type = ::ecal::reco::ComputationScalarType;
+
+    typedef Eigen::Matrix PulseMatrixType;
+    typedef Eigen::Matrix BXVectorType;
+    using SampleMatrixD = Eigen::Matrix;
+
+    typedef Eigen::Matrix SampleVector;
+    typedef
Eigen::Matrix FullSampleVector;
+    typedef Eigen::Matrix PulseVector;
+    typedef Eigen::Matrix BXVector;
+    typedef Eigen::Matrix SampleGainVector;
+    typedef Eigen::Matrix SampleMatrix;
+    typedef Eigen::Matrix FullSampleMatrix;
+    typedef Eigen::Matrix PulseMatrix;
+    typedef Eigen::Matrix
+        SamplePulseMatrix;
+    typedef Eigen::LLT SampleDecompLLT;
+    typedef Eigen::LLT SampleDecompLLTD;
+    typedef Eigen::LLT PulseDecompLLT;
+    typedef Eigen::LDLT PulseDecompLDLT;
+
+    typedef Eigen::Matrix SingleMatrix;
+    typedef Eigen::Matrix SingleVector;
+
+    typedef std::array SampleMatrixGainArray;
+
+    using PermutationMatrix = Eigen::PermutationMatrix;
+
+  }  // namespace multifit
+}  // namespace ecal
+
+#endif  // RecoLocalCalo_EcalRecProducers_plugins_EigenMatrixTypes_gpu_h
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/KernelHelpers.cu b/RecoLocalCalo/EcalRecProducers/plugins/KernelHelpers.cu
new file mode 100644
index 0000000000000..5316ed87d6ecc
--- /dev/null
+++ b/RecoLocalCalo/EcalRecProducers/plugins/KernelHelpers.cu
@@ -0,0 +1,308 @@
+#include "DataFormats/EcalDetId/interface/EBDetId.h"
+#include "DataFormats/EcalDetId/interface/EEDetId.h"
+
+#include "KernelHelpers.h"
+
+namespace ecal {
+  namespace reconstruction {
+
+    namespace internal {
+
+      namespace barrel {
+
+        // true when bit 0x10000 of the raw id is set
+        __device__ __forceinline__ bool positiveZ(uint32_t id) { return id & 0x10000; }
+
+        // 7-bit field starting at bit 9 of the raw id
+        __device__ __forceinline__ uint32_t ietaAbs(uint32_t id) { return (id >> 9) & 0x7F; }
+
+        // lowest 9 bits of the raw id
+        __device__ __forceinline__ uint32_t iphi(uint32_t id) { return id & 0x1FF; }
+
+        // Maps a supermodule number to a dcc number:
+        // ism <= 18 gives 27 + ism, ism > 18 gives ism - 9.
+        __device__ int dccFromSm(int ism) {
+          int iz = 1;
+          if (ism > 18)
+            iz = -1;
+          if (iz == -1)
+            ism -= 18;
+          int idcc = 9 + ism;
+          if (iz == +1)
+            idcc += 18;
+          return idcc;
+        }
+
+        // Supermodule number from signed ieta and iphi: one supermodule per 20
+        // units of iphi (1-based), with 18 added for negative ieta.
+        __device__ int sm(int ieta, int iphi) {
+          int iz = 1;
+          if (ieta < 0)
+            iz = -1;
+          ieta *= iz;
+          int iphi_ = iphi;
+          if (iphi_ > 360)
+            iphi_ -= 360;
+          int ism = (iphi_ - 1) / 20 + 1;
+          if (iz == -1)
+            ism += 18;
+          return ism;
+        }
+
+        // dcc number for a crystal given by signed ieta and iphi
+        __device__ int dcc(int ieta, int iphi) {
+          int ism = sm(ieta, iphi);
+          return dccFromSm(ism);
+        }
+
+        //
+        // ---- why on hell things are so complex and not simple ???
+        //
+
+        // Looks up the entry of a 4 x 17 table at row (3 - iY), column iX
+        // (both 0-based). Returns -1 when the flattened index falls outside
+        // the table.
+        __device__ int lm_channel(int iX, int iY) {
+          static const int idx_[] = {
+              // clang-format off
+              // 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16
+                 1, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8, 8, 8, 8,  // 3
+                 1, 2, 2, 2, 2, 4, 4, 4, 4, 6, 6, 6, 6, 8, 8, 8, 8,  // 2
+                 1, 3, 3, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 9, 9, 9, 9,  // 1
+                 1, 3, 3, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 9, 9, 9, 9   // 0
+              // 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16
+              // clang-format on
+          };
+
+          int il, ic, ii;
+          const int iym = 4;
+          const int ixm = 17;
+          int iX_ = iX + 1;
+          int iY_ = iY + 1;
+          il = iym - iY_;
+          ic = iX_ - 1;
+          ii = il * ixm + ic;
+          // valid flat indices are 0 .. size-1; ii == size is already past the end
+          if (ii < 0 || ii >= (int)(sizeof(idx_) / sizeof(int))) {
+            return -1;
+          }
+          return idx_[ii];
+        }
+
+        // 0-based local x coordinate: |ieta| - 1 (iphi is not used)
+        __device__ int localCoord_x(int ieta, int iphi) {
+          int iz = 1;
+          if (ieta < 0) {
+            iz = -1;
+          }
+          ieta *= iz;
+          int ix = ieta - 1;
+
+          return ix;
+        }
+
+        // 0-based local y coordinate in 0..19 derived from iphi; the direction
+        // is mirrored (19 - iy) for negative ieta
+        __device__ int localCoord_y(int ieta, int iphi) {
+          int iz = 1;
+          if (ieta < 0) {
+            iz = -1;
+          }
+          int iphi_ = iphi;
+          if (iphi_ > 360) {
+            iphi_ -= 360;
+          }
+          int iy = (iphi_ - 1) % 20;
+          if (iz == -1) {
+            iy = 19 - iy;
+          }
+
+          return iy;
+        }
+
+        // lm_channel() of the 5 x 5 group that contains the crystal
+        __device__ int lmmod(int ieta, int iphi) {
+          int ix = localCoord_x(ieta, iphi);
+          int iy = localCoord_y(ieta, iphi);
+
+          return lm_channel(ix / 5, iy / 5);
+        }
+
+        // 1 when lmmod() is even, 0 otherwise
+        __device__ int side(int ieta, int iphi) {
+          int ilmmod = lmmod(ieta, iphi);
+          return (ilmmod % 2 == 0) ? 1 : 0;
+        }
+
+      }  // namespace barrel
+
+    }  // namespace internal
+
+    // Dense index for a barrel raw id, computed from the sign, |ieta| and iphi
+    // fields together with EBDetId::MAX_IETA and EBDetId::MAX_IPHI.
+    __device__ uint32_t hashedIndexEB(uint32_t id) {
+      using namespace internal::barrel;
+      return (EBDetId::MAX_IETA + (positiveZ(id) ? ietaAbs(id) - 1 : -ietaAbs(id))) * EBDetId::MAX_IPHI + iphi(id) - 1;
+    }
+
+    //
+    // https://cmssdt.cern.ch/lxr/source/CalibCalorimetry/EcalLaserAnalyzer/src/MEEBGeom.cc
+    // function: "lmr"
+
+    // Laser monitoring region for a barrel raw id:
+    // 1 + 2 * (dcc - 10) + side.
+    __device__ int laser_monitoring_region_EB(uint32_t id) {
+      using namespace internal::barrel;
+
+      int ieta;
+      if (positiveZ(id)) {
+        ieta = ietaAbs(id);
+      } else {
+        ieta = -ietaAbs(id);
+      }
+
+      int idcc = dcc(ieta, (int)(iphi(id)));
+      int ism = idcc - 9;
+
+      int iside = side(ieta, (int)(iphi(id)));
+
+      return (1 + 2 * (ism - 1) + iside);
+    }
+
+    namespace internal {
+
+      namespace endcap {
+
+        // 7-bit field starting at bit 7 of the raw id
+        __device__ __forceinline__ uint32_t ix(uint32_t id) { return (id >> 7) & 0x7F; }
+
+        // lowest 7 bits of the raw id
+        __device__ __forceinline__ uint32_t iy(uint32_t id) { return id & 0x7F; }
+
+        // true when bit 0x4000 of the raw id is set
+        __device__ __forceinline__ bool positiveZ(uint32_t id) { return id & 0x4000; }
+
+        // these constants come from EE Det Id
+        __constant__ const unsigned short kxf[] = {
+            41, 51, 41, 51, 41, 51, 36, 51, 36, 51, 26, 51, 26, 51, 26, 51, 21, 51, 21, 51, 21, 51, 21, 51, 21,
+            51, 16, 51, 16, 51, 14, 51, 14, 51, 14, 51, 14, 51, 14, 51, 9,  51, 9,  51, 9,  51, 9,  51, 9,  51,
+            6,  51, 6,  51, 6,  51, 6,  51, 6,  51, 6,  51, 6,  51, 6,  51, 6,  51, 6,  51, 4,  51, 4,  51, 4,
+            51, 4,  51, 4,  56, 1,  58, 1,  59, 1,  60, 1,  61, 1,  61, 1,  62, 1,  62, 1,  62, 1,  62, 1,  62,
+            1,  62, 1,  62, 1,  62, 1,  62, 1,  62, 1,  61, 1,  61, 1,  60, 1,  59, 1,  58, 4,  56, 4,  51, 4,
+            51, 4,  51, 4,  51, 6,  51, 6,  51, 6,  51, 6,  51, 6,  51, 6,  51, 6,  51, 6,  51, 6,  51, 6,  51,
+            9,  51, 9,  51, 9,  51, 9,  51, 9,  51, 14, 51, 14, 51, 14, 51, 14, 51, 14, 51, 16, 51, 16, 51, 21,
+            51, 21, 51, 21, 51, 21, 51, 21, 51, 26, 51, 26, 51, 26, 51, 36, 51, 36, 51, 41, 51, 41, 51, 41, 51};
+
+        __constant__ const unsigned short kdi[] = {
+            0,    10,   20,   30,   40,   50,   60,   75,   90,   105,  120,  145,  170,  195,  220,  245,  270,
+            300,  330,  360,  390,  420,  450,  480,  510,  540,  570,  605,  640,  675,  710,  747,  784,  821,
+            858,  895,  932,  969,  1006, 1043, 1080, 1122, 1164, 1206, 1248, 1290, 1332, 1374, 1416, 1458, 1500,
+            1545, 1590, 1635, 1680, 1725, 1770, 1815, 1860, 1905, 1950, 1995, 2040, 2085, 2130, 2175, 2220, 2265,
+            2310, 2355, 2400, 2447, 2494, 2541, 2588, 2635, 2682, 2729, 2776, 2818, 2860, 2903, 2946, 2988, 3030,
+            3071, 3112, 3152, 3192, 3232, 3272, 3311, 3350, 3389, 3428, 3467, 3506, 3545, 3584, 3623, 3662, 3701,
+            3740, 3779, 3818, 3857, 3896, 3935, 3974, 4013, 4052, 4092, 4132, 4172, 4212, 4253, 4294, 4336, 4378,
+            4421, 4464, 4506, 4548, 4595, 4642, 4689, 4736, 4783, 4830, 4877, 4924, 4969, 5014, 5059, 5104, 5149,
+            5194, 5239, 5284, 5329, 5374, 5419, 5464, 5509, 5554, 5599, 5644, 5689, 5734, 5779, 5824, 5866, 5908,
+            5950, 5992, 6034, 6076, 6118, 6160, 6202, 6244, 6281, 6318, 6355, 6392, 6429, 6466, 6503, 6540, 6577,
+            6614, 6649, 6684, 6719, 6754, 6784, 6814, 6844, 6874, 6904, 6934, 6964, 6994, 7024, 7054, 7079, 7104,
+            7129, 7154, 7179, 7204, 7219, 7234, 7249, 7264, 7274, 7284, 7294, 7304, 7314};
+
+        // Quadrant 1..4 of a (iX, iY) pair, split at 11 on both axes:
+        // 1 = x>=11,y>=11; 2 = x<11,y>=11; 3 = x<11,y<11; 4 = x>=11,y<11.
+        __device__ int quadrant(int iX, int iY) {
+          bool near = iX >= 11;
+          bool far = !near;
+          bool top = iY >= 11;
+          bool bot = !top;
+
+          int iquad = 0;
+          if (near && top)
+            iquad = 1;
+          if (far && top)
+            iquad = 2;
+          if (far && bot)
+            iquad = 3;
+          if (near && bot)
+            iquad = 4;
+
+          return iquad;
+        }
+
+        // Looks up the entry of a 20 x 20 table at row (20 - iY), column
+        // (iX - 1), i.e. for 1-based iX and iY. Returns -1 when the flattened
+        // index falls outside the table or the table entry is 0.
+        __device__ int sector(int iX, int iY) {
+          //  Y (towards the surface)
+          //  T
+          //  |
+          //  |
+          //  |
+          //  o---------| X  (towards center of LHC)
+          //
+          static const int idx_[] = {
+              // clang-format off
+              // 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
+                 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0,  // 20
+                 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0,  // 19
+                 0, 0, 0, 2, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 8, 0, 0, 0,  // 18
+                 0, 0, 2, 2, 2, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 8, 8, 8, 0, 0,  // 17
+                 0, 2, 2, 2, 2, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 8, 8, 8, 8, 0,  // 16
+                 0, 2, 2, 2, 2, 2, 1, 1, 1, 1, 9, 9, 9, 9, 8, 8, 8, 8, 8, 0,  // 15
+                 0, 2, 2, 2, 2, 2, 2, 1, 1, 1, 9, 9, 9, 8, 8, 8, 8, 8, 8, 0,  // 14
+                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8,  // 13
+                 3, 3, 2, 2, 2, 2, 2, 2, 2, 0, 0, 8, 8, 8, 8, 8, 8, 8, 7, 7,  // 12
+                 3, 3, 3, 3, 3, 3, 3, 2, 0, 0, 0, 0, 8, 7, 7, 7, 7, 7, 7, 7,  // 11
+                 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7,  // 10
+                 3, 3, 3, 3, 3, 3, 3, 4, 4, 0, 0, 6, 6, 7, 7, 7, 7, 7, 7, 7,  // 9
+                 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 7, 7, 7,  // 8
+                 0, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 0,  // 7
+                 0, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 0,  // 6
+                 0, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 0,  // 5
+                 0, 0, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 0, 0,  // 4
+                 0, 0, 0, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 0, 0, 0,  // 3
+                 0, 0, 0, 0, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 0, 0, 0, 0,  // 2
+                 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0   // 1
+              // 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
+              // clang-format on
+          };
+
+          int iym, ixm, il, ic, ii;
+          iym = 20;
+          ixm = 20;
+          int iX_ = iX;
+          int iY_ = iY;
+          il = iym - iY_;
+          ic = iX_ - 1;
+          ii = il * ixm + ic;
+
+          // valid flat indices are 0 .. size-1; the bound must be checked before
+          // idx_[ii] is read, and ii == size is already past the end
+          if (ii < 0 || ii >= (int)(sizeof(idx_) / sizeof(int)) || idx_[ii] == 0) {
+            return -1;
+          }
+          return idx_[ii];
+        }
+
+      }  // namespace endcap
+
+    }  // namespace internal
+
+    // Dense index for an endcap raw id, computed from the ix, iy and sign
+    // fields with the kdi / kxf tables and EEDetId::kEEhalf.
+    // NOTE(review): jd is derived from ix and iy without a range check --
+    // presumably only valid EE ids reach this function; confirm with callers.
+    __device__ uint32_t hashedIndexEE(uint32_t id) {
+      using namespace internal::endcap;
+
+      const uint32_t jx(ix(id));
+      const uint32_t jd(2 * (iy(id) - 1) + (jx - 1) / 50);
+      return ((positiveZ(id) ? EEDetId::kEEhalf : 0) + kdi[jd] + jx - kxf[jd]);
+    }
+
+    //
+    // https://cmssdt.cern.ch/lxr/source/CalibCalorimetry/EcalLaserAnalyzer/src/MEEEGeom.cc
+    // https://github.com/cms-sw/cmssw/blob/master/CalibCalorimetry/EcalLaserCorrection/src/EcalLaserDbService.cc
+    //
+
+    // Laser monitoring region for an endcap raw id, or -1 when sector() finds
+    // no sector for the 5 x 5 group that contains the crystal.
+    __device__ int laser_monitoring_region_EE(uint32_t id) {
+      using namespace internal::endcap;
+
+      // SuperCrysCoord
+      uint32_t iX = (ix(id) - 1) / 5 + 1;
+      uint32_t iY = (iy(id) - 1) / 5 + 1;
+
+      // Correct convention
+      //   * @param iz iz/zside index: -1 for EE-, +1 for EE+
+      //   https://github.com/cms-sw/cmssw/blob/master/DataFormats/EcalDetId/interface/EEDetId.h#L68-L71
+      //   zside in https://github.com/cms-sw/cmssw/blob/master/CalibCalorimetry/EcalLaserCorrection/src/EcalLaserDbService.cc#L63
+      //
+      int iz = positiveZ(id) ? 1 : -1;
+
+      int iquad = quadrant(iX, iY);
+      int isect = sector(iX, iY);
+      if (isect < 0)
+        return -1;
+
+      int ilmr = 0;
+      ilmr = isect - 6;
+      if (ilmr <= 0)
+        ilmr += 9;
+      if (ilmr == 9)
+        ilmr++;
+      if (ilmr == 8 && iquad == 4)
+        ilmr++;
+      if (iz == +1)
+        ilmr += 72;
+      else
+        ilmr += 82;
+
+      return ilmr;
+    }
+
+  }  // namespace reconstruction
+}  // namespace ecal
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/KernelHelpers.h b/RecoLocalCalo/EcalRecProducers/plugins/KernelHelpers.h
new file mode 100644
index 0000000000000..74c5b68d8e137
--- /dev/null
+++ b/RecoLocalCalo/EcalRecProducers/plugins/KernelHelpers.h
@@ -0,0 +1,26 @@
+#ifndef RecoLocalCalo_EcalRecProducers_plugins_KernelHelpers_h
+#define RecoLocalCalo_EcalRecProducers_plugins_KernelHelpers_h
+
+#include "DataFormats/CaloRecHit/interface/MultifitComputations.h"
+
+#include
+#include
+#include
+
+#include
+
+namespace ecal {
+  namespace reconstruction {
+
+    __device__ uint32_t hashedIndexEB(uint32_t id);
+
+    __device__ uint32_t hashedIndexEE(uint32_t id);
+
+    __device__ int laser_monitoring_region_EB(uint32_t id);
+
+    __device__ int laser_monitoring_region_EE(uint32_t id);
+
+  }  // namespace reconstruction
+}  //
namespace ecal
+
+#endif  // RecoLocalCalo_EcalRecProducers_plugins_KernelHelpers_h
diff --git a/RecoLocalCalo/EcalRecProducers/plugins/TimeComputationKernels.cu b/RecoLocalCalo/EcalRecProducers/plugins/TimeComputationKernels.cu
new file mode 100644
index 0000000000000..9c2d2fc986c08
--- /dev/null
+++ b/RecoLocalCalo/EcalRecProducers/plugins/TimeComputationKernels.cu
@@ -0,0 +1,1133 @@
+#include
+#include
+
+#include
+
+#include "DataFormats/EcalDigi/interface/EcalDataFrame.h"
+#include "DataFormats/EcalRecHit/interface/EcalUncalibratedRecHit.h"
+#include "DataFormats/Math/interface/approx_exp.h"
+#include "DataFormats/Math/interface/approx_log.h"
+#include "FWCore/Utilities/interface/CMSUnrollLoop.h"
+
+#include "Common.h"
+#include "TimeComputationKernels.h"
+#include "KernelHelpers.h"
+
+//#define DEBUG
+
+//#define ECAL_RECO_CUDA_DEBUG
+
+namespace ecal {
+  namespace multifit {
+
+    // True if the bit of `sample_mask` assigned to `sample` is set; sample 0
+    // maps to bit (EcalDataFrame::MAXSAMPLES - 1), the last sample to bit 0.
+    __device__ __forceinline__ bool use_sample(unsigned int sample_mask, unsigned int sample) {
+      return sample_mask & (0x1 << (EcalDataFrame::MAXSAMPLES - (sample + 1)));
+    }
+
+    //
+    // One thread per (channel, sample). Per channel, accumulates over the samples
+    //   sum0  = number of samples not flagged in useless_sample_values
+    //   sum1  = sum of 1 / error^2
+    //   sumA  = sum of value / error^2
+    //   sumAA = sum of value^2 / error^2
+    // and writes chi2 = (sumAA - sumA^2 / sum1) / sum0 (0 if sum0 == 0), sum0
+    // and sumAA to chi2s / sum0s / sumAAs.
+    // Dynamic shared memory: nchannels_per_block * nsamples chars followed by
+    // three arrays of nchannels_per_block * nsamples scalars.
+    // The hard-coded reduction offsets (5, 2, 3, 1) assume 10 samples per channel.
+    //
+    __global__ void kernel_time_compute_nullhypot(SampleVector::Scalar const* sample_values,
+                                                  SampleVector::Scalar const* sample_value_errors,
+                                                  bool const* useless_sample_values,
+                                                  SampleVector::Scalar* chi2s,
+                                                  SampleVector::Scalar* sum0s,
+                                                  SampleVector::Scalar* sumAAs,
+                                                  const int nchannels) {
+      using ScalarType = SampleVector::Scalar;
+      constexpr int nsamples = EcalDataFrame::MAXSAMPLES;
+
+      // indices
+      int tx = threadIdx.x + blockDim.x * blockIdx.x;
+      int ltx = threadIdx.x;
+      int ch = tx / nsamples;
+      int nchannels_per_block = blockDim.x / nsamples;
+
+      // threads that return here should not affect the __syncthreads() below since they have exited the kernel
+      if (ch >= nchannels)
+        return;
+
+      int sample = tx % nsamples;
+
+      // shared mem inits
+      extern __shared__ char sdata[];
+      char* s_sum0 = sdata;
+      SampleVector::Scalar* s_sum1 = reinterpret_cast(s_sum0 + nchannels_per_block * nsamples);
+      SampleVector::Scalar* s_sumA = s_sum1 + nchannels_per_block * nsamples;
+      SampleVector::Scalar* s_sumAA = s_sumA + nchannels_per_block * nsamples;
+
+      // TODO make sure no div by 0
+      const auto inv_error =
+          useless_sample_values[tx] ? 0.0 : 1.0 / (sample_value_errors[tx] * sample_value_errors[tx]);
+      const auto sample_value = sample_values[tx];
+      s_sum0[ltx] = useless_sample_values[tx] ? 0 : 1;
+      s_sum1[ltx] = inv_error;
+      s_sumA[ltx] = sample_value * inv_error;
+      s_sumAA[ltx] = sample_value * sample_value * inv_error;
+      __syncthreads();
+
+      // 5 threads for [0, 4] samples
+      if (sample < 5) {
+        s_sum0[ltx] += s_sum0[ltx + 5];
+        s_sum1[ltx] += s_sum1[ltx + 5];
+        s_sumA[ltx] += s_sumA[ltx + 5];
+        s_sumAA[ltx] += s_sumAA[ltx + 5];
+      }
+      __syncthreads();
+
+      if (sample < 2) {
+        // note double counting of sample 3
+        s_sum0[ltx] += s_sum0[ltx + 2] + s_sum0[ltx + 3];
+        s_sum1[ltx] += s_sum1[ltx + 2] + s_sum1[ltx + 3];
+        s_sumA[ltx] += s_sumA[ltx + 2] + s_sumA[ltx + 3];
+        s_sumAA[ltx] += s_sumAA[ltx + 2] + s_sumAA[ltx + 3];
+      }
+      __syncthreads();
+
+      if (sample == 0) {
+        // note, subtract to remove the double counting of sample == 3
+        const auto sum0 = s_sum0[ltx] + s_sum0[ltx + 1] - s_sum0[ltx + 3];
+        const auto sum1 = s_sum1[ltx] + s_sum1[ltx + 1] - s_sum1[ltx + 3];
+        const auto sumA = s_sumA[ltx] + s_sumA[ltx + 1] - s_sumA[ltx + 3];
+        const auto sumAA = s_sumAA[ltx] + s_sumAA[ltx + 1] - s_sumAA[ltx + 3];
+        const auto chi2 = sum0 > 0 ? (sumAA - sumA * sumA / sum1) / sum0 : static_cast(0);
+        chi2s[ch] = chi2;
+        sum0s[ch] = sum0;
+        sumAAs[ch] = sumAA;
+
+#ifdef DEBUG_TC_NULLHYPOT
+        if (ch == 0) {
+          printf("chi2 = %f sum0 = %d sumAA = %f\n", chi2, static_cast(sum0), sumAA);
+        }
+#endif
+      }
+    }
+
+    // thin wrappers around unsafe_expf<6> / unsafe_logf<7>
+    constexpr float fast_expf(float x) { return unsafe_expf<6>(x); }
+    constexpr float fast_logf(float x) { return unsafe_logf<7>(x); }
+
+    //#define DEBUG_TC_MAKERATIO
+    //
+    // launch ctx parameters are
+    // 45 threads per channel, X channels per block, Y blocks
+    // 45 comes from: 10 samples for i <- 0 to 9 and for j <- i+1 to 9
+    // TODO: it might be much better to use 32 threads per channel instead of 45
+    // to simplify the synchronization
+    //
+    // Each thread handles one sample pair (i, j) of its channel; the per-pair
+    // results are reduced per channel in shared memory and thread 0 of the
+    // channel writes tMaxAlphaBetas, tMaxErrorAlphaBetas, g_accTimeMax,
+    // g_accTimeWgt and g_state.
+    // Dynamic shared memory: 5 * blockDim.x scalars.
+    //
+    __global__ void kernel_time_compute_makeratio(SampleVector::Scalar const* sample_values,
+                                                  SampleVector::Scalar const* sample_value_errors,
+                                                  uint32_t const* dids_eb,
+                                                  uint32_t const* dids_ee,
+                                                  bool const* useless_sample_values,
+                                                  char const* pedestal_nums,
+                                                  ConfigurationParameters::type const* amplitudeFitParametersEB,
+                                                  ConfigurationParameters::type const* amplitudeFitParametersEE,
+                                                  ConfigurationParameters::type const* timeFitParametersEB,
+                                                  ConfigurationParameters::type const* timeFitParametersEE,
+                                                  SampleVector::Scalar const* sumAAsNullHypot,
+                                                  SampleVector::Scalar const* sum0sNullHypot,
+                                                  SampleVector::Scalar* tMaxAlphaBetas,
+                                                  SampleVector::Scalar* tMaxErrorAlphaBetas,
+                                                  SampleVector::Scalar* g_accTimeMax,
+                                                  SampleVector::Scalar* g_accTimeWgt,
+                                                  TimeComputationState* g_state,
+                                                  unsigned const int timeFitParameters_sizeEB,
+                                                  unsigned const int timeFitParameters_sizeEE,
+                                                  ConfigurationParameters::type const timeFitLimits_firstEB,
+                                                  ConfigurationParameters::type const timeFitLimits_firstEE,
+                                                  ConfigurationParameters::type const timeFitLimits_secondEB,
+                                                  ConfigurationParameters::type const timeFitLimits_secondEE,
+                                                  const int nchannels,
+                                                  uint32_t const offsetForInputs) {
+      using ScalarType = SampleVector::Scalar;
+
+      // constants
+      constexpr int nthreads_per_channel = 45;  // n=10, n(n-1)/2
+      constexpr int nsamples = EcalDataFrame::MAXSAMPLES;
+
+      // indices
+      const int gtx = threadIdx.x + blockDim.x * blockIdx.x;
+      const int ch = gtx / nthreads_per_channel;
+      const int ltx = threadIdx.x % nthreads_per_channel;
+      const int ch_start = ch * nsamples;
+      const auto* dids = ch >= offsetForInputs ? dids_ee : dids_eb;
+      const int inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch;
+
+      // remove inactive threads
+      // threads that return here should not affect the __syncthreads() below since they have exited the kernel
+      if (ch >= nchannels)
+        return;
+
+      const auto did = DetId{dids[inputCh]};
+      const auto isBarrel = did.subdetId() == EcalBarrel;
+      const auto* amplitudeFitParameters = isBarrel ? amplitudeFitParametersEB : amplitudeFitParametersEE;
+      const auto* timeFitParameters = isBarrel ? timeFitParametersEB : timeFitParametersEE;
+      const auto timeFitParameters_size = isBarrel ? timeFitParameters_sizeEB : timeFitParameters_sizeEE;
+      const auto timeFitLimits_first = isBarrel ? timeFitLimits_firstEB : timeFitLimits_firstEE;
+      const auto timeFitLimits_second = isBarrel ? timeFitLimits_secondEB : timeFitLimits_secondEE;
+
+      extern __shared__ char smem[];
+      ScalarType* shr_chi2s = reinterpret_cast(smem);
+      ScalarType* shr_time_wgt = shr_chi2s + blockDim.x;
+      ScalarType* shr_time_max = shr_time_wgt + blockDim.x;
+      ScalarType* shrTimeMax = shr_time_max + blockDim.x;
+      ScalarType* shrTimeWgt = shrTimeMax + blockDim.x;
+
+      // map tx -> (sample_i, sample_j)
+      int sample_i, sample_j = 0;
+      if (ltx >= 0 && ltx <= 8) {
+        sample_i = 0;
+        sample_j = 1 + ltx;
+      } else if (ltx <= 16) {
+        sample_i = 1;
+        sample_j = 2 + ltx - 9;
+      } else if (ltx <= 23) {
+        sample_i = 2;
+        sample_j = 3 + ltx - 17;
+      } else if (ltx <= 29) {
+        sample_i = 3;
+        sample_j = 4 + ltx - 24;
+      } else if (ltx <= 34) {
+        sample_i = 4;
+        sample_j = 5 + ltx - 30;
+      } else if (ltx <= 38) {
+        sample_i = 5;
+        sample_j = 6 + ltx - 35;
+      } else if (ltx <= 41) {
+        sample_i = 6;
+        sample_j = 7 + ltx - 39;
+      } else if (ltx <= 43) {
+        sample_i = 7;
+        sample_j = 8 + ltx - 42;
+      } else if (ltx <= 44) {
+        sample_i = 8;
+        sample_j = 9;
+      } else
+        assert(false);
+
+      const auto tx_i = ch_start + sample_i;
+      const auto tx_j = ch_start + sample_j;
+
+      //
+      // note, given the way we partition the block, with 45 threads per channel
+      // we will end up with inactive threads which need to be dragged along
+      // through the synching point
+      //
+      bool const condForUselessSamples = useless_sample_values[tx_i] || useless_sample_values[tx_j] ||
+                                         sample_values[tx_i] <= 1 || sample_values[tx_j] <= 1;
+
+      //
+      // see cpu implementation for explanation
+      //
+      ScalarType chi2 = std::numeric_limits::max();
+      ScalarType tmax = 0;
+      ScalarType tmaxerr = 0;
+      shrTimeMax[threadIdx.x] = 0;
+      shrTimeWgt[threadIdx.x] = 0;
+      bool internalCondForSkipping1 = true;
+      bool internalCondForSkipping2 = true;
+      if (!condForUselessSamples) {
+        const auto rtmp = sample_values[tx_i] / sample_values[tx_j];
+        const auto invampl_i = 1.0 / sample_values[tx_i];
+        const auto relErr2_i = sample_value_errors[tx_i] * sample_value_errors[tx_i] * invampl_i * invampl_i;
+        const auto invampl_j = 1.0 / sample_values[tx_j];
+        const auto relErr2_j = sample_value_errors[tx_j] * sample_value_errors[tx_j] * invampl_j * invampl_j;
+        const auto err1 = rtmp * rtmp * (relErr2_i + relErr2_j);
+        auto err2 = sample_value_errors[tx_j] * (sample_values[tx_i] - sample_values[tx_j]) * (invampl_j * invampl_j);
+        // err1 and err3 are squared errors and the three terms are summed under
+        // the square root below, so err2 has to enter squared for every value of
+        // pedestal_nums[ch], not only for 2 (it used to be left linear, and
+        // possibly negative, for 0 and 1)
+        err2 *= err2;
+        // TODO non-divergent branch for a block if each block has 1 channel
+        // otherwise non-divergent for groups of 45 threads
+        // at this point, pedestal_nums[ch] can be either 0, 1 or 2
+        if (pedestal_nums[ch] == 2)
+          err2 *= 0.5;
+        const auto err3 = (0.289 * 0.289) * (invampl_j * invampl_j);
+        const auto total_error = std::sqrt(err1 + err2 + err3);
+
+        const auto alpha = amplitudeFitParameters[0];
+        const auto beta = amplitudeFitParameters[1];
+        const auto alphabeta = alpha * beta;
+        const auto invalphabeta = 1.0 / alphabeta;
+
+        // variables instead of a struct
+        const auto ratio_index = sample_i;
+        const auto ratio_step = sample_j - sample_i;
+        const auto ratio_value = rtmp;
+        const auto ratio_error = total_error;
+
+        const auto rlim_i_j = fast_expf(static_cast(sample_j - sample_i) / beta) - 0.001;
+        internalCondForSkipping1 = !(total_error < 1.0 && rtmp > 0.001 && rtmp < rlim_i_j);
+        if (!internalCondForSkipping1) {
+          //
+          // precompute.
+          // in cpu version this was done conditionally
+          // however easier to do it here (precompute) and then just filter out
+          // if not needed
+          //
+          const auto l_timeFitLimits_first = timeFitLimits_first;
+          const auto l_timeFitLimits_second = timeFitLimits_second;
+          if (ratio_step == 1 && ratio_value >= l_timeFitLimits_first && ratio_value <= l_timeFitLimits_second) {
+            const auto time_max_i = static_cast(ratio_index);
+            auto u = timeFitParameters[timeFitParameters_size - 1];
+            CMS_UNROLL_LOOP
+            for (int k = timeFitParameters_size - 2; k >= 0; k--)
+              u = u * ratio_value + timeFitParameters[k];
+
+            auto du = (timeFitParameters_size - 1) * (timeFitParameters[timeFitParameters_size - 1]);
+            for (int k = timeFitParameters_size - 2; k >= 1; k--)
+              du = du * ratio_value + k * timeFitParameters[k];
+
+            const auto error2 = ratio_error * ratio_error * du * du;
+            const auto time_max = error2 > 0 ? (time_max_i - u) / error2 : static_cast(0);
+            const auto time_wgt = error2 > 0 ? 1.0 / error2 : static_cast(0);
+
+            // store into shared mem
+            // note, this name is essentially identical to the one used
+            // below.
+            shrTimeMax[threadIdx.x] = error2 > 0 ? time_max : 0;
+            shrTimeWgt[threadIdx.x] = error2 > 0 ? time_wgt : 0;
+          } else {
+            shrTimeMax[threadIdx.x] = 0;
+            shrTimeWgt[threadIdx.x] = 0;
+          }
+
+          // continue with ratios
+          const auto stepOverBeta = static_cast(ratio_step) / beta;
+          const auto offset = static_cast(ratio_index) + alphabeta;
+          const auto rmin = std::max(ratio_value - ratio_error, 0.001);
+          const auto rmax = std::min(ratio_value + ratio_error,
+                                     fast_expf(static_cast(ratio_step) / beta) - 0.001);
+          const auto time1 = offset - ratio_step / (fast_expf((stepOverBeta - fast_logf(rmin)) / alpha) - 1.0);
+          const auto time2 = offset - ratio_step / (fast_expf((stepOverBeta - fast_logf(rmax)) / alpha) - 1.0);
+
+          // set these guys
+          tmax = 0.5 * (time1 + time2);
+          tmaxerr = 0.5 * std::sqrt((time1 - time2) * (time1 - time2));
+#ifdef DEBUG_TC_MAKERATIO
+          if (ch == 1 || ch == 0)
+            printf("ch = %d ltx = %d tmax = %f tmaxerr = %f time1 = %f time2 = %f offset = %f rmin = %f rmax = %f\n",
+                   ch,
+                   ltx,
+                   tmax,
+                   tmaxerr,
+                   time1,
+                   time2,
+                   offset,
+                   rmin,
+                   rmax);
+#endif
+
+          SampleVector::Scalar sumAf = 0;
+          SampleVector::Scalar sumff = 0;
+          const int itmin = std::max(-1, static_cast(std::floor(tmax - alphabeta)));
+          auto loffset = (static_cast(itmin) - tmax) * invalphabeta;
+          // TODO: data dependence
+          for (int it = itmin + 1; it < nsamples; it++) {
+            loffset += invalphabeta;
+            if (useless_sample_values[ch_start + it])
+              continue;
+            const auto inverr2 = 1.0 / (sample_value_errors[ch_start + it] * sample_value_errors[ch_start + it]);
+            const auto term1 = 1.0 + loffset;
+            const auto f = (term1 > 1e-6) ? fast_expf(alpha * (fast_logf(term1) - loffset)) : 0;
+            sumAf += sample_values[ch_start + it] * (f * inverr2);
+            sumff += f * (f * inverr2);
+          }
+
+          const auto sumAA = sumAAsNullHypot[ch];
+          const auto sum0 = sum0sNullHypot[ch];
+          chi2 = sumAA;
+          // TODO: sum0 can not be 0 below, need to introduce the check upfront
+          if (sumff > 0) {
+            chi2 = sumAA - sumAf * (sumAf / sumff);
+          }
+          chi2 /= sum0;
+
+#ifdef DEBUG_TC_MAKERATIO
+          if (ch == 1 || ch == 0)
+            printf("ch = %d ltx = %d sumAf = %f sumff = %f sumAA = %f sum0 = %d tmax = %f tmaxerr = %f chi2 = %f\n",
+                   ch,
+                   ltx,
+                   sumAf,
+                   sumff,
+                   sumAA,
+                   static_cast(sum0),
+                   tmax,
+                   tmaxerr,
+                   chi2);
+#endif
+
+          if (chi2 > 0 && tmax > 0 && tmaxerr > 0)
+            internalCondForSkipping2 = false;
+          else
+            chi2 = std::numeric_limits::max();
+        }
+      }
+
+      // store into smem
+      shr_chi2s[threadIdx.x] = chi2;
+      __syncthreads();
+
+      // find min chi2 - quite crude for now
+      // TODO validate/check
+      char iter = nthreads_per_channel / 2 + nthreads_per_channel % 2;
+      bool oddElements = nthreads_per_channel % 2;
+      CMS_UNROLL_LOOP
+      while (iter >= 1) {
+        if (ltx < iter)
+          // for odd ns, the last guy will just store itself
+          // exception is for ltx == 0 and iter==1
+          shr_chi2s[threadIdx.x] = oddElements && (ltx == iter - 1 && ltx > 0)
+                                       ? shr_chi2s[threadIdx.x]
+                                       : std::min(shr_chi2s[threadIdx.x], shr_chi2s[threadIdx.x + iter]);
+        __syncthreads();
+        oddElements = iter % 2;
+        iter = iter == 1 ? iter / 2 : iter / 2 + iter % 2;
+      }
+
+      // filter out inactive or useless samples threads
+      if (!condForUselessSamples && !internalCondForSkipping1 && !internalCondForSkipping2) {
+        // min chi2, now compute weighted average of tmax measurements
+        // see cpu version for more explanation
+        const auto chi2min = shr_chi2s[threadIdx.x - ltx];
+        const auto chi2Limit = chi2min + 1.0;
+        const auto inverseSigmaSquared = chi2 < chi2Limit ? 1.0 / (tmaxerr * tmaxerr) : 0.0;
+
+#ifdef DEBUG_TC_MAKERATIO
+        if (ch == 1 || ch == 0)
+          printf("ch = %d ltx = %d chi2min = %f chi2Limit = %f inverseSigmaSquared = %f\n",
+                 ch,
+                 ltx,
+                 chi2min,
+                 chi2Limit,
+                 inverseSigmaSquared);
+#endif
+
+        // store into shared mem and run reduction
+        // TODO: check if cooperative groups would be better
+        // TODO: check if shuffling intrinsics are better
+        shr_time_wgt[threadIdx.x] = inverseSigmaSquared;
+        shr_time_max[threadIdx.x] = tmax * inverseSigmaSquared;
+      } else {
+        shr_time_wgt[threadIdx.x] = 0;
+        shr_time_max[threadIdx.x] = 0;
+      }
+      __syncthreads();
+
+      // reduce to compute time_max and time_wgt
+      iter = nthreads_per_channel / 2 + nthreads_per_channel % 2;
+      oddElements = nthreads_per_channel % 2;
+      CMS_UNROLL_LOOP
+      while (iter >= 1) {
+        if (ltx < iter) {
+          shr_time_wgt[threadIdx.x] = oddElements && (ltx == iter - 1 && ltx > 0)
+                                          ? shr_time_wgt[threadIdx.x]
+                                          : shr_time_wgt[threadIdx.x] + shr_time_wgt[threadIdx.x + iter];
+          shr_time_max[threadIdx.x] = oddElements && (ltx == iter - 1 && ltx > 0)
+                                          ? shr_time_max[threadIdx.x]
+                                          : shr_time_max[threadIdx.x] + shr_time_max[threadIdx.x + iter];
+          shrTimeMax[threadIdx.x] = oddElements && (ltx == iter - 1 && ltx > 0)
+                                        ? shrTimeMax[threadIdx.x]
+                                        : shrTimeMax[threadIdx.x] + shrTimeMax[threadIdx.x + iter];
+          shrTimeWgt[threadIdx.x] = oddElements && (ltx == iter - 1 && ltx > 0)
+                                        ? shrTimeWgt[threadIdx.x]
+                                        : shrTimeWgt[threadIdx.x] + shrTimeWgt[threadIdx.x + iter];
+        }
+
+        __syncthreads();
+        oddElements = iter % 2;
+        iter = iter == 1 ? iter / 2 : iter / 2 + iter % 2;
+      }
+
+      // load from shared memory the 0th guy (will contain accumulated values)
+      // compute
+      // store into global mem
+      if (ltx == 0) {
+        const auto tmp_time_max = shr_time_max[threadIdx.x];
+        const auto tmp_time_wgt = shr_time_wgt[threadIdx.x];
+
+        // we are done if the number of time ratios is 0
+        if (tmp_time_wgt == 0 && tmp_time_max == 0) {
+          g_state[ch] = TimeComputationState::Finished;
+          return;
+        }
+
+        // no div by 0
+        const auto tMaxAlphaBeta = tmp_time_max / tmp_time_wgt;
+        const auto tMaxErrorAlphaBeta = 1.0 / std::sqrt(tmp_time_wgt);
+
+        tMaxAlphaBetas[ch] = tMaxAlphaBeta;
+        tMaxErrorAlphaBetas[ch] = tMaxErrorAlphaBeta;
+        g_accTimeMax[ch] = shrTimeMax[threadIdx.x];
+        g_accTimeWgt[ch] = shrTimeWgt[threadIdx.x];
+        g_state[ch] = TimeComputationState::NotFinished;
+
+#ifdef DEBUG_TC_MAKERATIO
+        printf("ch = %d time_max = %f time_wgt = %f\n", ch, tmp_time_max, tmp_time_wgt);
+        printf("ch = %d tMaxAlphaBeta = %f tMaxErrorAlphaBeta = %f timeMax = %f timeWgt = %f\n",
+               ch,
+               tMaxAlphaBeta,
+               tMaxErrorAlphaBeta,
+               shrTimeMax[threadIdx.x],
+               shrTimeWgt[threadIdx.x]);
+#endif
+      }
+    }
+
+    /// launch ctx parameters are
+    /// 10 threads per channel, N channels per block, Y blocks
+    /// TODO: do we need to keep the state around or can be removed?!
+    //#define DEBUG_FINDAMPLCHI2_AND_FINISH
+    //
+    // One thread per (channel, sample). For channels whose state is NotFinished
+    // the per-sample terms sumAf and sumff are reduced per channel in shared
+    // memory; thread 0 of the channel then writes g_ampMaxAlphaBeta, g_state,
+    // g_timeMax and g_timeError. Channels that end up Finished without a time
+    // get the defaults g_timeMax = 5 and g_timeError = -999.
+    // Dynamic shared memory: 2 * blockDim.x scalars.
+    // The hard-coded reduction offsets (5, 2, 3, 1) assume 10 samples per channel.
+    //
+    __global__ void kernel_time_compute_findamplchi2_and_finish(
+        SampleVector::Scalar const* sample_values,
+        SampleVector::Scalar const* sample_value_errors,
+        uint32_t const* dids_eb,
+        uint32_t const* dids_ee,
+        bool const* useless_samples,
+        SampleVector::Scalar const* g_tMaxAlphaBeta,
+        SampleVector::Scalar const* g_tMaxErrorAlphaBeta,
+        SampleVector::Scalar const* g_accTimeMax,
+        SampleVector::Scalar const* g_accTimeWgt,
+        ConfigurationParameters::type const* amplitudeFitParametersEB,
+        ConfigurationParameters::type const* amplitudeFitParametersEE,
+        SampleVector::Scalar const* sumAAsNullHypot,
+        SampleVector::Scalar const* sum0sNullHypot,
+        SampleVector::Scalar const* chi2sNullHypot,
+        TimeComputationState* g_state,
+        SampleVector::Scalar* g_ampMaxAlphaBeta,
+        SampleVector::Scalar* g_ampMaxError,
+        SampleVector::Scalar* g_timeMax,
+        SampleVector::Scalar* g_timeError,
+        const int nchannels,
+        uint32_t const offsetForInputs) {
+      using ScalarType = SampleVector::Scalar;
+
+      // constants
+      constexpr int nsamples = EcalDataFrame::MAXSAMPLES;
+
+      // indices
+      const int gtx = threadIdx.x + blockIdx.x * blockDim.x;
+      const int ch = gtx / nsamples;
+      const int sample = threadIdx.x % nsamples;
+      const auto* dids = ch >= offsetForInputs ? dids_ee : dids_eb;
+      const int inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch;
+
+      // configure shared mem
+      // per block, we need #threads per block * 2 * sizeof(ScalarType)
+      // we run with N channels per block
+      extern __shared__ char smem[];
+      ScalarType* shr_sumAf = reinterpret_cast(smem);
+      ScalarType* shr_sumff = shr_sumAf + blockDim.x;
+
+      if (ch >= nchannels)
+        return;
+
+      auto state = g_state[ch];
+      const auto did = DetId{dids[inputCh]};
+      const auto* amplitudeFitParameters =
+          did.subdetId() == EcalBarrel ? amplitudeFitParametersEB : amplitudeFitParametersEE;
+
+      // TODO is that better than storing into global and launching another kernel
+      // for the first 10 threads
+      if (state == TimeComputationState::NotFinished) {
+        const auto alpha = amplitudeFitParameters[0];
+        const auto beta = amplitudeFitParameters[1];
+        const auto alphabeta = alpha * beta;
+        const auto invalphabeta = 1.0 / alphabeta;
+        const auto tMaxAlphaBeta = g_tMaxAlphaBeta[ch];
+        const auto sample_value = sample_values[gtx];
+        const auto sample_value_error = sample_value_errors[gtx];
+        const auto inverr2 =
+            useless_samples[gtx] ? static_cast(0) : 1.0 / (sample_value_error * sample_value_error);
+        const auto offset = (static_cast(sample) - tMaxAlphaBeta) * invalphabeta;
+        const auto term1 = 1.0 + offset;
+        const auto f = term1 > 1e-6 ? fast_expf(alpha * (fast_logf(term1) - offset)) : static_cast(0.0);
+        const auto sumAf = sample_value * (f * inverr2);
+        const auto sumff = f * (f * inverr2);
+
+        // store into shared mem
+        shr_sumAf[threadIdx.x] = sumAf;
+        shr_sumff[threadIdx.x] = sumff;
+      } else {
+        shr_sumAf[threadIdx.x] = 0;
+        shr_sumff[threadIdx.x] = 0;
+      }
+      __syncthreads();
+
+      // reduce
+      // unroll completely here (but hardcoded)
+      if (sample < 5) {
+        shr_sumAf[threadIdx.x] += shr_sumAf[threadIdx.x + 5];
+        shr_sumff[threadIdx.x] += shr_sumff[threadIdx.x + 5];
+      }
+      __syncthreads();
+
+      if (sample < 2) {
+        // will need to subtract for ltx = 3, we double count here
+        shr_sumAf[threadIdx.x] += shr_sumAf[threadIdx.x + 2] + shr_sumAf[threadIdx.x + 3];
+        shr_sumff[threadIdx.x] += shr_sumff[threadIdx.x + 2] + shr_sumff[threadIdx.x + 3];
+      }
+      __syncthreads();
+
+      if (sample == 0) {
+        // exit if the state is done
+        // note, we do not exit before all __syncthreads are finished
+        if (state == TimeComputationState::Finished) {
+          g_timeMax[ch] = 5;
+          g_timeError[ch] = -999;
+          return;
+        }
+
+        // subtract to avoid double counting
+        const auto sumff = shr_sumff[threadIdx.x] + shr_sumff[threadIdx.x + 1] - shr_sumff[threadIdx.x + 3];
+        const auto sumAf = shr_sumAf[threadIdx.x] + shr_sumAf[threadIdx.x + 1] - shr_sumAf[threadIdx.x + 3];
+
+        const auto ampMaxAlphaBeta = sumff > 0 ? sumAf / sumff : 0;
+        const auto sumAA = sumAAsNullHypot[ch];
+        const auto sum0 = sum0sNullHypot[ch];
+        const auto nullChi2 = chi2sNullHypot[ch];
+        if (sumff > 0) {
+          const auto chi2AlphaBeta = (sumAA - sumAf * sumAf / sumff) / sum0;
+          if (chi2AlphaBeta > nullChi2) {
+            // null hypothesis is better
+            state = TimeComputationState::Finished;
+#ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
+            printf("ch = %d chi2AlphaBeta = %f nullChi2 = %f sumAA = %f sumAf = %f sumff = %f sum0 = %f\n",
+                   ch,
+                   chi2AlphaBeta,
+                   nullChi2,
+                   sumAA,
+                   sumAf,
+                   sumff,
+                   sum0);
+#endif
+          }
+
+          // store to global
+          g_ampMaxAlphaBeta[ch] = ampMaxAlphaBeta;
+        } else {
+#ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
+          printf("ch = %d sum0 = %f sumAA = %f sumff = %f sumAf = %f\n", ch, sum0, sumAA, sumff, sumAf);
+#endif
+          state = TimeComputationState::Finished;
+        }
+
+        // store the state to global and finish calcs
+        g_state[ch] = state;
+        if (state == TimeComputationState::Finished) {
+          // store default values into global
+          g_timeMax[ch] = 5;
+          g_timeError[ch] = -999;
+#ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
+          printf("ch = %d finished state\n", ch);
+#endif
+          return;
+        }
+
+        const auto ampMaxError = g_ampMaxError[ch];
+        const auto test_ratio = ampMaxAlphaBeta / ampMaxError;
+        const auto accTimeMax = g_accTimeMax[ch];
+        const auto accTimeWgt = g_accTimeWgt[ch];
+        const auto tMaxAlphaBeta = g_tMaxAlphaBeta[ch];
+        const auto tMaxErrorAlphaBeta = g_tMaxErrorAlphaBeta[ch];
+        // branch to separate large vs small pulses
+        // see cpu version for more info
+        if (test_ratio > 5.0 && accTimeWgt > 0) {
+          const auto tMaxRatio = accTimeWgt > 0 ? accTimeMax / accTimeWgt : static_cast(0);
+          const auto tMaxErrorRatio = accTimeWgt > 0 ? 1.0 / std::sqrt(accTimeWgt) : static_cast(0);
+
+          if (test_ratio > 10.0) {
+            g_timeMax[ch] = tMaxRatio;
+            g_timeError[ch] = tMaxErrorRatio;
+
+#ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
+            printf("ch = %d tMaxRatio = %f tMaxErrorRatio = %f\n", ch, tMaxRatio, tMaxErrorRatio);
+#endif
+          } else {
+            const auto timeMax = (tMaxAlphaBeta * (10.0 - ampMaxAlphaBeta / ampMaxError) +
+                                  tMaxRatio * (ampMaxAlphaBeta / ampMaxError - 5.0)) /
+                                 5.0;
+            const auto timeError = (tMaxErrorAlphaBeta * (10.0 - ampMaxAlphaBeta / ampMaxError) +
+                                    tMaxErrorRatio * (ampMaxAlphaBeta / ampMaxError - 5.0)) /
+                                   5.0;
+            state = TimeComputationState::Finished;
+            g_state[ch] = state;
+            g_timeMax[ch] = timeMax;
+            g_timeError[ch] = timeError;
+
+#ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
+            printf("ch = %d timeMax = %f timeError = %f\n", ch, timeMax, timeError);
+#endif
+          }
+        } else {
+          state = TimeComputationState::Finished;
+          g_state[ch] = state;
+          g_timeMax[ch] = tMaxAlphaBeta;
+          g_timeError[ch] = tMaxErrorAlphaBeta;
+
+#ifdef DEBUG_FINDAMPLCHI2_AND_FINISH
+          printf("ch = %d tMaxAlphaBeta = %f tMaxErrorAlphaBeta = %f\n", ch, tMaxAlphaBeta, tMaxErrorAlphaBeta);
+#endif
+        }
+      }
+    }
+
+    //
+    // One thread per (channel, sample). For every sample > 0 enabled in
+    // sample_mask: if this sample and the previous one both have a gain id in
+    // [1, 3] and the gain id increases from the previous to this sample, the
+    // previous sample is zeroed, given an error of 1e+9 and flagged useless.
+    //
+    __global__ void kernel_time_compute_fixMGPAslew(uint16_t const* digis_eb,
+                                                    uint16_t const* digis_ee,
+                                                    SampleVector::Scalar* sample_values,
+                                                    SampleVector::Scalar* sample_value_errors,
+                                                    bool* useless_sample_values,
+                                                    unsigned const int sample_mask,
+                                                    const int nchannels,
+                                                    uint32_t const offsetForInputs) {
+      using ScalarType = SampleVector::Scalar;
+
+      // constants
+      constexpr int nsamples = EcalDataFrame::MAXSAMPLES;
+
+      // indices
+      const int gtx = threadIdx.x + blockIdx.x * blockDim.x;
+      const int ch = gtx / nsamples;
+      const int sample = threadIdx.x % nsamples;
+      const int inputGtx = ch >= offsetForInputs ? gtx - offsetForInputs * nsamples : gtx;
+      const auto* digis = ch >= offsetForInputs ? digis_ee : digis_eb;
+
+      // remove thread for sample 0, oversubscribing is easier than ....
+      if (ch >= nchannels || sample == 0)
+        return;
+
+      if (!use_sample(sample_mask, sample))
+        return;
+
+      const auto gainIdPrev = ecal::mgpa::gainId(digis[inputGtx - 1]);
+      const auto gainIdNext = ecal::mgpa::gainId(digis[inputGtx]);
+      if (gainIdPrev >= 1 && gainIdPrev <= 3 && gainIdNext >= 1 && gainIdNext <= 3 && gainIdPrev < gainIdNext) {
+        sample_values[gtx - 1] = 0;
+        sample_value_errors[gtx - 1] = 1e+9;
+        useless_sample_values[gtx - 1] = true;
+      }
+    }
+
+    //
+    // One thread per (channel, sample). The selected samples contribute to five
+    // weighted sums (sum1, sumA, sumF, sumAF, sumFF) that are reduced per channel
+    // in shared memory; thread 0 of the channel writes
+    //   g_amplitudeMax[ch] = (sumAF * sum1 - sumA * sumF) / (sumFF * sum1 - sumF^2)
+    // or 0 when sum1 <= 0 or the denominator is not larger than 1e-20 in magnitude.
+    // Dynamic shared memory: 5 * blockDim.x scalars.
+    // The hard-coded reduction offsets (5, 2, 3, 1) assume 10 samples per channel.
+    //
+    __global__ void kernel_time_compute_ampl(SampleVector::Scalar const* sample_values,
+                                             SampleVector::Scalar const* sample_value_errors,
+                                             uint32_t const* dids,
+                                             bool const* useless_samples,
+                                             SampleVector::Scalar const* g_timeMax,
+                                             SampleVector::Scalar const* amplitudeFitParametersEB,
+                                             SampleVector::Scalar const* amplitudeFitParametersEE,
+                                             SampleVector::Scalar* g_amplitudeMax,
+                                             const int nchannels) {
+      using ScalarType = SampleVector::Scalar;
+
+      // constants
+      constexpr ScalarType corr4 = 1.;
+      constexpr ScalarType corr6 = 1.;
+      constexpr int nsamples = EcalDataFrame::MAXSAMPLES;
+
+      // indices
+      const int gtx = threadIdx.x + blockIdx.x * blockDim.x;
+      const int ch = gtx / nsamples;
+      const int sample = threadIdx.x % nsamples;
+
+      if (ch >= nchannels)
+        return;
+
+      const auto did = DetId{dids[ch]};
+      const auto* amplitudeFitParameters =
+          did.subdetId() == EcalBarrel ? amplitudeFitParametersEB : amplitudeFitParametersEE;
+
+      // configure shared mem
+      extern __shared__ char smem[];
+      ScalarType* shr_sum1 = reinterpret_cast(smem);
+      auto* shr_sumA = shr_sum1 + blockDim.x;
+      auto* shr_sumF = shr_sumA + blockDim.x;
+      auto* shr_sumAF = shr_sumF + blockDim.x;
+      auto* shr_sumFF = shr_sumAF + blockDim.x;
+
+      const auto alpha = amplitudeFitParameters[0];
+      const auto beta = amplitudeFitParameters[1];
+      const auto timeMax = g_timeMax[ch];
+      const auto pedestalLimit = timeMax - (alpha * beta) - 1.0;
+      const auto sample_value = sample_values[gtx];
+      const auto sample_value_error = sample_value_errors[gtx];
+      const auto inverr2 =
+          sample_value_error > 0 ? 1. / (sample_value_error * sample_value_error) : static_cast(0);
+      const auto termOne = 1 + (sample - timeMax) / (alpha * beta);
+      const auto f = termOne > 1.e-5 ? fast_expf(alpha * fast_logf(termOne) - (sample - timeMax) / beta)
+                                     : static_cast(0.);
+
+      bool const cond = ((sample < pedestalLimit) || (f > 0.6 * corr6 && sample <= timeMax) ||
+                         (f > 0.4 * corr4 && sample >= timeMax)) &&
+                        !useless_samples[gtx];
+
+      // store into shared mem
+      shr_sum1[threadIdx.x] = cond ? inverr2 : static_cast(0);
+      shr_sumA[threadIdx.x] = cond ? sample_value * inverr2 : static_cast(0);
+      shr_sumF[threadIdx.x] = cond ? f * inverr2 : static_cast(0);
+      shr_sumAF[threadIdx.x] = cond ? (f * inverr2) * sample_value : static_cast(0);
+      shr_sumFF[threadIdx.x] = cond ? f * (f * inverr2) : static_cast(0);
+      // every thread's partial sums must be written before the reduction below
+      // reads the slots of other threads (threadIdx.x + 5)
+      __syncthreads();
+
+      // reduction
+      if (sample <= 4) {
+        shr_sum1[threadIdx.x] += shr_sum1[threadIdx.x + 5];
+        shr_sumA[threadIdx.x] += shr_sumA[threadIdx.x + 5];
+        shr_sumF[threadIdx.x] += shr_sumF[threadIdx.x + 5];
+        shr_sumAF[threadIdx.x] += shr_sumAF[threadIdx.x + 5];
+        shr_sumFF[threadIdx.x] += shr_sumFF[threadIdx.x + 5];
+      }
+      __syncthreads();
+
+      if (sample < 2) {
+        // note: we double count sample 3
+        shr_sum1[threadIdx.x] += shr_sum1[threadIdx.x + 2] + shr_sum1[threadIdx.x + 3];
+        shr_sumA[threadIdx.x] += shr_sumA[threadIdx.x + 2] + shr_sumA[threadIdx.x + 3];
+        shr_sumF[threadIdx.x] += shr_sumF[threadIdx.x + 2] + shr_sumF[threadIdx.x + 3];
+        shr_sumAF[threadIdx.x] += shr_sumAF[threadIdx.x + 2] + shr_sumAF[threadIdx.x + 3];
+        shr_sumFF[threadIdx.x] += shr_sumFF[threadIdx.x + 2] + shr_sumFF[threadIdx.x + 3];
+      }
+      __syncthreads();
+
+      if (sample == 0) {
+        const auto sum1 = shr_sum1[threadIdx.x] + shr_sum1[threadIdx.x + 1] - shr_sum1[threadIdx.x + 3];
+        const auto sumA = shr_sumA[threadIdx.x] + shr_sumA[threadIdx.x + 1] - shr_sumA[threadIdx.x + 3];
+        const auto sumF = shr_sumF[threadIdx.x] + shr_sumF[threadIdx.x + 1] - shr_sumF[threadIdx.x + 3];
+        const auto sumAF = shr_sumAF[threadIdx.x] + shr_sumAF[threadIdx.x + 1] - shr_sumAF[threadIdx.x + 3];
+        const auto sumFF = shr_sumFF[threadIdx.x] + shr_sumFF[threadIdx.x + 1] - shr_sumFF[threadIdx.x + 3];
+
+        const auto denom = sumFF * sum1 - sumF * sumF;
+        const auto condForDenom = sum1 > 0 && std::abs(denom) > 1.e-20;
+        const auto amplitudeMax = condForDenom ? (sumAF * sum1 - sumA * sumF) / denom : static_cast(0.);
+
+        // store into global mem
+        g_amplitudeMax[ch] = amplitudeMax;
+      }
+    }
+
+    //#define ECAL_RECO_CUDA_TC_INIT_DEBUG
+    __global__ void kernel_time_computation_init(uint16_t const* digis_eb,
+                                                 uint32_t const* dids_eb,
+                                                 uint16_t const* digis_ee,
+                                                 uint32_t const* dids_ee,
+                                                 float const* rms_x12,
+                                                 float const* rms_x6,
+                                                 float const* rms_x1,
+                                                 float const* mean_x12,
+                                                 float const* mean_x6,
+                                                 float const* mean_x1,
+                                                 float const* gain12Over6,
+                                                 float const* gain6Over1,
+                                                 SampleVector::Scalar* sample_values,
+                                                 SampleVector::Scalar* sample_value_errors,
+                                                 SampleVector::Scalar* ampMaxError,
+                                                 bool* useless_sample_values,
+                                                 char* pedestal_nums,
+                                                 uint32_t const offsetForHashes,
+                                                 uint32_t const offsetForInputs,
+                                                 unsigned const int sample_maskEB,
+                                                 unsigned const int sample_maskEE,
+                                                 int nchannels) {
+      using ScalarType = SampleVector::Scalar;
+
+      // constants
+      constexpr int nsamples = EcalDataFrame::MAXSAMPLES;
+
+      // indices
+      const int tx = threadIdx.x + blockDim.x * blockIdx.x;
+      const int ch = tx / nsamples;
+      const int inputTx = ch >= offsetForInputs ? tx - offsetForInputs * nsamples : tx;
+      const int inputCh = ch >= offsetForInputs ? ch - offsetForInputs : ch;
+      const auto* digis = ch >= offsetForInputs ? digis_ee : digis_eb;
+      const auto* dids = ch >= offsetForInputs ?
dids_ee : dids_eb; + + // threads that return here should not affect the __syncthreads() below since they have exitted the kernel + if (ch >= nchannels) + return; + + // indices/inits + const int sample = tx % nsamples; + const int input_ch_start = inputCh * nsamples; + SampleVector::Scalar pedestal = 0.; + int num = 0; + + // configure shared mem + extern __shared__ char smem[]; + ScalarType* shrSampleValues = reinterpret_cast(smem); + ScalarType* shrSampleValueErrors = shrSampleValues + blockDim.x; + + // 0 and 1 sample values + const auto adc0 = ecal::mgpa::adc(digis[input_ch_start]); + const auto gainId0 = ecal::mgpa::gainId(digis[input_ch_start]); + const auto adc1 = ecal::mgpa::adc(digis[input_ch_start + 1]); + const auto gainId1 = ecal::mgpa::gainId(digis[input_ch_start + 1]); + const auto did = DetId{dids[inputCh]}; + const auto isBarrel = did.subdetId() == EcalBarrel; + const auto sample_mask = did.subdetId() == EcalBarrel ? sample_maskEB : sample_maskEE; + const auto hashedId = isBarrel ? ecal::reconstruction::hashedIndexEB(did.rawId()) + : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId()); + + // set pedestal + // TODO this branch is non-divergent for a group of 10 threads + if (gainId0 == 1 && use_sample(sample_mask, 0)) { + pedestal = static_cast(adc0); + num = 1; + + const auto diff = adc1 - adc0; + if (gainId1 == 1 && use_sample(sample_mask, 1) && std::abs(diff) < 3 * rms_x12[hashedId]) { + pedestal = (pedestal + static_cast(adc1)) / 2.0; + num = 2; + } + } else { + pedestal = mean_x12[ch]; + } + + // ped subtracted and gain-renormalized samples. + const auto gainId = ecal::mgpa::gainId(digis[inputTx]); + const auto adc = ecal::mgpa::adc(digis[inputTx]); + + bool bad = false; + SampleVector::Scalar sample_value, sample_value_error; + // TODO divergent branch + // TODO: piece below is general both for amplitudes and timing + // potentially there is a way to reduce the amount of code... 
+ if (!use_sample(sample_mask, sample)) { + bad = true; + sample_value = 0; + sample_value_error = 0; + } else if (gainId == 1) { + sample_value = static_cast(adc) - pedestal; + sample_value_error = rms_x12[hashedId]; + } else if (gainId == 2) { + sample_value = (static_cast(adc) - mean_x6[hashedId]) * gain12Over6[hashedId]; + sample_value_error = rms_x6[hashedId] * gain12Over6[hashedId]; + } else if (gainId == 3) { + sample_value = + (static_cast(adc) - mean_x1[hashedId]) * gain6Over1[hashedId] * gain12Over6[hashedId]; + sample_value_error = rms_x1[hashedId] * gain6Over1[hashedId] * gain12Over6[hashedId]; + } else { + sample_value = 0; + sample_value_error = 0; + bad = true; + } + + // TODO: make sure we save things correctly when sample is useless + const auto useless_sample = (sample_value_error <= 0) | bad; + useless_sample_values[tx] = useless_sample; + sample_values[tx] = sample_value; + sample_value_errors[tx] = useless_sample ? 1e+9 : sample_value_error; + + // DEBUG +#ifdef ECAL_RECO_CUDA_TC_INIT_DEBUG + if (ch == 0) { + printf("sample = %d sample_value = %f sample_value_error = %f useless = %c\n", + sample, + sample_value, + sample_value_error, + useless_sample ? '1' : '0'); + } +#endif + + // store into the shared mem + shrSampleValues[threadIdx.x] = sample_value_error > 0 ? sample_value : std::numeric_limits::min(); + shrSampleValueErrors[threadIdx.x] = sample_value_error; + __syncthreads(); + + // perform the reduction with min + if (sample < 5) { + // note, if equal -> we keep the value with lower sample as for cpu + shrSampleValueErrors[threadIdx.x] = shrSampleValues[threadIdx.x] < shrSampleValues[threadIdx.x + 5] + ? 
shrSampleValueErrors[threadIdx.x + 5] + : shrSampleValueErrors[threadIdx.x]; + shrSampleValues[threadIdx.x] = std::max(shrSampleValues[threadIdx.x], shrSampleValues[threadIdx.x + 5]); + } + __syncthreads(); + + // a bit of an overkill, but easier than to compare across 3 values + if (sample < 3) { + shrSampleValueErrors[threadIdx.x] = shrSampleValues[threadIdx.x] < shrSampleValues[threadIdx.x + 3] + ? shrSampleValueErrors[threadIdx.x + 3] + : shrSampleValueErrors[threadIdx.x]; + shrSampleValues[threadIdx.x] = std::max(shrSampleValues[threadIdx.x], shrSampleValues[threadIdx.x + 3]); + } + __syncthreads(); + + if (sample < 2) { + shrSampleValueErrors[threadIdx.x] = shrSampleValues[threadIdx.x] < shrSampleValues[threadIdx.x + 2] + ? shrSampleValueErrors[threadIdx.x + 2] + : shrSampleValueErrors[threadIdx.x]; + shrSampleValues[threadIdx.x] = std::max(shrSampleValues[threadIdx.x], shrSampleValues[threadIdx.x + 2]); + } + __syncthreads(); + + if (sample == 0) { + // we only needd the max error + const auto maxSampleValueError = shrSampleValues[threadIdx.x] < shrSampleValues[threadIdx.x + 1] + ? 
shrSampleValueErrors[threadIdx.x + 1] + : shrSampleValueErrors[threadIdx.x]; + + // # pedestal samples used + pedestal_nums[ch] = num; + // this is used downstream + ampMaxError[ch] = maxSampleValueError; + + // DEBUG +#ifdef ECAL_RECO_CUDA_TC_INIT_DEBUG + if (ch == 0) { + printf("pedestal_nums = %d ampMaxError = %f\n", num, maxSampleValueError); + } +#endif + } + } + + /// + /// launch context parameters: 1 thread per channel + /// + //#define DEBUG_TIME_CORRECTION + __global__ void kernel_time_correction_and_finalize( + // SampleVector::Scalar const* g_amplitude, + ::ecal::reco::StorageScalarType const* g_amplitudeEB, + ::ecal::reco::StorageScalarType const* g_amplitudeEE, + uint16_t const* digis_eb, + uint32_t const* dids_eb, + uint16_t const* digis_ee, + uint32_t const* dids_ee, + float const* amplitudeBinsEB, + float const* amplitudeBinsEE, + float const* shiftBinsEB, + float const* shiftBinsEE, + SampleVector::Scalar const* g_timeMax, + SampleVector::Scalar const* g_timeError, + float const* g_rms_x12, + float const* timeCalibConstant, + float* g_jitterEB, + float* g_jitterEE, + float* g_jitterErrorEB, + float* g_jitterErrorEE, + uint32_t* flagsEB, + uint32_t* flagsEE, + const int amplitudeBinsSizeEB, + const int amplitudeBinsSizeEE, + ConfigurationParameters::type const timeConstantTermEB, + ConfigurationParameters::type const timeConstantTermEE, + float const offsetTimeValueEB, + float const offsetTimeValueEE, + ConfigurationParameters::type const timeNconstEB, + ConfigurationParameters::type const timeNconstEE, + ConfigurationParameters::type const amplitudeThresholdEB, + ConfigurationParameters::type const amplitudeThresholdEE, + ConfigurationParameters::type const outOfTimeThreshG12pEB, + ConfigurationParameters::type const outOfTimeThreshG12pEE, + ConfigurationParameters::type const outOfTimeThreshG12mEB, + ConfigurationParameters::type const outOfTimeThreshG12mEE, + ConfigurationParameters::type const outOfTimeThreshG61pEB, + 
ConfigurationParameters::type const outOfTimeThreshG61pEE, + ConfigurationParameters::type const outOfTimeThreshG61mEB, + ConfigurationParameters::type const outOfTimeThreshG61mEE, + uint32_t const offsetForHashes, + uint32_t const offsetForInputs, + const int nchannels) { + using ScalarType = SampleVector::Scalar; + + // constants + constexpr int nsamples = EcalDataFrame::MAXSAMPLES; + + // indices + const int gtx = threadIdx.x + blockIdx.x * blockDim.x; + const int inputGtx = gtx >= offsetForInputs ? gtx - offsetForInputs : gtx; + const auto* dids = gtx >= offsetForInputs ? dids_ee : dids_eb; + const auto& digis = gtx >= offsetForInputs ? digis_ee : digis_eb; + + // filter out outside of range threads + if (gtx >= nchannels) + return; + +// need to ref the right ptrs +#define ARRANGE(var) auto* var = gtx >= offsetForInputs ? var##EE : var##EB + ARRANGE(g_amplitude); + ARRANGE(g_jitter); + ARRANGE(g_jitterError); + ARRANGE(flags); +#undef ARRANGE + + const auto did = DetId{dids[inputGtx]}; + const auto isBarrel = did.subdetId() == EcalBarrel; + const auto hashedId = isBarrel ? ecal::reconstruction::hashedIndexEB(did.rawId()) + : offsetForHashes + ecal::reconstruction::hashedIndexEE(did.rawId()); + const auto* amplitudeBins = isBarrel ? amplitudeBinsEB : amplitudeBinsEE; + const auto* shiftBins = isBarrel ? shiftBinsEB : shiftBinsEE; + const auto amplitudeBinsSize = isBarrel ? amplitudeBinsSizeEB : amplitudeBinsSizeEE; + const auto timeConstantTerm = isBarrel ? timeConstantTermEB : timeConstantTermEE; + const auto timeNconst = isBarrel ? timeNconstEB : timeNconstEE; + const auto offsetTimeValue = isBarrel ? offsetTimeValueEB : offsetTimeValueEE; + const auto amplitudeThreshold = isBarrel ? amplitudeThresholdEB : amplitudeThresholdEE; + const auto outOfTimeThreshG12p = isBarrel ? outOfTimeThreshG12pEB : outOfTimeThreshG12pEE; + const auto outOfTimeThreshG12m = isBarrel ? outOfTimeThreshG12mEB : outOfTimeThreshG12mEE; + const auto outOfTimeThreshG61p = isBarrel ? 
outOfTimeThreshG61pEB : outOfTimeThreshG61pEE; + const auto outOfTimeThreshG61m = isBarrel ? outOfTimeThreshG61mEB : outOfTimeThreshG61mEE; + + // load some + const auto amplitude = g_amplitude[inputGtx]; + const auto rms_x12 = g_rms_x12[hashedId]; + const auto timeCalibConst = timeCalibConstant[hashedId]; + + int myBin = -1; + for (int bin = 0; bin < amplitudeBinsSize; bin++) { + if (amplitude > amplitudeBins[bin]) + myBin = bin; + else + break; + } + + ScalarType correction = 0; + if (myBin == -1) { + correction = shiftBins[0]; + } else if (myBin == amplitudeBinsSize - 1) { + correction = shiftBins[myBin]; + } else { + correction = shiftBins[myBin + 1] - shiftBins[myBin]; + correction *= (amplitude - amplitudeBins[myBin]) / (amplitudeBins[myBin + 1] - amplitudeBins[myBin]); + correction += shiftBins[myBin]; + } + + // correction * 1./25. + correction = correction * 0.04; + const auto timeMax = g_timeMax[gtx]; + const auto timeError = g_timeError[gtx]; + const auto jitter = timeMax - 5 + correction; + const auto jitterError = + std::sqrt(timeError * timeError + timeConstantTerm * timeConstantTerm * 0.04 * 0.04); // 0.04 = 1./25. + +#ifdef DEBUG_TIME_CORRECTION + printf("ch = %d timeMax = %f timeError = %f jitter = %f correction = %f\n", + gtx, + timeMax, + timeError, + jitter, + correction); +// } +#endif + + // store back to global + g_jitter[inputGtx] = jitter; + g_jitterError[inputGtx] = jitterError; + + // set the flag + // TODO: replace with something more efficient (if required), + // for now just to make it work + if (amplitude > amplitudeThreshold * rms_x12) { + auto threshP = outOfTimeThreshG12p; + auto threshM = outOfTimeThreshG12m; + if (amplitude > 3000.) 
{ + for (int isample = 0; isample < nsamples; isample++) { + int gainid = ecal::mgpa::gainId(digis[nsamples * inputGtx + isample]); + if (gainid != 1) { + threshP = outOfTimeThreshG61p; + threshM = outOfTimeThreshG61m; + break; + } + } + } + + const auto correctedTime = (timeMax - 5) * 25 + timeCalibConst + offsetTimeValue; + const auto nterm = timeNconst * rms_x12 / amplitude; + const auto sigmat = std::sqrt(nterm * nterm + timeConstantTerm * timeConstantTerm); + if (correctedTime > sigmat * threshP || correctedTime < -sigmat * threshM) + flags[inputGtx] |= 0x1 << EcalUncalibratedRecHit::kOutOfTime; + } + } + + } // namespace multifit +} // namespace ecal diff --git a/RecoLocalCalo/EcalRecProducers/plugins/TimeComputationKernels.h b/RecoLocalCalo/EcalRecProducers/plugins/TimeComputationKernels.h new file mode 100644 index 0000000000000..a9b1c69678abd --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/plugins/TimeComputationKernels.h @@ -0,0 +1,186 @@ +#ifndef RecoLocalCalo_EcalRecProducers_plugins_TimeComputationKernels_h +#define RecoLocalCalo_EcalRecProducers_plugins_TimeComputationKernels_h + +#include +#include + +#include + +#include "DataFormats/Math/interface/approx_exp.h" +#include "DataFormats/Math/interface/approx_log.h" + +#include "Common.h" +#include "DeclsForKernels.h" +#include "EigenMatrixTypes_gpu.h" + +//#define DEBUG + +//#define ECAL_RECO_CUDA_DEBUG + +namespace ecal { + namespace multifit { + + __global__ void kernel_time_compute_nullhypot(SampleVector::Scalar const* sample_values, + SampleVector::Scalar const* sample_value_errors, + bool const* useless_sample_values, + SampleVector::Scalar* chi2s, + SampleVector::Scalar* sum0s, + SampleVector::Scalar* sumAAs, + int const nchannels); + // + // launch ctx parameters are + // 45 threads per channel, X channels per block, Y blocks + // 45 comes from: 10 samples for i <- 0 to 9 and for j <- i+1 to 9 + // TODO: it might be much beter to use 32 threads per channel instead of 45 + // to simplify the 
synchronization + // + __global__ void kernel_time_compute_makeratio(SampleVector::Scalar const* sample_values, + SampleVector::Scalar const* sample_value_errors, + uint32_t const* dids_eb, + uint32_t const* dids_ee, + bool const* useless_sample_values, + char const* pedestal_nums, + ConfigurationParameters::type const* amplitudeFitParametersEB, + ConfigurationParameters::type const* amplitudeFitParametersEE, + ConfigurationParameters::type const* timeFitParametersEB, + ConfigurationParameters::type const* timeFitParametersEE, + SampleVector::Scalar const* sumAAsNullHypot, + SampleVector::Scalar const* sum0sNullHypot, + SampleVector::Scalar* tMaxAlphaBetas, + SampleVector::Scalar* tMaxErrorAlphaBetas, + SampleVector::Scalar* g_accTimeMax, + SampleVector::Scalar* g_accTimeWgt, + TimeComputationState* g_state, + unsigned int const timeFitParameters_sizeEB, + unsigned int const timeFitParameters_sizeEE, + ConfigurationParameters::type const timeFitLimits_firstEB, + ConfigurationParameters::type const timeFitLimits_firstEE, + ConfigurationParameters::type const timeFitLimits_secondEB, + ConfigurationParameters::type const timeFitLimits_secondEE, + int const nchannels, + uint32_t const offsetForInputs); + + /// launch ctx parameters are + /// 10 threads per channel, N channels per block, Y blocks + /// TODO: do we need to keep the state around or can be removed?! 
+ //#define DEBUG_FINDAMPLCHI2_AND_FINISH + __global__ void kernel_time_compute_findamplchi2_and_finish( + SampleVector::Scalar const* sample_values, + SampleVector::Scalar const* sample_value_errors, + uint32_t const* dids_eb, + uint32_t const* dids_ee, + bool const* useless_samples, + SampleVector::Scalar const* g_tMaxAlphaBeta, + SampleVector::Scalar const* g_tMaxErrorAlphaBeta, + SampleVector::Scalar const* g_accTimeMax, + SampleVector::Scalar const* g_accTimeWgt, + ConfigurationParameters::type const* amplitudeFitParametersEB, + ConfigurationParameters::type const* amplitudeFitParametersEE, + SampleVector::Scalar const* sumAAsNullHypot, + SampleVector::Scalar const* sum0sNullHypot, + SampleVector::Scalar const* chi2sNullHypot, + TimeComputationState* g_state, + SampleVector::Scalar* g_ampMaxAlphaBeta, + SampleVector::Scalar* g_ampMaxError, + SampleVector::Scalar* g_timeMax, + SampleVector::Scalar* g_timeError, + int const nchannels, + uint32_t const offsetForInputs); + + __global__ void kernel_time_compute_fixMGPAslew(uint16_t const* digis_eb, + uint16_t const* digis_ee, + SampleVector::Scalar* sample_values, + SampleVector::Scalar* sample_value_errors, + bool* useless_sample_values, + unsigned int const sample_mask, + int const nchannels, + uint32_t const offsetForInputs); + + __global__ void kernel_time_compute_ampl(SampleVector::Scalar const* sample_values, + SampleVector::Scalar const* sample_value_errors, + uint32_t const* dids_eb, + uint32_t const* dids_ed, + bool const* useless_samples, + SampleVector::Scalar const* g_timeMax, + SampleVector::Scalar const* amplitudeFitParametersEB, + SampleVector::Scalar const* amplitudeFitParametersEE, + SampleVector::Scalar* g_amplitudeMax, + int const nchannels, + uint32_t const offsetForInputs); + + //#define ECAL_RECO_CUDA_TC_INIT_DEBUG + __global__ void kernel_time_computation_init(uint16_t const* digis_eb, + uint32_t const* dids_eb, + uint16_t const* digis_ee, + uint32_t const* dids_ee, + float const* rms_x12, + 
float const* rms_x6, + float const* rms_x1, + float const* mean_x12, + float const* mean_x6, + float const* mean_x1, + float const* gain12Over6, + float const* gain6Over1, + SampleVector::Scalar* sample_values, + SampleVector::Scalar* sample_value_errors, + SampleVector::Scalar* ampMaxError, + bool* useless_sample_values, + char* pedestal_nums, + uint32_t const offsetForHashes, + uint32_t const offsetForInputs, + unsigned int const sample_maskEB, + unsigned int const sample_maskEE, + int nchannels); + + /// + /// launch context parameters: 1 thread per channel + /// + //#define DEBUG_TIME_CORRECTION + __global__ void kernel_time_correction_and_finalize( + // SampleVector::Scalar const* g_amplitude, + ::ecal::reco::StorageScalarType const* g_amplitudeEB, + ::ecal::reco::StorageScalarType const* g_amplitudeEE, + uint16_t const* digis_eb, + uint32_t const* dids_eb, + uint16_t const* digis_ee, + uint32_t const* dids_ee, + float const* amplitudeBinsEB, + float const* amplitudeBinsEE, + float const* shiftBinsEB, + float const* shiftBinsEE, + SampleVector::Scalar const* g_timeMax, + SampleVector::Scalar const* g_timeError, + float const* g_rms_x12, + float const* timeCalibConstant, + ::ecal::reco::StorageScalarType* g_jitterEB, + ::ecal::reco::StorageScalarType* g_jitterEE, + ::ecal::reco::StorageScalarType* g_jitterErrorEB, + ::ecal::reco::StorageScalarType* g_jitterErrorEE, + uint32_t* flagsEB, + uint32_t* flagsEE, + int const amplitudeBinsSizeEB, + int const amplitudeBinsSizeEE, + ConfigurationParameters::type const timeConstantTermEB, + ConfigurationParameters::type const timeConstantTermEE, + float const offsetTimeValueEB, + float const offsetTimeValueEE, + ConfigurationParameters::type const timeNconstEB, + ConfigurationParameters::type const timeNconstEE, + ConfigurationParameters::type const amplitudeThresholdEB, + ConfigurationParameters::type const amplitudeThresholdEE, + ConfigurationParameters::type const outOfTimeThreshG12pEB, + ConfigurationParameters::type 
const outOfTimeThreshG12pEE, + ConfigurationParameters::type const outOfTimeThreshG12mEB, + ConfigurationParameters::type const outOfTimeThreshG12mEE, + ConfigurationParameters::type const outOfTimeThreshG61pEB, + ConfigurationParameters::type const outOfTimeThreshG61pEE, + ConfigurationParameters::type const outOfTimeThreshG61mEB, + ConfigurationParameters::type const outOfTimeThreshG61mEE, + uint32_t const offsetForHashes, + uint32_t const offsetForInputs, + int const nchannels); + + } // namespace multifit +} // namespace ecal + +#endif // RecoLocalCalo_EcalRecProducers_plugins_TimeComputationKernels_h diff --git a/RecoLocalCalo/EcalRecProducers/python/ecalMultiFitUncalibRecHit_cff.py b/RecoLocalCalo/EcalRecProducers/python/ecalMultiFitUncalibRecHit_cff.py index 1eef78d42e940..72a3efaae38ba 100644 --- a/RecoLocalCalo/EcalRecProducers/python/ecalMultiFitUncalibRecHit_cff.py +++ b/RecoLocalCalo/EcalRecProducers/python/ecalMultiFitUncalibRecHit_cff.py @@ -1,6 +1,57 @@ import FWCore.ParameterSet.Config as cms +from Configuration.ProcessModifiers.gpu_cff import gpu # ECAL multifit running on CPU from RecoLocalCalo.EcalRecProducers.ecalMultiFitUncalibRecHit_cfi import ecalMultiFitUncalibRecHit ecalMultiFitUncalibRecHitTask = cms.Task(ecalMultiFitUncalibRecHit) + +# ECAL conditions used by the multifit running on GPU +from RecoLocalCalo.EcalRecProducers.ecalPedestalsGPUESProducer_cfi import ecalPedestalsGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalGainRatiosGPUESProducer_cfi import ecalGainRatiosGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalPulseShapesGPUESProducer_cfi import ecalPulseShapesGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalPulseCovariancesGPUESProducer_cfi import ecalPulseCovariancesGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalSamplesCorrelationGPUESProducer_cfi import ecalSamplesCorrelationGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalTimeBiasCorrectionsGPUESProducer_cfi import 
ecalTimeBiasCorrectionsGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalTimeCalibConstantsGPUESProducer_cfi import ecalTimeCalibConstantsGPUESProducer +from RecoLocalCalo.EcalRecProducers.ecalMultifitParametersGPUESProducer_cfi import ecalMultifitParametersGPUESProducer + +# ECAL multifit running on GPU +from RecoLocalCalo.EcalRecProducers.ecalUncalibRecHitProducerGPU_cfi import ecalUncalibRecHitProducerGPU as _ecalUncalibRecHitProducerGPU +ecalMultiFitUncalibRecHitGPU = _ecalUncalibRecHitProducerGPU.clone( + digisLabelEB = cms.InputTag('ecalDigisGPU', 'ebDigis'), + digisLabelEE = cms.InputTag('ecalDigisGPU', 'eeDigis'), +) + +# copy the uncalibrated rechits from GPU to CPU +from RecoLocalCalo.EcalRecProducers.ecalCPUUncalibRecHitProducer_cfi import ecalCPUUncalibRecHitProducer as _ecalCPUUncalibRecHitProducer +ecalMultiFitUncalibRecHitSoA = _ecalCPUUncalibRecHitProducer.clone( + recHitsInLabelEB = cms.InputTag('ecalMultiFitUncalibRecHitGPU', 'EcalUncalibRecHitsEB'), + recHitsInLabelEE = cms.InputTag('ecalMultiFitUncalibRecHitGPU', 'EcalUncalibRecHitsEE'), +) + +# convert the uncalibrated rechits from SoA to legacy format +from RecoLocalCalo.EcalRecProducers.ecalUncalibRecHitConvertGPU2CPUFormat_cfi import ecalUncalibRecHitConvertGPU2CPUFormat as _ecalUncalibRecHitConvertGPU2CPUFormat +_ecalMultiFitUncalibRecHit_gpu = _ecalUncalibRecHitConvertGPU2CPUFormat.clone( + recHitsLabelGPUEB = cms.InputTag('ecalMultiFitUncalibRecHitSoA', 'EcalUncalibRecHitsEB'), + recHitsLabelGPUEE = cms.InputTag('ecalMultiFitUncalibRecHitSoA', 'EcalUncalibRecHitsEE'), +) +gpu.toReplaceWith(ecalMultiFitUncalibRecHit, _ecalMultiFitUncalibRecHit_gpu) + +gpu.toReplaceWith(ecalMultiFitUncalibRecHitTask, cms.Task( + # ECAL conditions used by the multifit running on GPU + ecalPedestalsGPUESProducer, + ecalGainRatiosGPUESProducer, + ecalPulseShapesGPUESProducer, + ecalPulseCovariancesGPUESProducer, + ecalSamplesCorrelationGPUESProducer, + ecalTimeBiasCorrectionsGPUESProducer, + 
ecalTimeCalibConstantsGPUESProducer, + ecalMultifitParametersGPUESProducer, + # ECAL multifit running on GPU + ecalMultiFitUncalibRecHitGPU, + # copy the uncalibrated rechits from GPU to CPU + ecalMultiFitUncalibRecHitSoA, + # convert the uncalibrated rechits to legacy format + ecalMultiFitUncalibRecHit, +)) diff --git a/RecoLocalCalo/EcalRecProducers/python/ecalMultiFitUncalibRecHit_gpu_new_cfi.py b/RecoLocalCalo/EcalRecProducers/python/ecalMultiFitUncalibRecHit_gpu_new_cfi.py new file mode 100644 index 0000000000000..84a0c6f9cbe8a --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/python/ecalMultiFitUncalibRecHit_gpu_new_cfi.py @@ -0,0 +1,83 @@ +import FWCore.ParameterSet.Config as cms + +from RecoLocalCalo.EcalRecProducers.ecalPulseShapeParameters_cff import * + +ecalMultiFitUncalibRecHitgpu = cms.EDProducer("EcalUncalibRecHitProducerGPUNew", + EBdigiCollection = cms.InputTag("ecalDigis","ebDigis"), + EEdigiCollection = cms.InputTag("ecalDigis","eeDigis"), + EBhitCollection = cms.string("EcalUncalibRecHitsEBgpunew"), + EBhitCollection_soa = cms.string("EcalUncalibRecHitsEBgpunew"), + EEhitCollection = cms.string('EcalUncalibRecHitsEEgpunew'), + EEhitCollection_soa = cms.string('EcalUncalibRecHitsEEgpunew'), + algo = cms.string("EcalUncalibRecHitWorkerMultiFitGPUNew"), + algoPSet = cms.PSet( + # for multifit method + EcalPulseShapeParameters = cms.PSet( ecal_pulse_shape_parameters ), + activeBXs = cms.vint32(-5,-4,-3,-2,-1,0,1,2,3,4), + ampErrorCalculation = cms.bool(True), + useLumiInfoRunHeader = cms.bool(True), + + doPrefitEB = cms.bool(False), + doPrefitEE = cms.bool(False), + prefitMaxChiSqEB = cms.double(25.), + prefitMaxChiSqEE = cms.double(10.), + + dynamicPedestalsEB = cms.bool(False), + dynamicPedestalsEE = cms.bool(False), + mitigateBadSamplesEB = cms.bool(False), + mitigateBadSamplesEE = cms.bool(False), + gainSwitchUseMaxSampleEB = cms.bool(True), + gainSwitchUseMaxSampleEE = cms.bool(False), + selectiveBadSampleCriteriaEB = cms.bool(False), + 
selectiveBadSampleCriteriaEE = cms.bool(False), + simplifiedNoiseModelForGainSwitch = cms.bool(True), + addPedestalUncertaintyEB = cms.double(0.), + addPedestalUncertaintyEE = cms.double(0.), + + # decide which algorithm to be use to calculate the jitter + timealgo = cms.string("RatioMethod"), + + # for ratio method + EBtimeFitParameters = cms.vdouble(-2.015452e+00, 3.130702e+00, -1.234730e+01, 4.188921e+01, -8.283944e+01, 9.101147e+01, -5.035761e+01, 1.105621e+01), + EEtimeFitParameters = cms.vdouble(-2.390548e+00, 3.553628e+00, -1.762341e+01, 6.767538e+01, -1.332130e+02, 1.407432e+02, -7.541106e+01, 1.620277e+01), + EBamplitudeFitParameters = cms.vdouble(1.138,1.652), + EEamplitudeFitParameters = cms.vdouble(1.890,1.400), + EBtimeFitLimits_Lower = cms.double(0.2), + EBtimeFitLimits_Upper = cms.double(1.4), + EEtimeFitLimits_Lower = cms.double(0.2), + EEtimeFitLimits_Upper = cms.double(1.4), + # for time error + EBtimeConstantTerm= cms.double(.6), + EEtimeConstantTerm= cms.double(1.0), + + # for kOutOfTime flag + EBtimeNconst = cms.double(28.5), + EEtimeNconst = cms.double(31.8), + outOfTimeThresholdGain12pEB = cms.double(5), # times estimated precision + outOfTimeThresholdGain12mEB = cms.double(5), # times estimated precision + outOfTimeThresholdGain61pEB = cms.double(5), # times estimated precision + outOfTimeThresholdGain61mEB = cms.double(5), # times estimated precision + outOfTimeThresholdGain12pEE = cms.double(1000), # times estimated precision + outOfTimeThresholdGain12mEE = cms.double(1000), # times estimated precision + outOfTimeThresholdGain61pEE = cms.double(1000), # times estimated precision + outOfTimeThresholdGain61mEE = cms.double(1000), # times estimated precision + amplitudeThresholdEB = cms.double(10), + amplitudeThresholdEE = cms.double(10), + + ebSpikeThreshold = cms.double(1.042), + + # these are now taken from DB. 
Here the MC parameters for backward compatibility + ebPulseShape = cms.vdouble( 5.2e-05,-5.26e-05 , 6.66e-05, 0.1168, 0.7575, 1., 0.8876, 0.6732, 0.4741, 0.3194 ), + eePulseShape = cms.vdouble( 5.2e-05,-5.26e-05 , 6.66e-05, 0.1168, 0.7575, 1., 0.8876, 0.6732, 0.4741, 0.3194 ), + + # for kPoorReco flag + kPoorRecoFlagEB = cms.bool(True), + kPoorRecoFlagEE = cms.bool(False), + chi2ThreshEB_ = cms.double(65.0), + chi2ThreshEE_ = cms.double(50.0), + + # threads/blocks config + threads = cms.vint32(256, 1, 1), + runV1 = cms.bool(True), + ) +) diff --git a/RecoLocalCalo/EcalRecProducers/python/ecalRecHitGPU_cfi.py b/RecoLocalCalo/EcalRecProducers/python/ecalRecHitGPU_cfi.py new file mode 100644 index 0000000000000..a9b5599fd970f --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/python/ecalRecHitGPU_cfi.py @@ -0,0 +1,69 @@ +import FWCore.ParameterSet.Config as cms + +from RecoLocalCalo.EcalRecAlgos.ecalCleaningAlgo import cleaningAlgoConfig + +# rechit producer +ecalRecHitGPU = cms.EDProducer("EcalRecHitProducerGPU", + + uncalibrecHitsInLabelEB = cms.InputTag("ecalUncalibRecHitProducerGPU","EcalUncalibRecHitsEB"), + uncalibrecHitsInLabelEE = cms.InputTag("ecalUncalibRecHitProducerGPU","EcalUncalibRecHitsEE"), + + recHitsLabelEB = cms.string("EcalRecHitsEB"), + recHitsLabelEE = cms.string("EcalRecHitsEE"), + + maxNumberHitsEB = cms.uint32(61200), + maxNumberHitsEE = cms.uint32(14648), + + ## db statuses to be exluded from reconstruction (some will be recovered) + ChannelStatusToBeExcluded = cms.vstring( 'kDAC', + 'kNoisy', + 'kNNoisy', + 'kFixedG6', + 'kFixedG1', + 'kFixedG0', + 'kNonRespondingIsolated', + 'kDeadVFE', + 'kDeadFE', + 'kNoDataNoTP', + # + # AM should I add them here????? 
+ # next ones from "flagsMapDBReco" + # but not defined in "EcalChannelStatusCode.h" + # but they are defined in "EcalRecHit.h" + # + #'kKilled', + #'kTPSaturated', + #'kL1SpikeFlag', + ), + + ## avoid propagation of dead channels other than after recovery + killDeadChannels = cms.bool(True), + + ## define maximal and minimal values for the laser corrections + + EBLaserMIN = cms.double(0.01), + EELaserMIN = cms.double(0.01), + + EBLaserMAX = cms.double(30.0), + EELaserMAX = cms.double(30.0), + + ## reco flags association to DB flag + flagsMapDBReco = cms.PSet( + kGood = cms.vstring('kOk','kDAC','kNoLaser','kNoisy'), + kNoisy = cms.vstring('kNNoisy','kFixedG6','kFixedG1'), + kNeighboursRecovered = cms.vstring('kFixedG0', + 'kNonRespondingIsolated', + 'kDeadVFE'), + kTowerRecovered = cms.vstring('kDeadFE'), + kDead = cms.vstring('kNoDataNoTP') + ), + + ## for channel recovery + recoverEBIsolatedChannels = cms.bool(False), + recoverEEIsolatedChannels = cms.bool(False), + recoverEBVFE = cms.bool(False), + recoverEEVFE = cms.bool(False), + recoverEBFE = cms.bool(True), + recoverEEFE = cms.bool(True), +) + diff --git a/RecoLocalCalo/EcalRecProducers/test/ecalRawDecodingAndMultifit.py b/RecoLocalCalo/EcalRecProducers/test/ecalRawDecodingAndMultifit.py new file mode 100644 index 0000000000000..a3d04e836f020 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/test/ecalRawDecodingAndMultifit.py @@ -0,0 +1,172 @@ +import FWCore.ParameterSet.Config as cms +from Configuration.StandardSequences.Eras import eras + +process = cms.Process('RECO', eras.Run2_2018) + +# import of standard configurations +process.load('Configuration.StandardSequences.Services_cff') +process.load('FWCore.MessageService.MessageLogger_cfi') +process.load('HeterogeneousCore.CUDAServices.CUDAService_cfi') +process.load('Configuration.StandardSequences.GeometryRecoDB_cff') +process.load('Configuration.StandardSequences.MagneticField_AutoFromDBCurrent_cff') 
+process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff') + +# Other statements +from Configuration.AlCa.GlobalTag import GlobalTag +process.GlobalTag = GlobalTag(process.GlobalTag, '102X_dataRun2_HLT_v2', '') + + +process.maxEvents = cms.untracked.PSet( + input = cms.untracked.int32(100) +) + +# load data using the DAQ source +import sys, os, inspect +sys.path.append(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))) +process.load('sourceFromRawCmggpu_cff') + +process.load('Configuration.StandardSequences.GeometryRecoDB_cff') +process.load("RecoLocalCalo.Configuration.hcalLocalReco_cff") +process.load("EventFilter.HcalRawToDigi.HcalRawToDigi_cfi") +process.load("EventFilter.EcalRawToDigi.EcalUnpackerData_cfi") +process.load("RecoLuminosity.LumiProducer.bunchSpacingProducer_cfi") + +# load both cpu and gpu plugins +process.load("RecoLocalCalo.EcalRecProducers.ecalUncalibRecHitProducerGPU_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalMultiFitUncalibRecHit_cfi") + +# for validation of gpu multifit products +process.load("RecoLocalCalo.EcalRecProducers.ecalCPUUncalibRecHitProducer_cfi") +process.load("EventFilter.EcalRawToDigi.ecalCPUDigisProducer_cfi") + +process.load("EventFilter.EcalRawToDigi.ecalRawToDigiGPU_cfi") +process.load("EventFilter.EcalRawToDigi.ecalElectronicsMappingGPUESProducer_cfi") + +process.load("RecoLocalCalo.EcalRecProducers.ecalPedestalsGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalGainRatiosGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalPulseShapesGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalPulseCovariancesGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalSamplesCorrelationGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalTimeBiasCorrectionsGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalTimeCalibConstantsGPUESProducer_cfi") + 
+process.load("RecoLocalCalo.EcalRecProducers.ecalRechitADCToGeVConstantGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalIntercalibConstantsGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalRechitChannelStatusGPUESProducer_cfi") + +process.load("RecoLocalCalo.EcalRecProducers.ecalLaserAPDPNRatiosGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalLaserAPDPNRatiosRefGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalLaserAlphasGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalLinearCorrectionsGPUESProducer_cfi") + +# force HLT configuration for ecalMultiFitUncalibRecHit +process.ecalMultiFitUncalibRecHit.algoPSet = cms.PSet( + ebSpikeThreshold = cms.double(1.042), + EBtimeFitLimits_Upper = cms.double(1.4), + EEtimeFitLimits_Lower = cms.double(0.2), + timealgo = cms.string("None"), + EBtimeNconst = cms.double(28.5), + prefitMaxChiSqEE = cms.double(10.0), + outOfTimeThresholdGain12mEB = cms.double(5.0), + outOfTimeThresholdGain12mEE = cms.double(1000.0), + EEtimeFitParameters = cms.vdouble(-2.390548, 3.553628, -17.62341, 67.67538, -133.213, 140.7432, -75.41106, 16.20277), + prefitMaxChiSqEB = cms.double(25.0), + simplifiedNoiseModelForGainSwitch = cms.bool(True), + EBtimeFitParameters = cms.vdouble(-2.015452, 3.130702, -12.3473, 41.88921, -82.83944, 91.01147, -50.35761, 11.05621), + selectiveBadSampleCriteriaEB = cms.bool(False), + dynamicPedestalsEB = cms.bool(False), + useLumiInfoRunHeader = cms.bool(False), + EBamplitudeFitParameters = cms.vdouble(1.138, 1.652), + doPrefitEE = cms.bool(False), + dynamicPedestalsEE = cms.bool(False), + selectiveBadSampleCriteriaEE = cms.bool(False), + outOfTimeThresholdGain61pEE = cms.double(1000.0), + outOfTimeThresholdGain61pEB = cms.double(5.0), + activeBXs = cms.vint32(-5, -4, -3, -2, -1, 0, 1, 2, 3, 4), + EcalPulseShapeParameters = cms.PSet( + EEPulseShapeTemplate = cms.vdouble(0.116442, 0.756246, 1.0, 0.897182, 0.686831, 0.491506, 
0.344111, 0.245731, 0.174115, 0.123361, 0.0874288, 0.061957), + EEdigiCollection = cms.string(""), + EcalPreMixStage2 = cms.bool(False), + EcalPreMixStage1 = cms.bool(False), + EBPulseShapeCovariance = cms.vdouble(3.001E-6, 1.233E-5, 0.0, -4.416E-6, -4.571E-6, -3.614E-6, -2.636E-6, -1.286E-6, -8.41E-7, -5.296E-7, 0.0, 0.0, 1.233E-5, 6.154E-5, 0.0, -2.2E-5, -2.309E-5, -1.838E-5, -1.373E-5, -7.334E-6, -5.088E-6, -3.745E-6, -2.428E-6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -4.416E-6, -2.2E-5, 0.0, 8.319E-6, 8.545E-6, 6.792E-6, 5.059E-6, 2.678E-6, 1.816E-6, 1.223E-6, 8.245E-7, 5.589E-7, -4.571E-6, -2.309E-5, 0.0, 8.545E-6, 9.182E-6, 7.219E-6, 5.388E-6, 2.853E-6, 1.944E-6, 1.324E-6, 9.083E-7, 6.335E-7, -3.614E-6, -1.838E-5, 0.0, 6.792E-6, 7.219E-6, 6.016E-6, 4.437E-6, 2.385E-6, 1.636E-6, 1.118E-6, 7.754E-7, 5.556E-7, -2.636E-6, -1.373E-5, 0.0, 5.059E-6, 5.388E-6, 4.437E-6, 3.602E-6, 1.917E-6, 1.322E-6, 9.079E-7, 6.529E-7, 4.752E-7, -1.286E-6, -7.334E-6, 0.0, 2.678E-6, 2.853E-6, 2.385E-6, 1.917E-6, 1.375E-6, 9.1E-7, 6.455E-7, 4.693E-7, 3.657E-7, -8.41E-7, -5.088E-6, 0.0, 1.816E-6, 1.944E-6, 1.636E-6, 1.322E-6, 9.1E-7, 9.115E-7, 6.062E-7, 4.436E-7, 3.422E-7, -5.296E-7, -3.745E-6, 0.0, 1.223E-6, 1.324E-6, 1.118E-6, 9.079E-7, 6.455E-7, 6.062E-7, 7.217E-7, 4.862E-7, 3.768E-7, 0.0, -2.428E-6, 0.0, 8.245E-7, 9.083E-7, 7.754E-7, 6.529E-7, 4.693E-7, 4.436E-7, 4.862E-7, 6.509E-7, 4.418E-7, 0.0, 0.0, 0.0, 5.589E-7, 6.335E-7, 5.556E-7, 4.752E-7, 3.657E-7, 3.422E-7, 3.768E-7, 4.418E-7, 6.142E-7), + ESdigiCollection = cms.string(""), + EBdigiCollection = cms.string(""), + EBCorrNoiseMatrixG01 = cms.vdouble(1.0, 0.73354, 0.64442, 0.58851, 0.55425, 0.53082, 0.51916, 0.51097, 0.50732, 0.50409), + EBCorrNoiseMatrixG12 = cms.vdouble(1.0, 0.71073, 0.55721, 0.46089, 0.40449, 0.35931, 0.33924, 0.32439, 0.31581, 0.30481), + EBCorrNoiseMatrixG06 = cms.vdouble(1.0, 0.70946, 0.58021, 0.49846, 0.45006, 0.41366, 0.39699, 0.38478, 0.37847, 0.37055), + 
EEPulseShapeCovariance = cms.vdouble(3.941E-5, 3.333E-5, 0.0, -1.449E-5, -1.661E-5, -1.424E-5, -1.183E-5, -6.842E-6, -4.915E-6, -3.411E-6, 0.0, 0.0, 3.333E-5, 2.862E-5, 0.0, -1.244E-5, -1.431E-5, -1.233E-5, -1.032E-5, -5.883E-6, -4.154E-6, -2.902E-6, -2.128E-6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.449E-5, -1.244E-5, 0.0, 5.84E-6, 6.649E-6, 5.72E-6, 4.812E-6, 2.708E-6, 1.869E-6, 1.33E-6, 9.186E-7, 6.446E-7, -1.661E-5, -1.431E-5, 0.0, 6.649E-6, 7.966E-6, 6.898E-6, 5.794E-6, 3.157E-6, 2.184E-6, 1.567E-6, 1.084E-6, 7.575E-7, -1.424E-5, -1.233E-5, 0.0, 5.72E-6, 6.898E-6, 6.341E-6, 5.347E-6, 2.859E-6, 1.991E-6, 1.431E-6, 9.839E-7, 6.886E-7, -1.183E-5, -1.032E-5, 0.0, 4.812E-6, 5.794E-6, 5.347E-6, 4.854E-6, 2.628E-6, 1.809E-6, 1.289E-6, 9.02E-7, 6.146E-7, -6.842E-6, -5.883E-6, 0.0, 2.708E-6, 3.157E-6, 2.859E-6, 2.628E-6, 1.863E-6, 1.296E-6, 8.882E-7, 6.108E-7, 4.283E-7, -4.915E-6, -4.154E-6, 0.0, 1.869E-6, 2.184E-6, 1.991E-6, 1.809E-6, 1.296E-6, 1.217E-6, 8.669E-7, 5.751E-7, 3.882E-7, -3.411E-6, -2.902E-6, 0.0, 1.33E-6, 1.567E-6, 1.431E-6, 1.289E-6, 8.882E-7, 8.669E-7, 9.522E-7, 6.717E-7, 4.293E-7, 0.0, -2.128E-6, 0.0, 9.186E-7, 1.084E-6, 9.839E-7, 9.02E-7, 6.108E-7, 5.751E-7, 6.717E-7, 7.911E-7, 5.493E-7, 0.0, 0.0, 0.0, 6.446E-7, 7.575E-7, 6.886E-7, 6.146E-7, 4.283E-7, 3.882E-7, 4.293E-7, 5.493E-7, 7.027E-7), + EBPulseShapeTemplate = cms.vdouble(0.0113979, 0.758151, 1.0, 0.887744, 0.673548, 0.474332, 0.319561, 0.215144, 0.147464, 0.101087, 0.0693181, 0.0475044), + EECorrNoiseMatrixG01 = cms.vdouble(1.0, 0.72698, 0.62048, 0.55691, 0.51848, 0.49147, 0.47813, 0.47007, 0.46621, 0.46265), + EECorrNoiseMatrixG12 = cms.vdouble(1.0, 0.71373, 0.44825, 0.30152, 0.21609, 0.14786, 0.11772, 0.10165, 0.09465, 0.08098), + UseLCcorrection = cms.untracked.bool(True), + EECorrNoiseMatrixG06 = cms.vdouble(1.0, 0.71217, 0.47464, 0.34056, 0.26282, 0.20287, 0.17734, 0.16256, 0.15618, 0.14443) + ), + doPrefitEB = cms.bool(False), + addPedestalUncertaintyEE = 
cms.double(0.0), + addPedestalUncertaintyEB = cms.double(0.0), + gainSwitchUseMaxSampleEB = cms.bool(True), + EEtimeNconst = cms.double(31.8), + EEamplitudeFitParameters = cms.vdouble(1.89, 1.4), + chi2ThreshEE_ = cms.double(50.0), + eePulseShape = cms.vdouble(5.2E-5, -5.26E-5, 6.66E-5, 0.1168, 0.7575, 1.0, 0.8876, 0.6732, 0.4741, 0.3194), + outOfTimeThresholdGain12pEB = cms.double(5.0), + gainSwitchUseMaxSampleEE = cms.bool(False), + mitigateBadSamplesEB = cms.bool(False), + outOfTimeThresholdGain12pEE = cms.double(1000.0), + ebPulseShape = cms.vdouble(5.2E-5, -5.26E-5, 6.66E-5, 0.1168, 0.7575, 1.0, 0.8876, 0.6732, 0.4741, 0.3194), + ampErrorCalculation = cms.bool(False), + mitigateBadSamplesEE = cms.bool(False), + amplitudeThresholdEB = cms.double(10.0), + kPoorRecoFlagEB = cms.bool(True), + amplitudeThresholdEE = cms.double(10.0), + EBtimeFitLimits_Lower = cms.double(0.2), + kPoorRecoFlagEE = cms.bool(False), + EEtimeFitLimits_Upper = cms.double(1.4), + outOfTimeThresholdGain61mEE = cms.double(1000.0), + EEtimeConstantTerm = cms.double(1.0), + EBtimeConstantTerm = cms.double(0.6), + chi2ThreshEB_ = cms.double(65.0), + outOfTimeThresholdGain61mEB = cms.double(5.0) +) + +process.ecalDigis = process.ecalEBunpacker.clone() +process.ecalDigis.InputLabel = cms.InputTag('rawDataCollector') + +process.out = cms.OutputModule("PoolOutputModule", + fileName = cms.untracked.string("test.root") +) + +process.finalize = cms.EndPath(process.out) + +process.bunchSpacing = cms.Path( + process.bunchSpacingProducer +) + +process.digiPath = cms.Path( + process.ecalDigis + + process.ecalRawToDigiGPU + + process.ecalCPUDigisProducer +) + +process.recoPath = cms.Path( + process.ecalMultiFitUncalibRecHit + + process.ecalUncalibRecHitProducerGPU + + process.ecalCPUUncalibRecHitProducer +) + +process.schedule = cms.Schedule( + process.bunchSpacing, + process.digiPath, + process.recoPath, + process.finalize +) + +process.options = cms.untracked.PSet( + numberOfThreads = 
cms.untracked.uint32(4), + numberOfStreams = cms.untracked.uint32(4), + SkipEvent = cms.untracked.vstring('ProductNotFound'), + wantSummary = cms.untracked.bool(True) +) + +# report CUDAService messages +process.MessageLogger.categories.append("CUDAService") diff --git a/RecoLocalCalo/EcalRecProducers/test/sourceFromRawCmggpu_cff.py b/RecoLocalCalo/EcalRecProducers/test/sourceFromRawCmggpu_cff.py new file mode 100644 index 0000000000000..e993a7573b689 --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/test/sourceFromRawCmggpu_cff.py @@ -0,0 +1,151 @@ +import FWCore.ParameterSet.Config as cms + +# input +FastMonitoringService = cms.Service( "FastMonitoringService", + filePerFwkStream = cms.untracked.bool( False ), + fastMonIntervals = cms.untracked.uint32( 2 ), + sleepTime = cms.untracked.int32( 1 ) +) + +EvFDaqDirector = cms.Service( "EvFDaqDirector", + runNumber = cms.untracked.uint32( 321177 ), + + baseDir = cms.untracked.string( "tmp" ), + buBaseDir = cms.untracked.string( "tmp" ), + + useFileBroker = cms.untracked.bool( False ), + fileBrokerKeepAlive = cms.untracked.bool( True ), + fileBrokerPort = cms.untracked.string( "8080" ), + fileBrokerUseLocalLock = cms.untracked.bool( True ), + fuLockPollInterval = cms.untracked.uint32( 2000 ), + + requireTransfersPSet = cms.untracked.bool( False ), + selectedTransferMode = cms.untracked.string( "" ), + mergingPset = cms.untracked.string( "" ), + + outputAdler32Recheck = cms.untracked.bool( False ), +) + +source = cms.Source( "FedRawDataInputSource", + runNumber = cms.untracked.uint32( 321177 ), + getLSFromFilename = cms.untracked.bool(True), + testModeNoBuilderUnit = cms.untracked.bool(False), + verifyAdler32 = cms.untracked.bool( True ), + verifyChecksum = cms.untracked.bool( True ), + useL1EventID = cms.untracked.bool( False ), # True + alwaysStartFromfirstLS = cms.untracked.uint32( 0 ), + + eventChunkBlock = cms.untracked.uint32( 240 ), # 32 + eventChunkSize = cms.untracked.uint32( 240), # 32 + maxBufferedFiles = 
cms.untracked.uint32( 8 ), # 2 + numBuffers = cms.untracked.uint32( 8 ), # 2 + + fileListMode = cms.untracked.bool( True ), # False + fileNames = cms.untracked.vstring( + #'/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0142_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0142_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0142_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0142_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0142_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0143_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0143_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0143_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0143_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0143_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0144_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0144_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0144_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0144_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0144_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0145_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0145_index000001.raw', + 
'/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0145_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0145_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0145_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0146_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0146_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0146_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0146_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0146_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0147_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0147_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0147_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0147_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0147_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0148_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0148_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0148_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0148_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0148_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0149_index000000.raw', + 
'/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0149_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0149_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0149_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0149_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0150_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0150_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0150_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0150_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0150_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0151_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0151_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0151_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0151_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0151_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0152_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0152_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0152_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0152_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0152_index000004.raw', + 
'/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0153_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0153_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0153_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0153_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0153_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0154_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0154_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0154_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0154_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0154_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0155_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0155_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0155_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0155_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0155_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0156_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0156_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0156_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0156_index000003.raw', + 
'/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0156_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0157_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0157_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0157_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0157_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0157_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0158_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0158_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0158_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0158_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0158_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0159_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0159_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0159_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0159_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0159_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0160_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0160_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0160_index000002.raw', + 
'/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0160_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0160_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0161_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0161_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0161_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0161_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0161_index000004.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0162_index000000.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0162_index000001.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0162_index000002.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0162_index000003.raw', + '/data/patatrack/store/raw/Run2018D/JetHT/RAW/v1/000/321/177/00000/run321177_ls0162_index000004.raw', + ), +) \ No newline at end of file diff --git a/RecoLocalCalo/EcalRecProducers/test/testEcalRechitProducer_cfg.py b/RecoLocalCalo/EcalRecProducers/test/testEcalRechitProducer_cfg.py new file mode 100644 index 0000000000000..c70572ff3b89d --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/test/testEcalRechitProducer_cfg.py @@ -0,0 +1,322 @@ +import FWCore.ParameterSet.Config as cms + +from Configuration.StandardSequences.Eras import eras +#from Configuration.ProcessModifiers.gpu_cff import gpu + +process = cms.Process('RECO', eras.Run2_2018) + +# import of standard configurations +process.load('Configuration.StandardSequences.Services_cff') +#process.load('SimGeneral.HepPDTESSource.pythiapdt_cfi') 
+process.load('FWCore.MessageService.MessageLogger_cfi') +process.load('HeterogeneousCore.CUDAServices.CUDAService_cfi') +#process.load('Configuration.EventContent.EventContent_cff') +process.load('Configuration.StandardSequences.GeometryRecoDB_cff') +process.load('Configuration.StandardSequences.MagneticField_AutoFromDBCurrent_cff') +#process.load('Configuration.StandardSequences.RawToDigi_Data_cff') +#process.load('Configuration.StandardSequences.Reconstruction_Data_cff') +#process.load('DQMOffline.Configuration.DQMOffline_cff') +process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff') + + + + + + +# Other statements: select the conditions GlobalTag used by this test +from Configuration.AlCa.GlobalTag import GlobalTag +process.GlobalTag = GlobalTag(process.GlobalTag, '102X_dataRun2_HLT_v2', '') + + +process.maxEvents = cms.untracked.PSet( + input = cms.untracked.int32(1000) +) + +# load data using the DAQ source: this file's own directory is appended to sys.path so that the sourceFromRawCmggpu_cff module can be loaded by its bare name +import sys, os, inspect +sys.path.append(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))) +process.load('sourceFromRawCmggpu_cff') + +#----------------------------------------- +# CMSSW non-DQM module imports: geometry, HCAL local reco and unpacker, ECAL unpacker, bunch spacing producer (NOTE(review): GeometryRecoDB_cff is already loaded among the standard configurations above; confirm this second load is needed) +#----------------------------------------- +process.load('Configuration.StandardSequences.GeometryRecoDB_cff') +process.load("RecoLocalCalo.Configuration.hcalLocalReco_cff") +#process.load("RecoLocalCalo.Configuration.ecalLocalRecoSequence_cff") +process.load("EventFilter.HcalRawToDigi.HcalRawToDigi_cfi") +process.load("EventFilter.EcalRawToDigi.EcalUnpackerData_cfi") +process.load("RecoLuminosity.LumiProducer.bunchSpacingProducer_cfi") + +# load both cpu and gpu plugins for the uncalibrated rechit (multifit) reconstruction +# NOTE(review): the path below looks like the auto-generated cfi of the gpu producer in a local build area; confirm +# ../cfipython/slc7_amd64_gcc700/RecoLocalCalo/EcalRecProducers/ecalUncalibRecHitProducerGPU_cfi.py +# +process.load("RecoLocalCalo.EcalRecProducers.ecalUncalibRecHitProducerGPU_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalMultiFitUncalibRecHit_cfi") + +# for validation of gpu multifit products 
+process.load("RecoLocalCalo.EcalRecProducers.ecalCPUUncalibRecHitProducer_cfi") +process.load("EventFilter.EcalRawToDigi.ecalCPUDigisProducer_cfi") + +process.load("EventFilter.EcalRawToDigi.ecalRawToDigiGPU_cfi") +process.load("EventFilter.EcalRawToDigi.ecalElectronicsMappingGPUESProducer_cfi") + +#process.ecalUncalibRecHitProducerGPU.kernelsVersion = 0 +#process.ecalUncalibRecHitProducerGPU.kernelMinimizeThreads = cms.vuint32(16, 1, 1) + +process.load("RecoLocalCalo.EcalRecProducers.ecalPedestalsGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalGainRatiosGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalPulseShapesGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalPulseCovariancesGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalSamplesCorrelationGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalTimeBiasCorrectionsGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalTimeCalibConstantsGPUESProducer_cfi") + +#process.ecalMultiFitUncalibRecHitgpu.algoPSet.threads = cms.vint32(256, 1, 1) + +#from RecoLocalCalo.EcalRecProducers.ecalMultifitParametersGPUESProducer_cfi import ecalMultifitParametersGPUESProducer +process.load("RecoLocalCalo.EcalRecProducers.ecalMultifitParametersGPUESProducer_cfi") + +# +# +# No "JobConfigurationGPURecord" record found in the EventSetup.n +# #---> +# +process.load("RecoLocalCalo.EcalRecProducers.ecalRecHitParametersGPUESProducer_cfi") +#ecalRecHitParametersGPUESProducer_cfi.py + + +## +## force HLT configuration for ecalMultiFitUncalibRecHit +## + +process.ecalMultiFitUncalibRecHit.algoPSet = cms.PSet( + ebSpikeThreshold = cms.double( 1.042 ), + EBtimeFitLimits_Upper = cms.double( 1.4 ), + EEtimeFitLimits_Lower = cms.double( 0.2 ), + timealgo = cms.string( "None" ), + EBtimeNconst = cms.double( 28.5 ), + prefitMaxChiSqEE = cms.double( 10.0 ), + outOfTimeThresholdGain12mEB = cms.double( 5.0 ), + outOfTimeThresholdGain12mEE 
= cms.double( 1000.0 ), + EEtimeFitParameters = cms.vdouble( -2.390548, 3.553628, -17.62341, 67.67538, -133.213, 140.7432, -75.41106, 16.20277 ), + prefitMaxChiSqEB = cms.double( 25.0 ), + simplifiedNoiseModelForGainSwitch = cms.bool( True ), + EBtimeFitParameters = cms.vdouble( -2.015452, 3.130702, -12.3473, 41.88921, -82.83944, 91.01147, -50.35761, 11.05621 ), + selectiveBadSampleCriteriaEB = cms.bool( False ), + dynamicPedestalsEB = cms.bool( False ), + useLumiInfoRunHeader = cms.bool( False ), + EBamplitudeFitParameters = cms.vdouble( 1.138, 1.652 ), + doPrefitEE = cms.bool( False ), + dynamicPedestalsEE = cms.bool( False ), + selectiveBadSampleCriteriaEE = cms.bool( False ), + outOfTimeThresholdGain61pEE = cms.double( 1000.0 ), + outOfTimeThresholdGain61pEB = cms.double( 5.0 ), + activeBXs = cms.vint32( -5, -4, -3, -2, -1, 0, 1, 2, 3, 4 ), + EcalPulseShapeParameters = cms.PSet( + EEPulseShapeTemplate = cms.vdouble( 0.116442, 0.756246, 1.0, 0.897182, 0.686831, 0.491506, 0.344111, 0.245731, 0.174115, 0.123361, 0.0874288, 0.061957 ), + EEdigiCollection = cms.string( "" ), + EcalPreMixStage2 = cms.bool( False ), + EcalPreMixStage1 = cms.bool( False ), + EBPulseShapeCovariance = cms.vdouble( 3.001E-6, 1.233E-5, 0.0, -4.416E-6, -4.571E-6, -3.614E-6, -2.636E-6, -1.286E-6, -8.41E-7, -5.296E-7, 0.0, 0.0, 1.233E-5, 6.154E-5, 0.0, -2.2E-5, -2.309E-5, -1.838E-5, -1.373E-5, -7.334E-6, -5.088E-6, -3.745E-6, -2.428E-6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -4.416E-6, -2.2E-5, 0.0, 8.319E-6, 8.545E-6, 6.792E-6, 5.059E-6, 2.678E-6, 1.816E-6, 1.223E-6, 8.245E-7, 5.589E-7, -4.571E-6, -2.309E-5, 0.0, 8.545E-6, 9.182E-6, 7.219E-6, 5.388E-6, 2.853E-6, 1.944E-6, 1.324E-6, 9.083E-7, 6.335E-7, -3.614E-6, -1.838E-5, 0.0, 6.792E-6, 7.219E-6, 6.016E-6, 4.437E-6, 2.385E-6, 1.636E-6, 1.118E-6, 7.754E-7, 5.556E-7, -2.636E-6, -1.373E-5, 0.0, 5.059E-6, 5.388E-6, 4.437E-6, 3.602E-6, 1.917E-6, 1.322E-6, 9.079E-7, 6.529E-7, 4.752E-7, -1.286E-6, -7.334E-6, 0.0, 
2.678E-6, 2.853E-6, 2.385E-6, 1.917E-6, 1.375E-6, 9.1E-7, 6.455E-7, 4.693E-7, 3.657E-7, -8.41E-7, -5.088E-6, 0.0, 1.816E-6, 1.944E-6, 1.636E-6, 1.322E-6, 9.1E-7, 9.115E-7, 6.062E-7, 4.436E-7, 3.422E-7, -5.296E-7, -3.745E-6, 0.0, 1.223E-6, 1.324E-6, 1.118E-6, 9.079E-7, 6.455E-7, 6.062E-7, 7.217E-7, 4.862E-7, 3.768E-7, 0.0, -2.428E-6, 0.0, 8.245E-7, 9.083E-7, 7.754E-7, 6.529E-7, 4.693E-7, 4.436E-7, 4.862E-7, 6.509E-7, 4.418E-7, 0.0, 0.0, 0.0, 5.589E-7, 6.335E-7, 5.556E-7, 4.752E-7, 3.657E-7, 3.422E-7, 3.768E-7, 4.418E-7, 6.142E-7 ), + ESdigiCollection = cms.string( "" ), + EBdigiCollection = cms.string( "" ), + EBCorrNoiseMatrixG01 = cms.vdouble( 1.0, 0.73354, 0.64442, 0.58851, 0.55425, 0.53082, 0.51916, 0.51097, 0.50732, 0.50409 ), + EBCorrNoiseMatrixG12 = cms.vdouble( 1.0, 0.71073, 0.55721, 0.46089, 0.40449, 0.35931, 0.33924, 0.32439, 0.31581, 0.30481 ), + EBCorrNoiseMatrixG06 = cms.vdouble( 1.0, 0.70946, 0.58021, 0.49846, 0.45006, 0.41366, 0.39699, 0.38478, 0.37847, 0.37055 ), + EEPulseShapeCovariance = cms.vdouble( 3.941E-5, 3.333E-5, 0.0, -1.449E-5, -1.661E-5, -1.424E-5, -1.183E-5, -6.842E-6, -4.915E-6, -3.411E-6, 0.0, 0.0, 3.333E-5, 2.862E-5, 0.0, -1.244E-5, -1.431E-5, -1.233E-5, -1.032E-5, -5.883E-6, -4.154E-6, -2.902E-6, -2.128E-6, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.449E-5, -1.244E-5, 0.0, 5.84E-6, 6.649E-6, 5.72E-6, 4.812E-6, 2.708E-6, 1.869E-6, 1.33E-6, 9.186E-7, 6.446E-7, -1.661E-5, -1.431E-5, 0.0, 6.649E-6, 7.966E-6, 6.898E-6, 5.794E-6, 3.157E-6, 2.184E-6, 1.567E-6, 1.084E-6, 7.575E-7, -1.424E-5, -1.233E-5, 0.0, 5.72E-6, 6.898E-6, 6.341E-6, 5.347E-6, 2.859E-6, 1.991E-6, 1.431E-6, 9.839E-7, 6.886E-7, -1.183E-5, -1.032E-5, 0.0, 4.812E-6, 5.794E-6, 5.347E-6, 4.854E-6, 2.628E-6, 1.809E-6, 1.289E-6, 9.02E-7, 6.146E-7, -6.842E-6, -5.883E-6, 0.0, 2.708E-6, 3.157E-6, 2.859E-6, 2.628E-6, 1.863E-6, 1.296E-6, 8.882E-7, 6.108E-7, 4.283E-7, -4.915E-6, -4.154E-6, 0.0, 1.869E-6, 2.184E-6, 1.991E-6, 1.809E-6, 1.296E-6, 1.217E-6, 8.669E-7, 
5.751E-7, 3.882E-7, -3.411E-6, -2.902E-6, 0.0, 1.33E-6, 1.567E-6, 1.431E-6, 1.289E-6, 8.882E-7, 8.669E-7, 9.522E-7, 6.717E-7, 4.293E-7, 0.0, -2.128E-6, 0.0, 9.186E-7, 1.084E-6, 9.839E-7, 9.02E-7, 6.108E-7, 5.751E-7, 6.717E-7, 7.911E-7, 5.493E-7, 0.0, 0.0, 0.0, 6.446E-7, 7.575E-7, 6.886E-7, 6.146E-7, 4.283E-7, 3.882E-7, 4.293E-7, 5.493E-7, 7.027E-7 ), + EBPulseShapeTemplate = cms.vdouble( 0.0113979, 0.758151, 1.0, 0.887744, 0.673548, 0.474332, 0.319561, 0.215144, 0.147464, 0.101087, 0.0693181, 0.0475044 ), + EECorrNoiseMatrixG01 = cms.vdouble( 1.0, 0.72698, 0.62048, 0.55691, 0.51848, 0.49147, 0.47813, 0.47007, 0.46621, 0.46265 ), + EECorrNoiseMatrixG12 = cms.vdouble( 1.0, 0.71373, 0.44825, 0.30152, 0.21609, 0.14786, 0.11772, 0.10165, 0.09465, 0.08098 ), + UseLCcorrection = cms.untracked.bool( True ), + EECorrNoiseMatrixG06 = cms.vdouble( 1.0, 0.71217, 0.47464, 0.34056, 0.26282, 0.20287, 0.17734, 0.16256, 0.15618, 0.14443 ) + ), + doPrefitEB = cms.bool( False ), + addPedestalUncertaintyEE = cms.double( 0.0 ), + addPedestalUncertaintyEB = cms.double( 0.0 ), + gainSwitchUseMaxSampleEB = cms.bool( True ), + EEtimeNconst = cms.double( 31.8 ), + EEamplitudeFitParameters = cms.vdouble( 1.89, 1.4 ), + chi2ThreshEE_ = cms.double( 50.0 ), + eePulseShape = cms.vdouble( 5.2E-5, -5.26E-5, 6.66E-5, 0.1168, 0.7575, 1.0, 0.8876, 0.6732, 0.4741, 0.3194 ), + outOfTimeThresholdGain12pEB = cms.double( 5.0 ), + gainSwitchUseMaxSampleEE = cms.bool( False ), + mitigateBadSamplesEB = cms.bool( False ), + outOfTimeThresholdGain12pEE = cms.double( 1000.0 ), + ebPulseShape = cms.vdouble( 5.2E-5, -5.26E-5, 6.66E-5, 0.1168, 0.7575, 1.0, 0.8876, 0.6732, 0.4741, 0.3194 ), + ampErrorCalculation = cms.bool( False ), + mitigateBadSamplesEE = cms.bool( False ), + amplitudeThresholdEB = cms.double( 10.0 ), + kPoorRecoFlagEB = cms.bool( True ), + amplitudeThresholdEE = cms.double( 10.0 ), + EBtimeFitLimits_Lower = cms.double( 0.2 ), + kPoorRecoFlagEE = cms.bool( False ), + EEtimeFitLimits_Upper = 
cms.double( 1.4 ), + outOfTimeThresholdGain61mEE = cms.double( 1000.0 ), + EEtimeConstantTerm = cms.double( 1.0 ), + EBtimeConstantTerm = cms.double( 0.6 ), + chi2ThreshEB_ = cms.double( 65.0 ), + outOfTimeThresholdGain61mEB = cms.double( 5.0 ) +) + +## + + + +process.load('Configuration.StandardSequences.Reconstruction_cff') +#process.ecalRecHit + + +process.load("RecoLocalCalo.EcalRecProducers.ecalRechitADCToGeVConstantGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalRechitChannelStatusGPUESProducer_cfi") +#process.load("RecoLocalCalo.EcalRecProducers.ecalADCToGeVConstantGPUESProducer_cfi") +#process.load("RecoLocalCalo.EcalRecProducers.ecalChannelStatusGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalIntercalibConstantsGPUESProducer_cfi") + +process.load("RecoLocalCalo.EcalRecProducers.ecalLaserAPDPNRatiosGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalLaserAPDPNRatiosRefGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalLaserAlphasGPUESProducer_cfi") +process.load("RecoLocalCalo.EcalRecProducers.ecalLinearCorrectionsGPUESProducer_cfi") + +process.load("RecoLocalCalo.EcalRecProducers.ecalRecHitGPU_cfi") +process.ecalRecHitProducerGPU = process.ecalRecHitGPU.clone() + + +process.load("RecoLocalCalo.EcalRecProducers.ecalCPURecHitProducer_cfi") + + +# +# AM : TEST to see if the number of rechits matches +# +#process.ecalRecHit.killDeadChannels = cms.bool(False) +# +#process.ecalRecHit.recoverEBFE = cms.bool(False) +#process.ecalRecHit.recoverEBIsolatedChannels = cms.bool(False) +#process.ecalRecHit.recoverEBVFE = cms.bool(False) +## +#process.ecalRecHit.recoverEEFE = cms.bool(False) +#process.ecalRecHit.recoverEEIsolatedChannels = cms.bool(False) +#process.ecalRecHit.recoverEEVFE = cms.bool(False) +# +#process.ecalRecHit.skipTimeCalib = cms.bool(True) +# +#process.ecalRecHitProducerGPU.killDeadChannels = cms.bool(False) +# +# +#process.ecalRecHitProducerGPU.recoverEBFE = 
cms.bool(False) +#process.ecalRecHitProducerGPU.recoverEBIsolatedChannels = cms.bool(False) +#process.ecalRecHitProducerGPU.recoverEBVFE = cms.bool(False) +#process.ecalRecHitProducerGPU.recoverEEFE = cms.bool(False) +#process.ecalRecHitProducerGPU.recoverEEIsolatedChannels = cms.bool(False) +#process.ecalRecHitProducerGPU.recoverEEVFE = cms.bool(False) +# +# +# +# TEST +# +#process.ecalRecHit.ChannelStatusToBeExcluded = cms.vstring( + #'kDAC', + #'kNoisy', + #'kNNoisy', + #'kFixedG6', + #'kFixedG1', + #'kFixedG0', + #'kNonRespondingIsolated', + #'kDeadVFE', + #'kDeadFE', + #'kNoDataNoTP' + #) +#process.ecalRecHitProducerGPU.ChannelStatusToBeExcluded = cms.vstring( + #'kDAC', + #'kNoisy', + #'kNNoisy', + #'kFixedG6', + #'kFixedG1', + #'kFixedG0', + #'kNonRespondingIsolated', + #'kDeadVFE', + #'kDeadFE', + #'kNoDataNoTP' + #) +# +# + + #ChannelStatusToBeExcluded = cms.vstring( + #'kDAC', + #'kNoisy', + #'kNNoisy', + #'kFixedG6', + #'kFixedG1', + #'kFixedG0', + #'kNonRespondingIsolated', + #'kDeadVFE', + #'kDeadFE', + #'kNoDataNoTP' + #), + + + +#process.hcalDigis.silent = cms.untracked.bool(False) +#process.hcalDigis.InputLabel = rawTag +process.ecalDigis = process.ecalEBunpacker.clone() +process.ecalDigis.InputLabel = cms.InputTag('rawDataCollector') +#process.hbheprerecogpu.processQIE11 = cms.bool(True) + +process.out = cms.OutputModule( + "PoolOutputModule", + fileName = cms.untracked.string("testRechit.root") +) + +#process.out = cms.OutputModule("AsciiOutputModule", +# outputCommands = cms.untracked.vstring( +# 'keep *_ecalMultiFitUncalibRecHit_*_*', +# ), +# verbosity = cms.untracked.uint32(0) +#) +process.finalize = cms.EndPath(process.out) + +process.bunchSpacing = cms.Path( + process.bunchSpacingProducer +) + +process.digiPath = cms.Path( + #process.hcalDigis + process.ecalDigis + *process.ecalRawToDigiGPU + *process.ecalCPUDigisProducer +) + +process.recoPath = cms.Path( + (process.ecalMultiFitUncalibRecHit+process.ecalDetIdToBeRecovered) + 
#process.ecalMultiFitUncalibRecHit + *process.ecalRecHit +# gpu + *process.ecalUncalibRecHitProducerGPU + *process.ecalCPUUncalibRecHitProducer + *process.ecalRecHitProducerGPU + *process.ecalCPURecHitProducer +) + +process.schedule = cms.Schedule( + process.bunchSpacing, + process.digiPath, + process.recoPath, +# process.ecalecalLocalRecoSequence + process.finalize +) + +process.options = cms.untracked.PSet( + numberOfThreads = cms.untracked.uint32(4), + numberOfStreams = cms.untracked.uint32(4), + SkipEvent = cms.untracked.vstring('ProductNotFound'), + wantSummary = cms.untracked.bool(True) +) + +# report CUDAService messages +process.MessageLogger.categories.append("CUDAService") + + +# +#process.DependencyGraph = cms.Service("DependencyGraph") + + diff --git a/RecoLocalCalo/EcalRecProducers/test/testEcalUncalibRechitProducer_cfg.py b/RecoLocalCalo/EcalRecProducers/test/testEcalUncalibRechitProducer_cfg.py new file mode 100644 index 0000000000000..ffb665d7bc96a --- /dev/null +++ b/RecoLocalCalo/EcalRecProducers/test/testEcalUncalibRechitProducer_cfg.py @@ -0,0 +1,233 @@ +import FWCore.ParameterSet.Config as cms + +from Configuration.StandardSequences.Eras import eras +#from Configuration.ProcessModifiers.gpu_cff import gpu + +process = cms.Process('RECO', eras.Run2_2018) + +# import of standard configurations +process.load('Configuration.StandardSequences.Services_cff') +#process.load('SimGeneral.HepPDTESSource.pythiapdt_cfi') +process.load('FWCore.MessageService.MessageLogger_cfi') +process.load('HeterogeneousCore.CUDAServices.CUDAService_cfi') +#process.load('Configuration.EventContent.EventContent_cff') +process.load('Configuration.StandardSequences.GeometryRecoDB_cff') +process.load('Configuration.StandardSequences.MagneticField_AutoFromDBCurrent_cff') +#process.load('Configuration.StandardSequences.RawToDigi_Data_cff') +#process.load('Configuration.StandardSequences.Reconstruction_Data_cff') +#process.load('DQMOffline.Configuration.DQMOffline_cff') 
+process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff')
+
+# Conditions: select the global tag used for this test.
+from Configuration.AlCa.GlobalTag import GlobalTag
+process.GlobalTag = GlobalTag(process.GlobalTag, '102X_dataRun2_HLT_v2', '')
+
+# Number of events to process (a shorter run of 100 is kept as a disabled alternative).
+process.maxEvents = cms.untracked.PSet(
+    #input = cms.untracked.int32(100)
+    input = cms.untracked.int32(1000)
+)
+
+# Input source: the directory containing this configuration file is appended to
+# sys.path so that 'sourceFromRawCmggpu_cff' can be resolved by process.load
+# (presumably it sits next to this file -- confirm).
+import sys, os, inspect
+sys.path.append(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))))
+process.load('sourceFromRawCmggpu_cff')
+
+#------------------------------------------------------------------
+# Geometry, HCAL/ECAL unpackers, HCAL local reco and bunch spacing.
+# GeometryRecoDB_cff is also loaded with the standard configurations
+# earlier in this file; the load is repeated here.
+#------------------------------------------------------------------
+process.load('Configuration.StandardSequences.GeometryRecoDB_cff')
+process.load("RecoLocalCalo.Configuration.hcalLocalReco_cff")
+#process.load("RecoLocalCalo.Configuration.ecalLocalRecoSequence_cff")
+process.load("EventFilter.HcalRawToDigi.HcalRawToDigi_cfi")
+process.load("EventFilter.EcalRawToDigi.EcalUnpackerData_cfi")
+process.load("RecoLuminosity.LumiProducer.bunchSpacingProducer_cfi")
+
+# Load both the GPU and the CPU uncalibrated-rechit plugins.
+# Generated cfi, for reference:
+#   ../cfipython/slc7_amd64_gcc700/RecoLocalCalo/EcalRecProducers/ecalUncalibRecHitProducerGPU_cfi.py
+process.load("RecoLocalCalo.EcalRecProducers.ecalUncalibRecHitProducerGPU_cfi")
+process.load("RecoLocalCalo.EcalRecProducers.ecalMultiFitUncalibRecHit_cfi")
+
+# Producer used for the validation of the GPU multifit products.
+# Generated cfi, for reference:
+#   ../cfipython/slc7_amd64_gcc700/RecoLocalCalo/EcalRecProducers/ecalCPUUncalibRecHitProducer_cfi.py
+process.load("RecoLocalCalo.EcalRecProducers.ecalCPUUncalibRecHitProducer_cfi")
+
+# ECAL raw-to-digi on GPU and its electronics-mapping ESProducer.
+process.load("EventFilter.EcalRawToDigi.ecalRawToDigiGPU_cfi")
+process.load("EventFilter.EcalRawToDigi.ecalElectronicsMappingGPUESProducer_cfi")
+
+# Optional tuning of the GPU producer, currently disabled:
+#process.ecalUncalibRecHitProducerGPU.kernelsVersion = 0
+#process.ecalUncalibRecHitProducerGPU.kernelMinimizeThreads = cms.vuint32(16, 1, 1)
+# process.ecalUncalibRecHitProducerGPU.shouldRunTimingComputation = cms.bool(False)
+
+# ESProducers providing the ECAL conditions in their GPU form (going by the cfi names).
+process.load("RecoLocalCalo.EcalRecProducers.ecalPedestalsGPUESProducer_cfi")
+process.load("RecoLocalCalo.EcalRecProducers.ecalGainRatiosGPUESProducer_cfi")
+process.load("RecoLocalCalo.EcalRecProducers.ecalPulseShapesGPUESProducer_cfi")
+process.load("RecoLocalCalo.EcalRecProducers.ecalPulseCovariancesGPUESProducer_cfi")
+process.load("RecoLocalCalo.EcalRecProducers.ecalSamplesCorrelationGPUESProducer_cfi")
+process.load("RecoLocalCalo.EcalRecProducers.ecalTimeBiasCorrectionsGPUESProducer_cfi")
+process.load("RecoLocalCalo.EcalRecProducers.ecalTimeCalibConstantsGPUESProducer_cfi")
+
+#process.ecalMultiFitUncalibRecHitgpu.algoPSet.threads = cms.vint32(256, 1, 1)
+
+process.load("RecoLocalCalo.EcalRecProducers.ecalMultifitParametersGPUESProducer_cfi")
+
+##
+## Force the HLT configuration for ecalMultiFitUncalibRecHit.
+## The whole algoPSet is replaced; timealgo is "None", i.e. no timing
+## computation for the CPU version.
+##
+process.ecalMultiFitUncalibRecHit.algoPSet = cms.PSet(
+    ebSpikeThreshold = cms.double( 1.042 ),
+    EBtimeFitLimits_Upper = cms.double( 1.4 ),
+    EEtimeFitLimits_Lower = cms.double( 0.2 ),
+    timealgo = cms.string( "None" ), # ----> no timing computation for CPU version
+    EBtimeNconst = cms.double( 28.5 ),
+    prefitMaxChiSqEE = cms.double( 10.0 ),
+    outOfTimeThresholdGain12mEB = cms.double( 5.0 ),
+    outOfTimeThresholdGain12mEE = cms.double( 1000.0 ),
+    EEtimeFitParameters = cms.vdouble( -2.390548, 3.553628, -17.62341, 67.67538, -133.213, 140.7432, -75.41106, 16.20277 ),
+    prefitMaxChiSqEB = cms.double( 25.0 ),
+    simplifiedNoiseModelForGainSwitch = cms.bool( True ),
+    EBtimeFitParameters = cms.vdouble( -2.015452, 3.130702, -12.3473, 41.88921, -82.83944, 91.01147, -50.35761, 11.05621 ),
+    selectiveBadSampleCriteriaEB = cms.bool( False ),
+    dynamicPedestalsEB = cms.bool( False ),
+    useLumiInfoRunHeader = cms.bool( False ),
+    EBamplitudeFitParameters = cms.vdouble( 1.138, 1.652 ),
+    doPrefitEE = cms.bool( False ),
+    dynamicPedestalsEE = cms.bool( False ),
+    selectiveBadSampleCriteriaEE = cms.bool( False ),
+    outOfTimeThresholdGain61pEE = cms.double( 1000.0 ),
+    outOfTimeThresholdGain61pEB = cms.double( 5.0 ),
+    activeBXs = cms.vint32( -5, -4, -3, -2, -1, 0, 1, 2, 3, 4 ),
+    EcalPulseShapeParameters = cms.PSet(
+        EEPulseShapeTemplate = cms.vdouble( 0.116442, 0.756246, 1.0, 0.897182, 0.686831, 0.491506, 0.344111, 0.245731, 0.174115, 0.123361, 0.0874288, 0.061957 ),
+        EEdigiCollection = cms.string( "" ),
+        EcalPreMixStage2 = cms.bool( False ),
+        EcalPreMixStage1 = cms.bool( False ),
+        # 144 values, written here as 12 rows of 12 (the values form a symmetric matrix).
+        EBPulseShapeCovariance = cms.vdouble(
+            3.001E-6, 1.233E-5, 0.0, -4.416E-6, -4.571E-6, -3.614E-6, -2.636E-6, -1.286E-6, -8.41E-7, -5.296E-7, 0.0, 0.0,
+            1.233E-5, 6.154E-5, 0.0, -2.2E-5, -2.309E-5, -1.838E-5, -1.373E-5, -7.334E-6, -5.088E-6, -3.745E-6, -2.428E-6, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            -4.416E-6, -2.2E-5, 0.0, 8.319E-6, 8.545E-6, 6.792E-6, 5.059E-6, 2.678E-6, 1.816E-6, 1.223E-6, 8.245E-7, 5.589E-7,
+            -4.571E-6, -2.309E-5, 0.0, 8.545E-6, 9.182E-6, 7.219E-6, 5.388E-6, 2.853E-6, 1.944E-6, 1.324E-6, 9.083E-7, 6.335E-7,
+            -3.614E-6, -1.838E-5, 0.0, 6.792E-6, 7.219E-6, 6.016E-6, 4.437E-6, 2.385E-6, 1.636E-6, 1.118E-6, 7.754E-7, 5.556E-7,
+            -2.636E-6, -1.373E-5, 0.0, 5.059E-6, 5.388E-6, 4.437E-6, 3.602E-6, 1.917E-6, 1.322E-6, 9.079E-7, 6.529E-7, 4.752E-7,
+            -1.286E-6, -7.334E-6, 0.0, 2.678E-6, 2.853E-6, 2.385E-6, 1.917E-6, 1.375E-6, 9.1E-7, 6.455E-7, 4.693E-7, 3.657E-7,
+            -8.41E-7, -5.088E-6, 0.0, 1.816E-6, 1.944E-6, 1.636E-6, 1.322E-6, 9.1E-7, 9.115E-7, 6.062E-7, 4.436E-7, 3.422E-7,
+            -5.296E-7, -3.745E-6, 0.0, 1.223E-6, 1.324E-6, 1.118E-6, 9.079E-7, 6.455E-7, 6.062E-7, 7.217E-7, 4.862E-7, 3.768E-7,
+            0.0, -2.428E-6, 0.0, 8.245E-7, 9.083E-7, 7.754E-7, 6.529E-7, 4.693E-7, 4.436E-7, 4.862E-7, 6.509E-7, 4.418E-7,
+            0.0, 0.0, 0.0, 5.589E-7, 6.335E-7, 5.556E-7, 4.752E-7, 3.657E-7, 3.422E-7, 3.768E-7, 4.418E-7, 6.142E-7 ),
+        ESdigiCollection = cms.string( "" ),
+        EBdigiCollection = cms.string( "" ),
+        EBCorrNoiseMatrixG01 = cms.vdouble( 1.0, 0.73354, 0.64442, 0.58851, 0.55425, 0.53082, 0.51916, 0.51097, 0.50732, 0.50409 ),
+        EBCorrNoiseMatrixG12 = cms.vdouble( 1.0, 0.71073, 0.55721, 0.46089, 0.40449, 0.35931, 0.33924, 0.32439, 0.31581, 0.30481 ),
+        EBCorrNoiseMatrixG06 = cms.vdouble( 1.0, 0.70946, 0.58021, 0.49846, 0.45006, 0.41366, 0.39699, 0.38478, 0.37847, 0.37055 ),
+        # 144 values, written here as 12 rows of 12 (the values form a symmetric matrix).
+        EEPulseShapeCovariance = cms.vdouble(
+            3.941E-5, 3.333E-5, 0.0, -1.449E-5, -1.661E-5, -1.424E-5, -1.183E-5, -6.842E-6, -4.915E-6, -3.411E-6, 0.0, 0.0,
+            3.333E-5, 2.862E-5, 0.0, -1.244E-5, -1.431E-5, -1.233E-5, -1.032E-5, -5.883E-6, -4.154E-6, -2.902E-6, -2.128E-6, 0.0,
+            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+            -1.449E-5, -1.244E-5, 0.0, 5.84E-6, 6.649E-6, 5.72E-6, 4.812E-6, 2.708E-6, 1.869E-6, 1.33E-6, 9.186E-7, 6.446E-7,
+            -1.661E-5, -1.431E-5, 0.0, 6.649E-6, 7.966E-6, 6.898E-6, 5.794E-6, 3.157E-6, 2.184E-6, 1.567E-6, 1.084E-6, 7.575E-7,
+            -1.424E-5, -1.233E-5, 0.0, 5.72E-6, 6.898E-6, 6.341E-6, 5.347E-6, 2.859E-6, 1.991E-6, 1.431E-6, 9.839E-7, 6.886E-7,
+            -1.183E-5, -1.032E-5, 0.0, 4.812E-6, 5.794E-6, 5.347E-6, 4.854E-6, 2.628E-6, 1.809E-6, 1.289E-6, 9.02E-7, 6.146E-7,
+            -6.842E-6, -5.883E-6, 0.0, 2.708E-6, 3.157E-6, 2.859E-6, 2.628E-6, 1.863E-6, 1.296E-6, 8.882E-7, 6.108E-7, 4.283E-7,
+            -4.915E-6, -4.154E-6, 0.0, 1.869E-6, 2.184E-6, 1.991E-6, 1.809E-6, 1.296E-6, 1.217E-6, 8.669E-7, 5.751E-7, 3.882E-7,
+            -3.411E-6, -2.902E-6, 0.0, 1.33E-6, 1.567E-6, 1.431E-6, 1.289E-6, 8.882E-7, 8.669E-7, 9.522E-7, 6.717E-7, 4.293E-7,
+            0.0, -2.128E-6, 0.0, 9.186E-7, 1.084E-6, 9.839E-7, 9.02E-7, 6.108E-7, 5.751E-7, 6.717E-7, 7.911E-7, 5.493E-7,
+            0.0, 0.0, 0.0, 6.446E-7, 7.575E-7, 6.886E-7, 6.146E-7, 4.283E-7, 3.882E-7, 4.293E-7, 5.493E-7, 7.027E-7 ),
+        EBPulseShapeTemplate = cms.vdouble( 0.0113979, 0.758151, 1.0, 0.887744, 0.673548, 0.474332, 0.319561, 0.215144, 0.147464, 0.101087, 0.0693181, 0.0475044 ),
+        EECorrNoiseMatrixG01 = cms.vdouble( 1.0, 0.72698, 0.62048, 0.55691, 0.51848, 0.49147, 0.47813, 0.47007, 0.46621, 0.46265 ),
+        EECorrNoiseMatrixG12 = cms.vdouble( 1.0, 0.71373, 0.44825, 0.30152, 0.21609, 0.14786, 0.11772, 0.10165, 0.09465, 0.08098 ),
+        UseLCcorrection = cms.untracked.bool( True ),
+        EECorrNoiseMatrixG06 = cms.vdouble( 1.0, 0.71217, 0.47464, 0.34056, 0.26282, 0.20287, 0.17734, 0.16256, 0.15618, 0.14443 )
+    ),
+    doPrefitEB = cms.bool( False ),
+    addPedestalUncertaintyEE = cms.double( 0.0 ),
+    addPedestalUncertaintyEB = cms.double( 0.0 ),
+    gainSwitchUseMaxSampleEB = cms.bool( True ),
+    EEtimeNconst = cms.double( 31.8 ),
+    EEamplitudeFitParameters = cms.vdouble( 1.89, 1.4 ),
+    chi2ThreshEE_ = cms.double( 50.0 ),
+    eePulseShape = cms.vdouble( 5.2E-5, -5.26E-5, 6.66E-5, 0.1168, 0.7575, 1.0, 0.8876, 0.6732, 0.4741, 0.3194 ),
+    outOfTimeThresholdGain12pEB = cms.double( 5.0 ),
+    gainSwitchUseMaxSampleEE = cms.bool( False ),
+    mitigateBadSamplesEB = cms.bool( False ),
+    outOfTimeThresholdGain12pEE = cms.double( 1000.0 ),
+    ebPulseShape = cms.vdouble( 5.2E-5, -5.26E-5, 6.66E-5, 0.1168, 0.7575, 1.0, 0.8876, 0.6732, 0.4741, 0.3194 ),
+    ampErrorCalculation = cms.bool( False ),
+    mitigateBadSamplesEE = cms.bool( False ),
+    amplitudeThresholdEB = cms.double( 10.0 ),
+    kPoorRecoFlagEB = cms.bool( True ),
+    amplitudeThresholdEE = cms.double( 10.0 ),
+    EBtimeFitLimits_Lower = cms.double( 0.2 ),
+    kPoorRecoFlagEE = cms.bool( False ),
+    EEtimeFitLimits_Upper = cms.double( 1.4 ),
+    outOfTimeThresholdGain61mEE = cms.double( 1000.0 ),
+    EEtimeConstantTerm = cms.double( 1.0 ),
+    EBtimeConstantTerm = cms.double( 0.6 ),
+    chi2ThreshEB_ = cms.double( 65.0 ),
+    outOfTimeThresholdGain61mEB = cms.double( 5.0 )
+)
+
+##
+
+# Calibrated-rechit steps are not part of this test; the loads are kept disabled.
+#process.load('Configuration.StandardSequences.Reconstruction_cff')
+#process.ecalRecHit
+
+#process.load("RecoLocalCalo.EcalRecProducers.ecalRecHitGPU_cfi")
+#process.ecalRecHitGPU
+
+# ECAL digis: a clone of ecalEBunpacker, reading from 'rawDataCollector'.
+#process.hcalDigis.silent = cms.untracked.bool(False)
+#process.hcalDigis.InputLabel = rawTag
+process.ecalDigis = process.ecalEBunpacker.clone()
+process.ecalDigis.InputLabel = cms.InputTag('rawDataCollector')
+#process.hbheprerecogpu.processQIE11 = cms.bool(True)
+
+# Output module writing to test_uncalib.root, run from the 'finalize' end path.
+process.out = cms.OutputModule("PoolOutputModule",
+    fileName = cms.untracked.string("test_uncalib.root")
+)
+
+# Alternative output module, currently disabled:
+#process.out = cms.OutputModule("AsciiOutputModule",
+#    outputCommands = cms.untracked.vstring(
+#        'keep *_ecalMultiFitUncalibRecHit_*_*',
+#    ),
+#    verbosity = cms.untracked.uint32(0)
+#)
+process.finalize = cms.EndPath(process.out)
+
+# Path running only the bunch-spacing producer.
+process.bunchSpacing = cms.Path(process.bunchSpacingProducer)
+
+# Digi path: ECAL unpacker followed by the GPU raw-to-digi module.
+# Disabled first step: process.hcalDigis
+process.digiPath = cms.Path(
+    process.ecalDigis
+    * process.ecalRawToDigiGPU
+)
+
+# Reco path: ecalMultiFitUncalibRecHit first, then the GPU uncalibrated-rechit
+# producer and ecalCPUUncalibRecHitProducer.
+# Disabled alternatives:
+#   (process.ecalMultiFitUncalibRecHit+process.ecalDetIdToBeRecovered)  as the first step
+#   *process.ecalRecHit                                                after the first step
+#   *process.ecalRecHitGPU                                             as the last step
+process.recoPath = cms.Path(
+    process.ecalMultiFitUncalibRecHit
+    * process.ecalUncalibRecHitProducerGPU
+    * process.ecalCPUUncalibRecHitProducer
+)
+
+# Explicit schedule: bunch spacing, digis, reconstruction, then the output end path.
+# Disabled entry (name as originally written), between recoPath and finalize:
+#    process.ecalecalLocalRecoSequence
+process.schedule = cms.Schedule(
+    process.bunchSpacing,
+    process.digiPath,
+    process.recoPath,
+    process.finalize
+)
+
+# Framework options: 8 threads and 8 streams, 'ProductNotFound' listed in
+# SkipEvent, and the end-of-job summary enabled.
+process.options = cms.untracked.PSet(
+    numberOfThreads = cms.untracked.uint32(8),
+    numberOfStreams = cms.untracked.uint32(8),
+    SkipEvent = cms.untracked.vstring('ProductNotFound'),
+    wantSummary = cms.untracked.bool(True)
+)
+
+# report CUDAService messages
+process.MessageLogger.categories.append("CUDAService")
+
+