diff --git a/CUDADataFormats/EcalDigi/BuildFile.xml b/CUDADataFormats/EcalDigi/BuildFile.xml
new file mode 100644
index 0000000000000..b7d25b0872646
--- /dev/null
+++ b/CUDADataFormats/EcalDigi/BuildFile.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/EcalDigi/interface/DigisCollection.h b/CUDADataFormats/EcalDigi/interface/DigisCollection.h
new file mode 100644
index 0000000000000..f471dbfb9fac8
--- /dev/null
+++ b/CUDADataFormats/EcalDigi/interface/DigisCollection.h
@@ -0,0 +1,24 @@
+#ifndef CUDADataFormats_EcalDigi_interface_DigisCollection_h
+#define CUDADataFormats_EcalDigi_interface_DigisCollection_h
+
+#include "CUDADataFormats/CaloCommon/interface/Common.h"
+
+namespace ecal {
+
+ template
+ struct DigisCollection : public ::calo::common::AddSize {
+ DigisCollection() = default;
+ DigisCollection(DigisCollection const &) = default;
+ DigisCollection &operator=(DigisCollection const &) = default;
+
+ DigisCollection(DigisCollection &&) = default;
+ DigisCollection &operator=(DigisCollection &&) = default;
+
+ // stride is statically known
+ typename StoragePolicy::template StorageSelector::type ids;
+ typename StoragePolicy::template StorageSelector::type data;
+ };
+
+} // namespace ecal
+
+#endif // CUDADataFormats_EcalDigi_interface_DigisCollection_h
diff --git a/CUDADataFormats/EcalDigi/src/classes.h b/CUDADataFormats/EcalDigi/src/classes.h
new file mode 100644
index 0000000000000..cd60b775e229b
--- /dev/null
+++ b/CUDADataFormats/EcalDigi/src/classes.h
@@ -0,0 +1,3 @@
+#include "CUDADataFormats/Common/interface/Product.h"
+#include "CUDADataFormats/EcalDigi/interface/DigisCollection.h"
+#include "DataFormats/Common/interface/Wrapper.h"
diff --git a/CUDADataFormats/EcalDigi/src/classes_def.xml b/CUDADataFormats/EcalDigi/src/classes_def.xml
new file mode 100644
index 0000000000000..6a3adfe4b41c5
--- /dev/null
+++ b/CUDADataFormats/EcalDigi/src/classes_def.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/CUDADataFormats/EcalRecHitSoA/BuildFile.xml b/CUDADataFormats/EcalRecHitSoA/BuildFile.xml
new file mode 100644
index 0000000000000..a684d9a23f1c6
--- /dev/null
+++ b/CUDADataFormats/EcalRecHitSoA/BuildFile.xml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h b/CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h
new file mode 100644
index 0000000000000..731b8b801407f
--- /dev/null
+++ b/CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h
@@ -0,0 +1,45 @@
+#ifndef CUDADataFormats_EcalRecHitSoA_interface_EcalRecHit_h
+#define CUDADataFormats_EcalRecHitSoA_interface_EcalRecHit_h
+
+#include
+#include
+
+#include "CUDADataFormats/CaloCommon/interface/Common.h"
+#include "CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h"
+
+namespace ecal {
+
+ template
+ struct RecHit : public ::calo::common::AddSize {
+ RecHit() = default;
+ RecHit(const RecHit&) = default;
+ RecHit& operator=(const RecHit&) = default;
+
+ RecHit(RecHit&&) = default;
+ RecHit& operator=(RecHit&&) = default;
+
+ typename StoragePolicy::template StorageSelector::type energy;
+ typename StoragePolicy::template StorageSelector::type time;
+ // should we remove the following, since already included in "extra" ?
+ typename StoragePolicy::template StorageSelector::type chi2;
+ typename StoragePolicy::template StorageSelector::type
+ extra; // packed uint32_t for timeError, chi2, energyError
+ typename StoragePolicy::template StorageSelector::type
+ flagBits; // store rechit condition (see Flags enum) in a bit-wise way
+ typename StoragePolicy::template StorageSelector::type did;
+
+ template
+ typename std::enable_if::value, void>::type resize(size_t size) {
+ energy.resize(size);
+ time.resize(size);
+ chi2.resize(size);
+ extra.resize(size);
+ flagBits.resize(size);
+ did.resize(size);
+ }
+ };
+
+} // namespace ecal
+
+#endif // CUDADataFormats_EcalRecHitSoA_interface_EcalRecHit_h
diff --git a/CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h b/CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h
new file mode 100644
index 0000000000000..78c909b029dc1
--- /dev/null
+++ b/CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h
@@ -0,0 +1,46 @@
+#ifndef CUDADataFormats_EcalRecHitSoA_interface_EcalUncalibratedRecHit_h
+#define CUDADataFormats_EcalRecHitSoA_interface_EcalUncalibratedRecHit_h
+
+#include
+#include
+
+#include "CUDADataFormats/CaloCommon/interface/Common.h"
+#include "CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h"
+#include "DataFormats/EcalDigi/interface/EcalDataFrame.h"
+
+namespace ecal {
+
+ template
+ struct UncalibratedRecHit : public ::calo::common::AddSize {
+ UncalibratedRecHit() = default;
+ UncalibratedRecHit(const UncalibratedRecHit&) = default;
+ UncalibratedRecHit& operator=(const UncalibratedRecHit&) = default;
+
+ UncalibratedRecHit(UncalibratedRecHit&&) = default;
+ UncalibratedRecHit& operator=(UncalibratedRecHit&&) = default;
+
+ typename StoragePolicy::template StorageSelector::type amplitudesAll;
+ typename StoragePolicy::template StorageSelector::type amplitude;
+ typename StoragePolicy::template StorageSelector::type chi2;
+ typename StoragePolicy::template StorageSelector::type pedestal;
+ typename StoragePolicy::template StorageSelector::type jitter;
+ typename StoragePolicy::template StorageSelector::type jitterError;
+ typename StoragePolicy::template StorageSelector::type did;
+ typename StoragePolicy::template StorageSelector::type flags;
+
+ template
+ typename std::enable_if::value, void>::type resize(size_t size) {
+ amplitudesAll.resize(size * EcalDataFrame::MAXSAMPLES);
+ amplitude.resize(size);
+ pedestal.resize(size);
+ chi2.resize(size);
+ did.resize(size);
+ flags.resize(size);
+ jitter.resize(size);
+ jitterError.resize(size);
+ }
+ };
+
+} // namespace ecal
+
+#endif // CUDADataFormats_EcalRecHitSoA_interface_EcalUncalibratedRecHit_h
diff --git a/CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h b/CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h
new file mode 100644
index 0000000000000..87c4252a5e949
--- /dev/null
+++ b/CUDADataFormats/EcalRecHitSoA/interface/RecoTypes.h
@@ -0,0 +1,13 @@
+#ifndef CUDADataFormats_EcalRecHitSoA_interface_RecoTypes_h
+#define CUDADataFormats_EcalRecHitSoA_interface_RecoTypes_h
+
+namespace ecal {
+ namespace reco {
+
+ using ComputationScalarType = float;
+ using StorageScalarType = float;
+
+ } // namespace reco
+} // namespace ecal
+
+#endif // CUDADataFormats_EcalRecHitSoA_interface_RecoTypes_h
diff --git a/CUDADataFormats/EcalRecHitSoA/src/classes.h b/CUDADataFormats/EcalRecHitSoA/src/classes.h
new file mode 100644
index 0000000000000..ef95da461e3ba
--- /dev/null
+++ b/CUDADataFormats/EcalRecHitSoA/src/classes.h
@@ -0,0 +1,4 @@
+#include "CUDADataFormats/Common/interface/Product.h"
+#include "CUDADataFormats/EcalRecHitSoA/interface/EcalRecHit.h"
+#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h"
+#include "DataFormats/Common/interface/Wrapper.h"
diff --git a/CUDADataFormats/EcalRecHitSoA/src/classes_def.xml b/CUDADataFormats/EcalRecHitSoA/src/classes_def.xml
new file mode 100644
index 0000000000000..27e315b2c2822
--- /dev/null
+++ b/CUDADataFormats/EcalRecHitSoA/src/classes_def.xml
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Configuration/StandardSequences/python/RawToDigi_cff.py b/Configuration/StandardSequences/python/RawToDigi_cff.py
index dd3bf675faf0d..102e8b1132f71 100644
--- a/Configuration/StandardSequences/python/RawToDigi_cff.py
+++ b/Configuration/StandardSequences/python/RawToDigi_cff.py
@@ -1,4 +1,5 @@
import FWCore.ParameterSet.Config as cms
+from Configuration.ProcessModifiers.gpu_cff import gpu
# This object is used to selectively make changes for different running
# scenarios. In this case it makes changes for Run 2.
@@ -74,7 +75,7 @@
scalersRawToDigi.scalersInputTag = 'rawDataCollector'
siPixelDigis.cpu.InputLabel = 'rawDataCollector'
-ecalDigis.InputLabel = 'rawDataCollector'
+(~gpu).toModify(ecalDigis, InputLabel='rawDataCollector')
ecalPreshowerDigis.sourceTag = 'rawDataCollector'
hcalDigis.InputLabel = 'rawDataCollector'
muonCSCDigis.InputObjects = 'rawDataCollector'
diff --git a/EventFilter/EcalRawToDigi/BuildFile.xml b/EventFilter/EcalRawToDigi/BuildFile.xml
index 88f864737813e..2ac1b25233910 100644
--- a/EventFilter/EcalRawToDigi/BuildFile.xml
+++ b/EventFilter/EcalRawToDigi/BuildFile.xml
@@ -1,14 +1,18 @@
-
-
+
+
+
+
+
+
diff --git a/EventFilter/EcalRawToDigi/bin/BuildFile.xml b/EventFilter/EcalRawToDigi/bin/BuildFile.xml
new file mode 100644
index 0000000000000..792fe438d8799
--- /dev/null
+++ b/EventFilter/EcalRawToDigi/bin/BuildFile.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
diff --git a/EventFilter/EcalRawToDigi/bin/makeEcalRaw2DigiGpuValidationPlots.cpp b/EventFilter/EcalRawToDigi/bin/makeEcalRaw2DigiGpuValidationPlots.cpp
new file mode 100644
index 0000000000000..609c277e19288
--- /dev/null
+++ b/EventFilter/EcalRawToDigi/bin/makeEcalRaw2DigiGpuValidationPlots.cpp
@@ -0,0 +1,210 @@
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "DataFormats/Common/interface/Wrapper.h"
+#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h"
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout << "run with: ./ \n";
+ exit(0);
+ }
+
+ // branches to use
+ edm::Wrapper*wgpuEB = nullptr, *wcpuEB = nullptr;
+ edm::Wrapper*wgpuEE = nullptr, *wcpuEE = nullptr;
+
+ std::string inFileName{argv[1]};
+ std::string outFileName{argv[2]};
+
+ // prep output
+ TFile rfout{outFileName.c_str(), "recreate"};
+
+ int const nbins = 400;
+ float const last = 4096.;
+ auto hADCEBGPU = new TH1D("hADCEBGPU", "hADCEBGPU", nbins, 0, last);
+ auto hADCEBCPU = new TH1D("hADCEBCPU", "hADCEBCPU", nbins, 0, last);
+ auto hADCEEGPU = new TH1D("hADCEEGPU", "hADCEEGPU", nbins, 0, last);
+ auto hADCEECPU = new TH1D("hADCEECPU", "hADCEECPU", nbins, 0, last);
+
+ auto hGainEBGPU = new TH1D("hGainEBGPU", "hGainEBGPU", 4, 0, 4);
+ auto hGainEBCPU = new TH1D("hGainEBCPU", "hGainEBCPU", 4, 0, 4);
+ auto hGainEEGPU = new TH1D("hGainEEGPU", "hGainEEGPU", 4, 0, 4);
+ auto hGainEECPU = new TH1D("hGainEECPU", "hGainEECPU", 4, 0, 4);
+
+ auto hADCEBGPUvsCPU = new TH2D("hADCEBGPUvsCPU", "hADCEBGPUvsCPU", nbins, 0, last, nbins, 0, last);
+ auto hADCEEGPUvsCPU = new TH2D("hADCEEGPUvsCPU", "hADCEEGPUvsCPU", nbins, 0, last, nbins, 0, last);
+ auto hGainEBGPUvsCPU = new TH2D("hGainEBGPUvsCPU", "hGainEBGPUvsCPU", 4, 0, 4, 4, 0, 4);
+ auto hGainEEGPUvsCPU = new TH2D("hGainEEGPUvsCPU", "hGainEEGPUvsCPU", 4, 0, 4, 4, 0, 4);
+
+ // prep input
+ TFile rfin{inFileName.c_str()};
+ TTree* rt = (TTree*)rfin.Get("Events");
+ rt->SetBranchAddress("EBDigiCollection_ecalCPUDigisProducer_ebDigis_RECO.", &wgpuEB);
+ rt->SetBranchAddress("EEDigiCollection_ecalCPUDigisProducer_eeDigis_RECO.", &wgpuEE);
+ rt->SetBranchAddress("EBDigiCollection_ecalDigis_ebDigis_RECO.", &wcpuEB);
+ rt->SetBranchAddress("EEDigiCollection_ecalDigis_eeDigis_RECO.", &wcpuEE);
+
+ // accumulate
+ auto const nentries = rt->GetEntries();
+ std::cout << ">>> nentries = " << nentries << std::endl;
+ for (int ie = 0; ie < nentries; ++ie) {
+ rt->GetEntry(ie);
+
+ auto const ngpuebs = wgpuEB->bareProduct().size();
+ auto const ncpuebs = wcpuEB->bareProduct().size();
+ auto const ngpuees = wgpuEE->bareProduct().size();
+ auto const ncpuees = wcpuEE->bareProduct().size();
+
+ if (ngpuebs != ncpuebs or ngpuees != ncpuees) {
+ std::cerr << "*** mismatch in ndigis: "
+ << "ie = " << ie << " ngpuebs = " << ngpuebs << " ncpuebs = " << ncpuebs << " ngpuees = " << ngpuees
+ << " ncpuees = " << ncpuees << std::endl;
+
+ // this is a must for now
+ //assert(ngpuebs==ncpuebs);
+ //assert(ngpuees==ncpuees);
+ }
+
+ // assume identical sizes
+ auto const& idsgpuEB = wgpuEB->bareProduct().ids();
+ auto const& datagpuEB = wgpuEB->bareProduct().data();
+ auto const& idscpuEB = wcpuEB->bareProduct().ids();
+ auto const& datacpuEB = wcpuEB->bareProduct().data();
+ for (uint32_t ieb = 0; ieb < ngpuebs; ++ieb) {
+ auto const& idgpu = idsgpuEB[ieb];
+ auto iter2idcpu = std::find(idscpuEB.begin(), idscpuEB.end(), idgpu);
+ // FIXME
+ assert(idgpu == *iter2idcpu);
+
+ auto const ptrdiff = iter2idcpu - idscpuEB.begin();
+ for (uint32_t s = 0u; s < 10u; s++) {
+ EcalMGPASample sampleGPU{datagpuEB[ieb * 10 + s]};
+ EcalMGPASample sampleCPU{datacpuEB[ptrdiff * 10 + s]};
+
+ hADCEBGPU->Fill(sampleGPU.adc());
+ hGainEBGPU->Fill(sampleGPU.gainId());
+ hADCEBCPU->Fill(sampleCPU.adc());
+ hGainEBCPU->Fill(sampleCPU.gainId());
+ hADCEBGPUvsCPU->Fill(sampleCPU.adc(), sampleGPU.adc());
+ hGainEBGPUvsCPU->Fill(sampleCPU.gainId(), sampleGPU.gainId());
+ }
+ }
+
+ auto const& idsgpuEE = wgpuEE->bareProduct().ids();
+ auto const& datagpuEE = wgpuEE->bareProduct().data();
+ auto const& idscpuEE = wcpuEE->bareProduct().ids();
+ auto const& datacpuEE = wcpuEE->bareProduct().data();
+ for (uint32_t iee = 0; iee < ngpuees; ++iee) {
+ auto const& idgpu = idsgpuEE[iee];
+ auto iter2idcpu = std::find(idscpuEE.begin(), idscpuEE.end(), idgpu);
+ // FIXME
+ assert(idgpu == *iter2idcpu);
+
+ // get the digis
+ auto const ptrdiff = iter2idcpu - idscpuEE.begin();
+ for (uint32_t s = 0u; s < 10u; s++) {
+ EcalMGPASample sampleGPU{datagpuEE[iee * 10 + s]};
+ EcalMGPASample sampleCPU{datacpuEE[ptrdiff * 10 + s]};
+
+ hADCEEGPU->Fill(sampleGPU.adc());
+ hGainEEGPU->Fill(sampleGPU.gainId());
+ hADCEECPU->Fill(sampleCPU.adc());
+ hGainEECPU->Fill(sampleCPU.gainId());
+ hADCEEGPUvsCPU->Fill(sampleCPU.adc(), sampleGPU.adc());
+ hGainEEGPUvsCPU->Fill(sampleCPU.gainId(), sampleGPU.gainId());
+ }
+ }
+ }
+
+ {
+ TCanvas c{"plots", "plots", 4200, 6200};
+ c.Divide(2, 4);
+ c.cd(1);
+ {
+ gPad->SetLogy();
+ hADCEBCPU->SetLineColor(kBlack);
+ hADCEBCPU->SetLineWidth(1.);
+ hADCEBCPU->Draw("");
+ hADCEBGPU->SetLineColor(kBlue);
+ hADCEBGPU->SetLineWidth(1.);
+ hADCEBGPU->Draw("sames");
+ gPad->Update();
+ auto stats = (TPaveStats*)hADCEBGPU->FindObject("stats");
+ auto y2 = stats->GetY2NDC();
+ auto y1 = stats->GetY1NDC();
+ stats->SetY2NDC(y1);
+ stats->SetY1NDC(y1 - (y2 - y1));
+ }
+ c.cd(2);
+ {
+ gPad->SetLogy();
+ hADCEECPU->SetLineColor(kBlack);
+ hADCEECPU->SetLineWidth(1.);
+ hADCEECPU->Draw("");
+ hADCEEGPU->SetLineColor(kBlue);
+ hADCEEGPU->SetLineWidth(1.);
+ hADCEEGPU->Draw("sames");
+ gPad->Update();
+ auto stats = (TPaveStats*)hADCEEGPU->FindObject("stats");
+ auto y2 = stats->GetY2NDC();
+ auto y1 = stats->GetY1NDC();
+ stats->SetY2NDC(y1);
+ stats->SetY1NDC(y1 - (y2 - y1));
+ }
+ c.cd(3);
+ {
+ gPad->SetLogy();
+ hGainEBCPU->SetLineColor(kBlack);
+ hGainEBCPU->SetLineWidth(1.);
+ hGainEBCPU->Draw("");
+ hGainEBGPU->SetLineColor(kBlue);
+ hGainEBGPU->SetLineWidth(1.);
+ hGainEBGPU->Draw("sames");
+ gPad->Update();
+ auto stats = (TPaveStats*)hGainEBGPU->FindObject("stats");
+ auto y2 = stats->GetY2NDC();
+ auto y1 = stats->GetY1NDC();
+ stats->SetY2NDC(y1);
+ stats->SetY1NDC(y1 - (y2 - y1));
+ }
+ c.cd(4);
+ {
+ gPad->SetLogy();
+ hGainEECPU->SetLineColor(kBlack);
+ hGainEECPU->SetLineWidth(1.);
+ hGainEECPU->Draw("");
+ hGainEEGPU->SetLineColor(kBlue);
+ hGainEEGPU->SetLineWidth(1.);
+ hGainEEGPU->Draw("sames");
+ gPad->Update();
+ auto stats = (TPaveStats*)hGainEEGPU->FindObject("stats");
+ auto y2 = stats->GetY2NDC();
+ auto y1 = stats->GetY1NDC();
+ stats->SetY2NDC(y1);
+ stats->SetY1NDC(y1 - (y2 - y1));
+ }
+ c.cd(5);
+ hADCEBGPUvsCPU->Draw("colz");
+ c.cd(6);
+ hADCEEGPUvsCPU->Draw("colz");
+ c.cd(7);
+ hGainEBGPUvsCPU->Draw("colz");
+ c.cd(8);
+ hGainEEGPUvsCPU->Draw("colz");
+ c.SaveAs("plots.pdf");
+ }
+
+ rfin.Close();
+ rfout.Write();
+ rfout.Close();
+}
diff --git a/EventFilter/EcalRawToDigi/interface/EcalRegionCabling.h b/EventFilter/EcalRawToDigi/interface/EcalRegionCabling.h
index fa6e9f5d5a161..38a9ebdf18cb8 100644
--- a/EventFilter/EcalRawToDigi/interface/EcalRegionCabling.h
+++ b/EventFilter/EcalRawToDigi/interface/EcalRegionCabling.h
@@ -1,14 +1,11 @@
-#ifndef EcalRegionCabling_H
-#define EcalRegionCabling_H
+#ifndef EventFilter_EcalRawToDigi_interface_EcalRegionCabling_h
+#define EventFilter_EcalRawToDigi_interface_EcalRegionCabling_h
-#include "Geometry/EcalMapping/interface/EcalElectronicsMapping.h"
-#include "Geometry/EcalMapping/interface/ESElectronicsMapper.h"
-
-#include "DataFormats/EcalRecHit/interface/EcalRecHit.h"
-#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "DataFormats/FEDRawData/interface/FEDNumbering.h"
-
#include "FWCore/MessageLogger/interface/MessageLogger.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "Geometry/EcalMapping/interface/ESElectronicsMapper.h"
+#include "Geometry/EcalMapping/interface/EcalElectronicsMapping.h"
class EcalRegionCabling {
public:
@@ -73,4 +70,4 @@ class EcalRegionCabling {
const ESElectronicsMapper* es_mapping_;
};
-#endif
+#endif // EventFilter_EcalRawToDigi_interface_EcalRegionCabling_h
diff --git a/EventFilter/EcalRawToDigi/interface/ElectronicsIdGPU.h b/EventFilter/EcalRawToDigi/interface/ElectronicsIdGPU.h
new file mode 100644
index 0000000000000..abedcf5a2d479
--- /dev/null
+++ b/EventFilter/EcalRawToDigi/interface/ElectronicsIdGPU.h
@@ -0,0 +1,91 @@
+#ifndef EventFilter_EcalRawToDigi_interface_ElectronicsIdGPU_h
+#define EventFilter_EcalRawToDigi_interface_ElectronicsIdGPU_h
+
+#include
+
+#include "DataFormats/EcalDetId/interface/EcalSubdetector.h"
+
+namespace ecal {
+ namespace raw {
+
+ /** \brief Ecal readout channel identification
+ [32:20] Unused (so far)
+ [19:13] DCC id
+ [12:6] tower
+ [5:3] strip
+ [2:0] xtal
+ Index starts from 1
+ */
+
+ class ElectronicsIdGPU {
+ public:
+ /** Default constructor -- invalid value */
+ constexpr ElectronicsIdGPU() : id_{0xFFFFFFFFu} {}
+ /** from raw */
+ constexpr ElectronicsIdGPU(uint32_t id) : id_{id} {}
+ /** Constructor from dcc,tower,channel **/
+ constexpr ElectronicsIdGPU(uint8_t const dccid, uint8_t const towerid, uint8_t const stripid, uint8_t const xtalid)
+ : id_{static_cast((xtalid & 0x7) | ((stripid & 0x7) << 3) | ((towerid & 0x7F) << 6) |
+ ((dccid & 0x7F) << 13))} {}
+
+ constexpr uint32_t operator()() { return id_; }
+ constexpr uint32_t rawId() const { return id_; }
+
+ /// get the DCC (Ecal Local DCC value not global one) id
+ constexpr uint8_t dccId() const { return (id_ >> 13) & 0x7F; }
+ /// get the tower id
+ constexpr uint8_t towerId() const { return (id_ >> 6) & 0x7F; }
+ /// get the tower id
+ constexpr uint8_t stripId() const { return (id_ >> 3) & 0x7; }
+ /// get the channel id
+ constexpr uint8_t xtalId() const { return (id_ & 0x7); }
+
+ /// get the subdet
+ //EcalSubdetector subdet() const;
+
+ /// get a fast, compact, unique index for linear lookups (maximum value = 4194303)
+ constexpr uint32_t linearIndex() const { return id_ & 0x3FFFFF; }
+
+ /// so far for EndCap only :
+ //int channelId() const; // xtal id between 1 and 25
+
+ static constexpr int kTowersInPhi = 4; // see EBDetId
+ static constexpr int kCrystalsInPhi = 20; // see EBDetId
+
+ static constexpr uint8_t MAX_DCCID = 54; //To be updated with correct and final number
+ static constexpr uint8_t MIN_DCCID = 1;
+ static constexpr uint8_t MAX_TOWERID = 70;
+ static constexpr uint8_t MIN_TOWERID = 1;
+ static constexpr uint8_t MAX_STRIPID = 5;
+ static constexpr uint8_t MIN_STRIPID = 1;
+ static constexpr uint8_t MAX_CHANNELID = 25;
+ static constexpr uint8_t MIN_CHANNELID = 1;
+ static constexpr uint8_t MAX_XTALID = 5;
+ static constexpr uint8_t MIN_XTALID = 1;
+
+ static constexpr int MIN_DCCID_EEM = 1;
+ static constexpr int MAX_DCCID_EEM = 9;
+ static constexpr int MIN_DCCID_EBM = 10;
+ static constexpr int MAX_DCCID_EBM = 27;
+ static constexpr int MIN_DCCID_EBP = 28;
+ static constexpr int MAX_DCCID_EBP = 45;
+ static constexpr int MIN_DCCID_EEP = 46;
+ static constexpr int MAX_DCCID_EEP = 54;
+
+ static constexpr int DCCID_PHI0_EBM = 10;
+ static constexpr int DCCID_PHI0_EBP = 28;
+
+ static constexpr int kDCCChannelBoundary = 17;
+ static constexpr int DCC_EBM = 10; // id of the DCC in EB- which contains phi=0 deg.
+ static constexpr int DCC_EBP = 28; // id of the DCC in EB+ which contains phi=0 deg.
+ static constexpr int DCC_EEM = 1; // id of the DCC in EE- which contains phi=0 deg.
+ static constexpr int DCC_EEP = 46; // id of the DCC in EE+ which contains phi=0 deg.
+
+ private:
+ uint32_t id_;
+ };
+
+ } // namespace raw
+} // namespace ecal
+
+#endif // EventFilter_EcalRawToDigi_interface_ElectronicsIdGPU_h
diff --git a/EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h b/EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h
new file mode 100644
index 0000000000000..004821afe3ed8
--- /dev/null
+++ b/EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h
@@ -0,0 +1,47 @@
+#ifndef EventFilter_EcalRawToDigi_interface_ElectronicsMappingGPU_h
+#define EventFilter_EcalRawToDigi_interface_ElectronicsMappingGPU_h
+
+#include "CondFormats/EcalObjects/interface/EcalMappingElectronics.h"
+
+#ifndef __CUDACC__
+#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h"
+#include "HeterogeneousCore/CUDACore/interface/ESProduct.h"
+#endif // __CUDACC__
+
+namespace ecal {
+ namespace raw {
+
+ class ElectronicsMappingGPU {
+ public:
+ struct Product {
+ ~Product();
+ uint32_t* eid2did;
+ };
+
+#ifndef __CUDACC__
+
+ // rearrange pedestals
+ ElectronicsMappingGPU(EcalMappingElectronics const&);
+
+ // will call dealloation for Product thru ~Product
+ ~ElectronicsMappingGPU() = default;
+
+ // get device pointers
+ Product const& getProduct(cudaStream_t) const;
+
+ //
+ static std::string name() { return std::string{"ecalElectronicsMappingGPU"}; }
+
+ private:
+ // in the future, we need to arrange so to avoid this copy on the host
+ // store eb first then ee
+ std::vector> eid2did_;
+
+ cms::cuda::ESProduct product_;
+#endif // __CUDACC__
+ };
+
+ } // namespace raw
+} // namespace ecal
+
+#endif // EventFilter_EcalRawToDigi_interface_ElectronicsMappingGPU_h
diff --git a/EventFilter/EcalRawToDigi/plugins/BuildFile.xml b/EventFilter/EcalRawToDigi/plugins/BuildFile.xml
index e55f1bcaab660..c3c2bd988e2c3 100644
--- a/EventFilter/EcalRawToDigi/plugins/BuildFile.xml
+++ b/EventFilter/EcalRawToDigi/plugins/BuildFile.xml
@@ -1,10 +1,14 @@
-
+
+
-
+
-
+
+
+
+
diff --git a/EventFilter/EcalRawToDigi/plugins/DeclsForKernels.h b/EventFilter/EcalRawToDigi/plugins/DeclsForKernels.h
new file mode 100644
index 0000000000000..a6429121adc82
--- /dev/null
+++ b/EventFilter/EcalRawToDigi/plugins/DeclsForKernels.h
@@ -0,0 +1,70 @@
+#ifndef EventFilter_EcalRawToDigi_plugins_DeclsForKernels_h
+#define EventFilter_EcalRawToDigi_plugins_DeclsForKernels_h
+
+#include
+
+#include "CUDADataFormats/EcalDigi/interface/DigisCollection.h"
+#include "DataFormats/EcalDigi/interface/EcalDataFrame.h"
+#include "EventFilter/EcalRawToDigi/interface/DCCRawDataDefinitions.h"
+#include "EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
+
+namespace ecal {
+ namespace raw {
+
+ constexpr auto empty_event_size = EMPTYEVENTSIZE;
+ constexpr uint32_t nfeds_max = 54;
+ constexpr uint32_t nbytes_per_fed_max = 10 * 1024;
+
+ struct InputDataCPU {
+ cms::cuda::host::unique_ptr data;
+ cms::cuda::host::unique_ptr offsets;
+ cms::cuda::host::unique_ptr feds;
+ };
+
+ struct ConfigurationParameters {
+ uint32_t maxChannelsEE;
+ uint32_t maxChannelsEB;
+ };
+
+ struct OutputDataCPU {
+ // [0] - eb, [1] - ee
+ cms::cuda::host::unique_ptr nchannels;
+ };
+
+ struct OutputDataGPU {
+ DigisCollection<::calo::common::DevStoragePolicy> digisEB, digisEE;
+
+ void allocate(ConfigurationParameters const &config, cudaStream_t cudaStream) {
+ digisEB.data =
+ cms::cuda::make_device_unique(config.maxChannelsEB * EcalDataFrame::MAXSAMPLES, cudaStream);
+ digisEE.data =
+ cms::cuda::make_device_unique(config.maxChannelsEE * EcalDataFrame::MAXSAMPLES, cudaStream);
+ digisEB.ids = cms::cuda::make_device_unique(config.maxChannelsEB, cudaStream);
+ digisEE.ids = cms::cuda::make_device_unique(config.maxChannelsEE, cudaStream);
+ }
+ };
+
+ struct ScratchDataGPU {
+ // [0] = EB
+ // [1] = EE
+ cms::cuda::device::unique_ptr pChannelsCounter;
+ };
+
+ struct InputDataGPU {
+ cms::cuda::device::unique_ptr data;
+ cms::cuda::device::unique_ptr offsets;
+ cms::cuda::device::unique_ptr feds;
+ };
+
+ struct ConditionsProducts {
+ ElectronicsMappingGPU::Product const &eMappingProduct;
+ };
+
+ } // namespace raw
+} // namespace ecal
+
+#endif // EventFilter_EcalRawToDigi_plugins_DeclsForKernels_h
diff --git a/EventFilter/EcalRawToDigi/plugins/EcalCPUDigisProducer.cc b/EventFilter/EcalRawToDigi/plugins/EcalCPUDigisProducer.cc
new file mode 100644
index 0000000000000..5563dd5b52cc8
--- /dev/null
+++ b/EventFilter/EcalRawToDigi/plugins/EcalCPUDigisProducer.cc
@@ -0,0 +1,196 @@
+#include
+
+#include "CUDADataFormats/EcalDigi/interface/DigisCollection.h"
+#include "CondFormats/DataRecord/interface/EcalMappingElectronicsRcd.h"
+#include "DataFormats/EcalDetId/interface/EcalDetIdCollections.h"
+#include "DataFormats/EcalDigi/interface/EcalDataFrame.h"
+#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h"
+#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h"
+#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h"
+#include "EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h"
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/EventSetup.h"
+#include "FWCore/Framework/interface/MakerMacros.h"
+#include "FWCore/Framework/interface/stream/EDProducer.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/HostAllocator.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+
+#include "DeclsForKernels.h"
+#include "UnpackGPU.h"
+
+class EcalCPUDigisProducer : public edm::stream::EDProducer {
+public:
+ explicit EcalCPUDigisProducer(edm::ParameterSet const& ps);
+ ~EcalCPUDigisProducer() override;
+ static void fillDescriptions(edm::ConfigurationDescriptions&);
+
+private:
+ void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override;
+ void produce(edm::Event&, edm::EventSetup const&) override;
+
+private:
+ // input digi collections in GPU-friendly format
+ using InputProduct = cms::cuda::Product>;
+ edm::EDGetTokenT digisInEBToken_;
+ edm::EDGetTokenT digisInEEToken_;
+
+ // output digi collections in legacy format
+ edm::EDPutTokenT digisOutEBToken_;
+ edm::EDPutTokenT digisOutEEToken_;
+
+ // whether to produce dummy integrity collections
+ bool produceDummyIntegrityCollections_;
+
+ // dummy SRP collections
+ edm::EDPutTokenT ebSrFlagToken_;
+ edm::EDPutTokenT eeSrFlagToken_;
+
+ // dummy integrity for xtal data
+ edm::EDPutTokenT ebIntegrityGainErrorsToken_;
+ edm::EDPutTokenT ebIntegrityGainSwitchErrorsToken_;
+ edm::EDPutTokenT ebIntegrityChIdErrorsToken_;
+
+ // dummy integrity for xtal data - EE specific (to be rivisited towards EB+EE common collection)
+ edm::EDPutTokenT eeIntegrityGainErrorsToken_;
+ edm::EDPutTokenT eeIntegrityGainSwitchErrorsToken_;
+ edm::EDPutTokenT eeIntegrityChIdErrorsToken_;
+
+ // dummy integrity errors
+ edm::EDPutTokenT integrityTTIdErrorsToken_;
+ edm::EDPutTokenT integrityBlockSizeErrorsToken_;
+
+ // FIXME better way to pass pointers from acquire to produce?
+ std::vector> idsebtmp, idseetmp;
+ std::vector> dataebtmp, dataeetmp;
+};
+
+void EcalCPUDigisProducer::fillDescriptions(edm::ConfigurationDescriptions& confDesc) {
+ edm::ParameterSetDescription desc;
+
+ desc.add("digisInLabelEB", edm::InputTag{"ecalRawToDigiGPU", "ebDigis"});
+ desc.add("digisInLabelEE", edm::InputTag{"ecalRawToDigiGPU", "eeDigis"});
+ desc.add("digisOutLabelEB", "ebDigis");
+ desc.add("digisOutLabelEE", "eeDigis");
+
+ desc.add("produceDummyIntegrityCollections", false);
+
+ std::string label = "ecalCPUDigisProducer";
+ confDesc.add(label, desc);
+}
+
+EcalCPUDigisProducer::EcalCPUDigisProducer(const edm::ParameterSet& ps)
+ : // input digi collections in GPU-friendly format
+ digisInEBToken_{consumes(ps.getParameter("digisInLabelEB"))},
+ digisInEEToken_{consumes(ps.getParameter("digisInLabelEE"))},
+ // output digi collections in legacy format
+ digisOutEBToken_{produces(ps.getParameter("digisOutLabelEB"))},
+ digisOutEEToken_{produces(ps.getParameter("digisOutLabelEE"))},
+ // whether to produce dummy integrity collections
+ produceDummyIntegrityCollections_{ps.getParameter("produceDummyIntegrityCollections")},
+ // dummy SRP collections
+ ebSrFlagToken_{produceDummyIntegrityCollections_ ? produces()
+ : edm::EDPutTokenT{}},
+ eeSrFlagToken_{produceDummyIntegrityCollections_ ? produces()
+ : edm::EDPutTokenT{}},
+ // dummy integrity for xtal data
+ ebIntegrityGainErrorsToken_{produceDummyIntegrityCollections_
+ ? produces("EcalIntegrityGainErrors")
+ : edm::EDPutTokenT{}},
+ ebIntegrityGainSwitchErrorsToken_{produceDummyIntegrityCollections_
+ ? produces("EcalIntegrityGainSwitchErrors")
+ : edm::EDPutTokenT{}},
+ ebIntegrityChIdErrorsToken_{produceDummyIntegrityCollections_
+ ? produces("EcalIntegrityChIdErrors")
+ : edm::EDPutTokenT{}},
+ // dummy integrity for xtal data - EE specific (to be rivisited towards EB+EE common collection)
+ eeIntegrityGainErrorsToken_{produceDummyIntegrityCollections_
+ ? produces("EcalIntegrityGainErrors")
+ : edm::EDPutTokenT{}},
+ eeIntegrityGainSwitchErrorsToken_{produceDummyIntegrityCollections_
+ ? produces("EcalIntegrityGainSwitchErrors")
+ : edm::EDPutTokenT{}},
+ eeIntegrityChIdErrorsToken_{produceDummyIntegrityCollections_
+ ? produces("EcalIntegrityChIdErrors")
+ : edm::EDPutTokenT{}},
+ // dummy integrity errors
+ integrityTTIdErrorsToken_{produceDummyIntegrityCollections_
+ ? produces("EcalIntegrityTTIdErrors")
+ : edm::EDPutTokenT{}},
+ integrityBlockSizeErrorsToken_{produceDummyIntegrityCollections_
+ ? produces("EcalIntegrityBlockSizeErrors")
+ : edm::EDPutTokenT{}} {}
+
+EcalCPUDigisProducer::~EcalCPUDigisProducer() {}
+
+void EcalCPUDigisProducer::acquire(edm::Event const& event,
+ edm::EventSetup const& setup,
+ edm::WaitingTaskWithArenaHolder taskHolder) {
+ // retrieve data/ctx
+ auto const& ebdigisProduct = event.get(digisInEBToken_);
+ auto const& eedigisProduct = event.get(digisInEEToken_);
+ cms::cuda::ScopedContextAcquire ctx{ebdigisProduct, std::move(taskHolder)};
+ auto const& ebdigis = ctx.get(ebdigisProduct);
+ auto const& eedigis = ctx.get(eedigisProduct);
+
+ // resize tmp buffers
+ dataebtmp.resize(ebdigis.size * EcalDataFrame::MAXSAMPLES);
+ dataeetmp.resize(eedigis.size * EcalDataFrame::MAXSAMPLES);
+ idsebtmp.resize(ebdigis.size);
+ idseetmp.resize(eedigis.size);
+
+ // enqeue transfers
+ cudaCheck(cudaMemcpyAsync(
+ dataebtmp.data(), ebdigis.data.get(), dataebtmp.size() * sizeof(uint16_t), cudaMemcpyDeviceToHost, ctx.stream()));
+ cudaCheck(cudaMemcpyAsync(
+ dataeetmp.data(), eedigis.data.get(), dataeetmp.size() * sizeof(uint16_t), cudaMemcpyDeviceToHost, ctx.stream()));
+ cudaCheck(cudaMemcpyAsync(
+ idsebtmp.data(), ebdigis.ids.get(), idsebtmp.size() * sizeof(uint32_t), cudaMemcpyDeviceToHost, ctx.stream()));
+ cudaCheck(cudaMemcpyAsync(
+ idseetmp.data(), eedigis.ids.get(), idseetmp.size() * sizeof(uint32_t), cudaMemcpyDeviceToHost, ctx.stream()));
+}
+
+void EcalCPUDigisProducer::produce(edm::Event& event, edm::EventSetup const& setup) {
+ // output collections
+ auto digisEB = std::make_unique();
+ auto digisEE = std::make_unique();
+ digisEB->resize(idsebtmp.size());
+ digisEE->resize(idseetmp.size());
+
+ // cast constness away
+ // use pointers to buffers instead of move operator= semantics
+ // cause we have different allocators in there...
+ auto* dataEB = const_cast(digisEB->data().data());
+ auto* dataEE = const_cast(digisEE->data().data());
+ auto* idsEB = const_cast(digisEB->ids().data());
+ auto* idsEE = const_cast(digisEE->ids().data());
+
+ // copy data
+ std::memcpy(dataEB, dataebtmp.data(), dataebtmp.size() * sizeof(uint16_t));
+ std::memcpy(dataEE, dataeetmp.data(), dataeetmp.size() * sizeof(uint16_t));
+ std::memcpy(idsEB, idsebtmp.data(), idsebtmp.size() * sizeof(uint32_t));
+ std::memcpy(idsEE, idseetmp.data(), idseetmp.size() * sizeof(uint32_t));
+
+ event.put(digisOutEBToken_, std::move(digisEB));
+ event.put(digisOutEEToken_, std::move(digisEE));
+
+ if (produceDummyIntegrityCollections_) {
+ // dummy SRP collections
+ event.emplace(ebSrFlagToken_);
+ event.emplace(eeSrFlagToken_);
+ // dummy integrity for xtal data
+ event.emplace(ebIntegrityGainErrorsToken_);
+ event.emplace(ebIntegrityGainSwitchErrorsToken_);
+ event.emplace(ebIntegrityChIdErrorsToken_);
+ // dummy integrity for xtal data - EE specific (to be rivisited towards EB+EE common collection)
+ event.emplace(eeIntegrityGainErrorsToken_);
+ event.emplace(eeIntegrityGainSwitchErrorsToken_);
+ event.emplace(eeIntegrityChIdErrorsToken_);
+ // dummy integrity errors
+ event.emplace(integrityTTIdErrorsToken_);
+ event.emplace(integrityBlockSizeErrorsToken_);
+ }
+}
+
+DEFINE_FWK_MODULE(EcalCPUDigisProducer);
diff --git a/EventFilter/EcalRawToDigi/plugins/EcalRawESProducersGPUDefs.cc b/EventFilter/EcalRawToDigi/plugins/EcalRawESProducersGPUDefs.cc
new file mode 100644
index 0000000000000..84fcc7b2b2952
--- /dev/null
+++ b/EventFilter/EcalRawToDigi/plugins/EcalRawESProducersGPUDefs.cc
@@ -0,0 +1,9 @@
+#include "CondFormats/DataRecord/interface/EcalMappingElectronicsRcd.h"
+#include "EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h"
+#include "FWCore/Framework/interface/MakerMacros.h"
+#include "HeterogeneousCore/CUDACore/interface/ConvertingESProducerT.h"
+
+using EcalElectronicsMappingGPUESProducer =
+ ConvertingESProducerT;
+
+DEFINE_FWK_EVENTSETUP_MODULE(EcalElectronicsMappingGPUESProducer);
diff --git a/EventFilter/EcalRawToDigi/plugins/EcalRawToDigiGPU.cc b/EventFilter/EcalRawToDigi/plugins/EcalRawToDigiGPU.cc
new file mode 100644
index 0000000000000..4f0743c9b1b51
--- /dev/null
+++ b/EventFilter/EcalRawToDigi/plugins/EcalRawToDigiGPU.cc
@@ -0,0 +1,155 @@
+#include "CUDADataFormats/EcalDigi/interface/DigisCollection.h"
+#include "CondFormats/DataRecord/interface/EcalMappingElectronicsRcd.h"
+#include "DataFormats/EcalDigi/interface/EcalDigiCollections.h"
+#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h"
+#include "EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h"
+#include "FWCore/Framework/interface/Event.h"
+#include "FWCore/Framework/interface/EventSetup.h"
+#include "FWCore/Framework/interface/MakerMacros.h"
+#include "FWCore/Framework/interface/stream/EDProducer.h"
+#include "FWCore/ParameterSet/interface/ParameterSet.h"
+#include "FWCore/Utilities/interface/ESGetToken.h"
+#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+
+#include "DeclsForKernels.h"
+#include "UnpackGPU.h"
+
+class EcalRawToDigiGPU : public edm::stream::EDProducer {
+public:
+ explicit EcalRawToDigiGPU(edm::ParameterSet const& ps);
+ ~EcalRawToDigiGPU() override;
+ static void fillDescriptions(edm::ConfigurationDescriptions&);
+
+private:
+ void acquire(edm::Event const&, edm::EventSetup const&, edm::WaitingTaskWithArenaHolder) override;
+ void produce(edm::Event&, edm::EventSetup const&) override;
+
+private:
+ edm::EDGetTokenT rawDataToken_;
+ using OutputProduct = cms::cuda::Product>;
+ edm::EDPutTokenT digisEBToken_, digisEEToken_;
+ edm::ESGetToken eMappingToken_;
+
+ cms::cuda::ContextState cudaState_;
+
+ std::vector fedsToUnpack_;
+
+ ecal::raw::ConfigurationParameters config_;
+ ecal::raw::OutputDataGPU outputGPU_;
+ ecal::raw::OutputDataCPU outputCPU_;
+};
+
+void EcalRawToDigiGPU::fillDescriptions(edm::ConfigurationDescriptions& confDesc) {
+ edm::ParameterSetDescription desc;
+
+ desc.add("InputLabel", edm::InputTag("rawDataCollector"));
+ std::vector feds(54);
+ for (uint32_t i = 0; i < 54; ++i)
+ feds[i] = i + 601;
+ desc.add>("FEDs", feds);
+ desc.add("maxChannelsEB", 61200);
+ desc.add("maxChannelsEE", 14648);
+ desc.add("digisLabelEB", "ebDigis");
+ desc.add("digisLabelEE", "eeDigis");
+
+ std::string label = "ecalRawToDigiGPU";
+ confDesc.add(label, desc);
+}
+
+EcalRawToDigiGPU::EcalRawToDigiGPU(const edm::ParameterSet& ps)
+ : rawDataToken_{consumes(ps.getParameter("InputLabel"))},
+ digisEBToken_{produces(ps.getParameter("digisLabelEB"))},
+ digisEEToken_{produces(ps.getParameter("digisLabelEE"))},
+ eMappingToken_{esConsumes()},
+ fedsToUnpack_{ps.getParameter>("FEDs")} {
+ config_.maxChannelsEB = ps.getParameter("maxChannelsEB");
+ config_.maxChannelsEE = ps.getParameter("maxChannelsEE");
+}
+
+EcalRawToDigiGPU::~EcalRawToDigiGPU() {}
+
+void EcalRawToDigiGPU::acquire(edm::Event const& event,
+ edm::EventSetup const& setup,
+ edm::WaitingTaskWithArenaHolder holder) {
+ // raii
+ cms::cuda::ScopedContextAcquire ctx{event.streamID(), std::move(holder), cudaState_};
+
+ // conditions
+ edm::ESHandle eMappingHandle = setup.getHandle(eMappingToken_);
+ auto const& eMappingProduct = eMappingHandle->getProduct(ctx.stream());
+
+ // bundle up conditions
+ ecal::raw::ConditionsProducts conditions{eMappingProduct};
+
+ // event data
+ edm::Handle rawDataHandle;
+ event.getByToken(rawDataToken_, rawDataHandle);
+
+ // scratch
+ ecal::raw::ScratchDataGPU scratchGPU = {cms::cuda::make_device_unique(2, ctx.stream())};
+
+ // input cpu data
+ ecal::raw::InputDataCPU inputCPU = {
+ cms::cuda::make_host_unique(ecal::raw::nfeds_max * ecal::raw::nbytes_per_fed_max, ctx.stream()),
+ cms::cuda::make_host_unique(ecal::raw::nfeds_max, ctx.stream()),
+ cms::cuda::make_host_unique(ecal::raw::nfeds_max, ctx.stream())};
+
+ // input data gpu
+ ecal::raw::InputDataGPU inputGPU = {cms::cuda::make_device_unique(
+ ecal::raw::nfeds_max * ecal::raw::nbytes_per_fed_max, ctx.stream()),
+ cms::cuda::make_device_unique(ecal::raw::nfeds_max, ctx.stream()),
+ cms::cuda::make_device_unique(ecal::raw::nfeds_max, ctx.stream())};
+
+ // output cpu
+ outputCPU_ = {cms::cuda::make_host_unique(2, ctx.stream())};
+
+ // output gpu
+ outputGPU_.allocate(config_, ctx.stream());
+
+ // iterate over feds
+ // TODO: another idea
+ // - loop over all feds to unpack and enqueue cuda memcpy
+ // - accumulate the sizes
+ // - after the loop launch cuda memcpy for sizes
+ // - enqueue the kernel
+ uint32_t currentCummOffset = 0;
+ uint32_t counter = 0;
+ for (auto const& fed : fedsToUnpack_) {
+ auto const& data = rawDataHandle->FEDData(fed);
+ auto const nbytes = data.size();
+
+ // skip empty feds
+ if (nbytes < ecal::raw::empty_event_size)
+ continue;
+
+ // copy raw data into plain buffer
+ std::memcpy(inputCPU.data.get() + currentCummOffset, data.data(), nbytes);
+ // set the offset in bytes from the start
+ inputCPU.offsets[counter] = currentCummOffset;
+ inputCPU.feds[counter] = fed;
+
+ // this is the current offset into the vector
+ currentCummOffset += nbytes;
+ ++counter;
+ }
+
+ ecal::raw::entryPoint(
+ inputCPU, inputGPU, outputGPU_, scratchGPU, outputCPU_, conditions, ctx.stream(), counter, currentCummOffset);
+}
+
+void EcalRawToDigiGPU::produce(edm::Event& event, edm::EventSetup const& setup) {
+ cms::cuda::ScopedContextProduce ctx{cudaState_};
+
+ // get the number of channels
+ outputGPU_.digisEB.size = outputCPU_.nchannels[0];
+ outputGPU_.digisEE.size = outputCPU_.nchannels[1];
+
+ ctx.emplace(event, digisEBToken_, std::move(outputGPU_.digisEB));
+ ctx.emplace(event, digisEEToken_, std::move(outputGPU_.digisEE));
+
+ // reset ptrs that are carried as members
+ outputCPU_.nchannels.reset();
+}
+
+DEFINE_FWK_MODULE(EcalRawToDigiGPU);
diff --git a/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu b/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu
new file mode 100644
index 0000000000000..a25bf235d15f6
--- /dev/null
+++ b/EventFilter/EcalRawToDigi/plugins/UnpackGPU.cu
@@ -0,0 +1,333 @@
+#include "EventFilter/EcalRawToDigi/interface/ElectronicsIdGPU.h"
+
+#include "UnpackGPU.h"
+
+namespace ecal {
+ namespace raw {
+
+ __forceinline__ __device__ void print_raw_buffer(uint8_t const* const buffer,
+ uint32_t const nbytes,
+ uint32_t const nbytes_per_row = 20) {
+ for (uint32_t i = 0; i < nbytes; i++) {
+ if (i % nbytes_per_row == 0 && i > 0)
+ printf("\n");
+ printf("%02X ", buffer[i]);
+ }
+ }
+
+ __forceinline__ __device__ void print_first3bits(uint64_t const* buffer, uint32_t size) {
+ for (uint32_t i = 0; i < size; ++i) {
+ uint8_t const b61 = (buffer[i] >> 61) & 0x1;
+ uint8_t const b62 = (buffer[i] >> 62) & 0x1;
+ uint8_t const b63 = (buffer[i] >> 63) & 0x1;
+ printf("[word: %u] %u%u%u\n", i, b63, b62, b61);
+ }
+ }
+
+ __forceinline__ __device__ bool is_barrel(uint8_t dccid) {
+ return dccid >= ElectronicsIdGPU::MIN_DCCID_EBM && dccid <= ElectronicsIdGPU::MAX_DCCID_EBP;
+ }
+
+ __forceinline__ __device__ uint8_t fed2dcc(int fed) { return static_cast(fed - 600); }
+
+ __forceinline__ __device__ int zside_for_eb(ElectronicsIdGPU const& eid) {
+ int dcc = eid.dccId();
+ return ((dcc >= ElectronicsIdGPU::MIN_DCCID_EBM && dcc <= ElectronicsIdGPU::MAX_DCCID_EBM)) ? -1 : 1;
+ }
+
+ __forceinline__ __device__ bool is_synced_towerblock(uint16_t const dccbx,
+ uint16_t const bx,
+ uint16_t const dccl1,
+ uint16_t const l1) {
+ bool const bxsync = (bx == 0 && dccbx == 3564) || (bx == dccbx && dccbx != 3564);
+ bool const l1sync = (l1 == ((dccl1 - 1) & 0xfff));
+ return bxsync && l1sync;
+ }
+
+ __forceinline__ __device__ bool right_tower_for_eb(int tower) {
+ // for EB, two types of tower (LVRB top/bottom)
+ if ((tower > 12 && tower < 21) || (tower > 28 && tower < 37) || (tower > 44 && tower < 53) ||
+ (tower > 60 && tower < 69))
+ return true;
+ else
+ return false;
+ }
+
+ __forceinline__ __device__ uint32_t compute_ebdetid(ElectronicsIdGPU const& eid) {
+ // as in Geometry/EcalMaping/.../EcalElectronicsMapping
+ auto const dcc = eid.dccId();
+ auto const tower = eid.towerId();
+ auto const strip = eid.stripId();
+ auto const xtal = eid.xtalId();
+
+ int smid = 0;
+ int iphi = 0;
+ bool EBPlus = (zside_for_eb(eid) > 0);
+ bool EBMinus = !EBPlus;
+
+ if (zside_for_eb(eid) < 0) {
+ smid = dcc + 19 - ElectronicsIdGPU::DCCID_PHI0_EBM;
+ iphi = (smid - 19) * ElectronicsIdGPU::kCrystalsInPhi;
+ iphi += 5 * ((tower - 1) % ElectronicsIdGPU::kTowersInPhi);
+ } else {
+ smid = dcc + 1 - ElectronicsIdGPU::DCCID_PHI0_EBP;
+ iphi = (smid - 1) * ElectronicsIdGPU::kCrystalsInPhi;
+ iphi += 5 * (ElectronicsIdGPU::kTowersInPhi - ((tower - 1) % ElectronicsIdGPU::kTowersInPhi) - 1);
+ }
+
+ bool RightTower = right_tower_for_eb(tower);
+ int ieta = 5 * ((tower - 1) / ElectronicsIdGPU::kTowersInPhi) + 1;
+ if (RightTower) {
+ ieta += (strip - 1);
+ if (strip % 2 == 1) {
+ if (EBMinus)
+ iphi += (xtal - 1) + 1;
+ else
+ iphi += (4 - (xtal - 1)) + 1;
+ } else {
+ if (EBMinus)
+ iphi += (4 - (xtal - 1)) + 1;
+ else
+ iphi += (xtal - 1) + 1;
+ }
+ } else {
+ ieta += 4 - (strip - 1);
+ if (strip % 2 == 1) {
+ if (EBMinus)
+ iphi += (4 - (xtal - 1)) + 1;
+ else
+ iphi += (xtal - 1) + 1;
+ } else {
+ if (EBMinus)
+ iphi += (xtal - 1) + 1;
+ else
+ iphi += (4 - (xtal - 1)) + 1;
+ }
+ }
+
+ if (zside_for_eb(eid) < 0)
+ ieta = -ieta;
+
+ DetId did{DetId::Ecal, EcalBarrel};
+ return did.rawId() | ((ieta > 0) ? (0x10000 | (ieta << 9)) : ((-ieta) << 9)) | (iphi & 0x1FF);
+ }
+
+ __forceinline__ __device__ int adc(uint16_t sample) { return sample & 0xfff; }
+
+ __forceinline__ __device__ int gainId(uint16_t sample) { return (sample >> 12) & 0x3; }
+
+ template
+ __global__ void kernel_unpack_test(unsigned char const* __restrict__ data,
+ uint32_t const* __restrict__ offsets,
+ int const* __restrict__ feds,
+ uint16_t* samplesEB,
+ uint16_t* samplesEE,
+ uint32_t* idsEB,
+ uint32_t* idsEE,
+ uint32_t* pChannelsCounterEBEE,
+ uint32_t const* eid2did,
+ uint32_t const nbytesTotal) {
+ // indices
+ auto const ifed = blockIdx.x;
+
+ // offset in bytes
+ auto const offset = offsets[ifed];
+ // fed id
+ auto const fed = feds[ifed];
+ auto const isBarrel = is_barrel(static_cast(fed - 600));
+ // size
+ auto const size = ifed == gridDim.x - 1 ? nbytesTotal - offset : offsets[ifed + 1] - offset;
+ auto* samples = isBarrel ? samplesEB : samplesEE;
+ auto* ids = isBarrel ? idsEB : idsEE;
+ auto* pChannelsCounter = isBarrel ? &pChannelsCounterEBEE[0] : &pChannelsCounterEBEE[1];
+
+ // offset to the right raw buffer
+ uint64_t const* buffer = reinterpret_cast(data + offset);
+
+ // dump first 3 bits for each 64-bit word
+ //print_first3bits(buffer, size / 8);
+
+ //
+ // fed header
+ //
+ auto const fed_header = buffer[0];
+ uint32_t bx = (fed_header >> 20) & 0xfff;
+ uint32_t lv1 = (fed_header >> 32) & 0xffffff;
+
+ // 9 for fed + dcc header
+ // 36 for 4 EE TCC blocks or 18 for 1 EB TCC block
+ // 6 for SR block size
+
+ // dcc header w2
+ auto const w2 = buffer[2];
+ uint8_t const fov = (w2 >> 48) & 0xf;
+
+ //
+ // print Tower block headers
+ //
+ uint8_t ntccblockwords = isBarrel ? 18 : 36;
+ auto const* tower_blocks_start = buffer + 9 + ntccblockwords + 6;
+ auto const* trailer = buffer + (size / 8 - 1);
+ auto const* current_tower_block = tower_blocks_start;
+ while (current_tower_block != trailer) {
+ auto const w = *current_tower_block;
+ uint8_t ttid = w & 0xff;
+ uint16_t bxlocal = (w >> 16) & 0xfff;
+ uint16_t lv1local = (w >> 32) & 0xfff;
+ uint16_t block_length = (w >> 48) & 0x1ff;
+
+ uint16_t const dccbx = bx & 0xfff;
+ uint16_t const dccl1 = lv1 & 0xfff;
+ // fov>=1 is required to support simulated data for which bx==bxlocal==0
+ if (fov >= 1 && !is_synced_towerblock(dccbx, bxlocal, dccl1, lv1local)) {
+ current_tower_block += block_length;
+ continue;
+ }
+
+ // go through all the channels
+ // get the next channel coordinates
+ uint32_t nchannels = (block_length - 1) / 3;
+
+ // 1 threads per channel in this block
+ for (uint32_t ich = 0; ich < nchannels; ich += NTHREADS) {
+ auto const i_to_access = ich + threadIdx.x;
+ // threads outside of the range -> leave the loop
+ if (i_to_access >= nchannels)
+ break;
+
+ // inc the channel's counter and get the pos where to store
+ auto const wdata = current_tower_block[1 + i_to_access * 3];
+ uint8_t const stripid = wdata & 0x7;
+ uint8_t const xtalid = (wdata >> 4) & 0x7;
+ ElectronicsIdGPU eid{fed2dcc(fed), ttid, stripid, xtalid};
+ auto const didraw = isBarrel ? compute_ebdetid(eid) : eid2did[eid.linearIndex()];
+ // FIXME: what kind of channels are these guys
+ if (didraw == 0)
+ continue;
+
+ // get samples
+ uint16_t sampleValues[10];
+ sampleValues[0] = (wdata >> 16) & 0x3fff;
+ sampleValues[1] = (wdata >> 32) & 0x3fff;
+ sampleValues[2] = (wdata >> 48) & 0x3fff;
+ auto const wdata1 = current_tower_block[2 + i_to_access * 3];
+ sampleValues[3] = wdata1 & 0x3fff;
+ sampleValues[4] = (wdata1 >> 16) & 0x3fff;
+ sampleValues[5] = (wdata1 >> 32) & 0x3fff;
+ sampleValues[6] = (wdata1 >> 48) & 0x3fff;
+ auto const wdata2 = current_tower_block[3 + i_to_access * 3];
+ sampleValues[7] = wdata2 & 0x3fff;
+ sampleValues[8] = (wdata2 >> 16) & 0x3fff;
+ sampleValues[9] = (wdata2 >> 32) & 0x3fff;
+
+ // check gain
+ bool isSaturation = true;
+ short firstGainZeroSampID{-1}, firstGainZeroSampADC{-1};
+ for (uint32_t si = 0; si < 10; si++) {
+ if (gainId(sampleValues[si]) == 0) {
+ firstGainZeroSampID = si;
+ firstGainZeroSampADC = adc(sampleValues[si]);
+ break;
+ }
+ }
+ if (firstGainZeroSampID != -1) {
+ unsigned int plateauEnd = std::min(10u, (unsigned int)(firstGainZeroSampID + 5));
+ for (unsigned int s = firstGainZeroSampID; s < plateauEnd; s++) {
+ if (gainId(sampleValues[s]) == 0 && adc(sampleValues[s]) == firstGainZeroSampADC) {
+ ;
+ } else {
+ isSaturation = false;
+ break;
+ } //it's not saturation
+ }
+ // get rid of channels which are stuck in gain0
+ if (firstGainZeroSampID < 3) {
+ isSaturation = false;
+ }
+ if (!isSaturation)
+ continue;
+ } else { // there is no zero gainId sample
+ // gain switch check
+ short numGain = 1;
+ bool gainSwitchError = false;
+ for (unsigned int si = 1; si < 10; si++) {
+ if ((gainId(sampleValues[si - 1]) > gainId(sampleValues[si])) && numGain < 5)
+ gainSwitchError = true;
+ if (gainId(sampleValues[si - 1]) == gainId(sampleValues[si]))
+ numGain++;
+ else
+ numGain = 1;
+ }
+ if (gainSwitchError)
+ continue;
+ }
+
+ auto const pos = atomicAdd(pChannelsCounter, 1);
+
+ // store to global
+ ids[pos] = didraw;
+ samples[pos * 10] = sampleValues[0];
+ samples[pos * 10 + 1] = sampleValues[1];
+ samples[pos * 10 + 2] = sampleValues[2];
+ samples[pos * 10 + 3] = sampleValues[3];
+ samples[pos * 10 + 4] = sampleValues[4];
+ samples[pos * 10 + 5] = sampleValues[5];
+ samples[pos * 10 + 6] = sampleValues[6];
+ samples[pos * 10 + 7] = sampleValues[7];
+ samples[pos * 10 + 8] = sampleValues[8];
+ samples[pos * 10 + 9] = sampleValues[9];
+ }
+
+ current_tower_block += block_length;
+ }
+ }
+
+ void entryPoint(InputDataCPU const& inputCPU,
+ InputDataGPU& inputGPU,
+ OutputDataGPU& outputGPU,
+ ScratchDataGPU& scratchGPU,
+ OutputDataCPU& outputCPU,
+ ConditionsProducts const& conditions,
+ cudaStream_t cudaStream,
+ uint32_t const nfedsWithData,
+ uint32_t const nbytesTotal) {
+ // transfer
+ cudaCheck(cudaMemcpyAsync(inputGPU.data.get(),
+ inputCPU.data.get(),
+ nbytesTotal * sizeof(unsigned char),
+ cudaMemcpyHostToDevice,
+ cudaStream));
+ cudaCheck(cudaMemcpyAsync(inputGPU.offsets.get(),
+ inputCPU.offsets.get(),
+ nfedsWithData * sizeof(uint32_t),
+ cudaMemcpyHostToDevice,
+ cudaStream));
+ cudaCheck(cudaMemsetAsync(scratchGPU.pChannelsCounter.get(),
+ 0,
+ sizeof(uint32_t) * 2, // EB + EE
+ cudaStream));
+ cudaCheck(cudaMemcpyAsync(
+ inputGPU.feds.get(), inputCPU.feds.get(), nfedsWithData * sizeof(int), cudaMemcpyHostToDevice, cudaStream));
+
+ kernel_unpack_test<32><<>>(inputGPU.data.get(),
+ inputGPU.offsets.get(),
+ inputGPU.feds.get(),
+ outputGPU.digisEB.data.get(),
+ outputGPU.digisEE.data.get(),
+ outputGPU.digisEB.ids.get(),
+ outputGPU.digisEE.ids.get(),
+ scratchGPU.pChannelsCounter.get(),
+ conditions.eMappingProduct.eid2did,
+ nbytesTotal);
+ cudaCheck(cudaGetLastError());
+
+ // transfer the counters for how many eb and ee channels we got
+ cudaCheck(cudaMemcpyAsync(outputCPU.nchannels.get(),
+ scratchGPU.pChannelsCounter.get(),
+ sizeof(uint32_t) * 2,
+ cudaMemcpyDeviceToHost,
+ cudaStream));
+ }
+
+ } // namespace raw
+} // namespace ecal
diff --git a/EventFilter/EcalRawToDigi/plugins/UnpackGPU.h b/EventFilter/EcalRawToDigi/plugins/UnpackGPU.h
new file mode 100644
index 0000000000000..d98906e7e24a7
--- /dev/null
+++ b/EventFilter/EcalRawToDigi/plugins/UnpackGPU.h
@@ -0,0 +1,23 @@
+#ifndef EventFilter_EcalRawToDigi_plugins_UnpackGPU_h
+#define EventFilter_EcalRawToDigi_plugins_UnpackGPU_h
+
+#include "DeclsForKernels.h"
+
+namespace ecal {
+ namespace raw {
+
+ // FIXME: bundle up uint32_t values
+ void entryPoint(InputDataCPU const&,
+ InputDataGPU&,
+ OutputDataGPU&,
+ ScratchDataGPU&,
+ OutputDataCPU&,
+ ConditionsProducts const&,
+ cudaStream_t,
+ uint32_t const,
+ uint32_t const);
+
+ } // namespace raw
+} // namespace ecal
+
+#endif // EventFilter_EcalRawToDigi_plugins_UnpackGPU_h
diff --git a/EventFilter/EcalRawToDigi/python/ecalDigis_cff.py b/EventFilter/EcalRawToDigi/python/ecalDigis_cff.py
index 849aaeeb414a4..00a54ad56c128 100644
--- a/EventFilter/EcalRawToDigi/python/ecalDigis_cff.py
+++ b/EventFilter/EcalRawToDigi/python/ecalDigis_cff.py
@@ -5,3 +5,24 @@
ecalDigis = _ecalEBunpacker.clone()
ecalDigisTask = cms.Task(ecalDigis)
+
+# process modifier to run on GPUs
+from Configuration.ProcessModifiers.gpu_cff import gpu
+
+# GPU-friendly EventSetup modules
+from EventFilter.EcalRawToDigi.ecalElectronicsMappingGPUESProducer_cfi import ecalElectronicsMappingGPUESProducer
+
+# raw to digi on GPUs
+from EventFilter.EcalRawToDigi.ecalRawToDigiGPU_cfi import ecalRawToDigiGPU as _ecalRawToDigiGPU
+ecalDigisGPU = _ecalRawToDigiGPU.clone()
+
+# copy the digi from the GPU to the CPU and convert to legacy format
+from EventFilter.EcalRawToDigi.ecalCPUDigisProducer_cfi import ecalCPUDigisProducer as _ecalCPUDigisProducer
+_ecalDigis_gpu = _ecalCPUDigisProducer.clone(
+ digisInLabelEB = ('ecalDigisGPU', 'ebDigis'),
+ digisInLabelEE = ('ecalDigisGPU', 'eeDigis'),
+ produceDummyIntegrityCollections = True
+)
+gpu.toReplaceWith(ecalDigis, _ecalDigis_gpu)
+
+gpu.toReplaceWith(ecalDigisTask, cms.Task(ecalElectronicsMappingGPUESProducer, ecalDigisGPU, ecalDigis))
diff --git a/EventFilter/EcalRawToDigi/src/ElectronicsMappingGPU.cc b/EventFilter/EcalRawToDigi/src/ElectronicsMappingGPU.cc
new file mode 100644
index 0000000000000..8264c501a896c
--- /dev/null
+++ b/EventFilter/EcalRawToDigi/src/ElectronicsMappingGPU.cc
@@ -0,0 +1,57 @@
+#include "EventFilter/EcalRawToDigi/interface/ElectronicsMappingGPU.h"
+
+#include "FWCore/Utilities/interface/typelookup.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
+
+#include "DataFormats/EcalDetId/interface/EcalElectronicsId.h"
+
+namespace ecal {
+ namespace raw {
+
+ // TODO: 0x3FFFFF * 4B ~= 16MB
+ // tmp solution for linear mapping of eid -> did
+ ElectronicsMappingGPU::ElectronicsMappingGPU(EcalMappingElectronics const& mapping) : eid2did_(0x3FFFFF) {
+ // fill in eb
+ // TODO: EB vector is actually empty
+ auto const& barrelValues = mapping.barrelItems();
+ for (unsigned int i = 0; i < barrelValues.size(); i++) {
+ EcalElectronicsId eid{barrelValues[i].electronicsid};
+ EBDetId did{EBDetId::unhashIndex(i)};
+ eid2did_[eid.linearIndex()] = did.rawId();
+ }
+
+ // fill in ee
+ auto const& endcapValues = mapping.endcapItems();
+ for (unsigned int i = 0; i < endcapValues.size(); i++) {
+ EcalElectronicsId eid{endcapValues[i].electronicsid};
+ EEDetId did{EEDetId::unhashIndex(i)};
+ eid2did_[eid.linearIndex()] = did.rawId();
+ }
+ }
+
+ ElectronicsMappingGPU::Product::~Product() {
+ // deallocation
+ cudaCheck(cudaFree(eid2did));
+ }
+
+ ElectronicsMappingGPU::Product const& ElectronicsMappingGPU::getProduct(cudaStream_t cudaStream) const {
+ auto const& product = product_.dataForCurrentDeviceAsync(
+ cudaStream, [this](ElectronicsMappingGPU::Product& product, cudaStream_t cudaStream) {
+ // malloc
+ cudaCheck(cudaMalloc((void**)&product.eid2did, this->eid2did_.size() * sizeof(uint32_t)));
+
+ // transfer
+ cudaCheck(cudaMemcpyAsync(product.eid2did,
+ this->eid2did_.data(),
+ this->eid2did_.size() * sizeof(uint32_t),
+ cudaMemcpyHostToDevice,
+ cudaStream));
+ });
+
+ return product;
+ }
+
+ } // namespace raw
+} // namespace ecal
+
+TYPELOOKUP_DATA_REG(ecal::raw::ElectronicsMappingGPU);
diff --git a/RecoLocalCalo/Configuration/python/customizeEcalOnlyForProfiling.py b/RecoLocalCalo/Configuration/python/customizeEcalOnlyForProfiling.py
new file mode 100644
index 0000000000000..4fa955bd33836
--- /dev/null
+++ b/RecoLocalCalo/Configuration/python/customizeEcalOnlyForProfiling.py
@@ -0,0 +1,51 @@
+import FWCore.ParameterSet.Config as cms
+
+# Customise the ECAL-only reconstruction to run on GPU
+#
+# Currently, this means running only the unpacker and multifit, up to the uncalbrated rechits
+def customizeEcalOnlyForProfilingGPUOnly(process):
+
+ process.consumer = cms.EDAnalyzer("GenericConsumer",
+ eventProducts = cms.untracked.vstring('ecalMultiFitUncalibRecHitGPU')
+ )
+
+ process.consume_step = cms.EndPath(process.consumer)
+
+ process.schedule = cms.Schedule(process.raw2digi_step, process.reconstruction_step, process.consume_step)
+
+ return process
+
+
+# Customise the ECAL-only reconstruction to run on GPU, and copy the data to the host
+#
+# Currently, this means running only the unpacker and multifit, up to the uncalbrated rechits
+def customizeEcalOnlyForProfilingGPUWithHostCopy(process):
+
+ process.consumer = cms.EDAnalyzer("GenericConsumer",
+ eventProducts = cms.untracked.vstring('ecalMultiFitUncalibRecHitSoA')
+ )
+
+ process.consume_step = cms.EndPath(process.consumer)
+
+ process.schedule = cms.Schedule(process.raw2digi_step, process.reconstruction_step, process.consume_step)
+
+ return process
+
+
+# Customise the ECAL-only reconstruction to run on GPU, copy the data to the host, and convert to legacy format
+#
+# Currently, this means running only the unpacker and multifit, up to the uncalbrated rechits, on the GPU
+# and the rechits producer on the CPU
+#
+# The same customisation can be also used on the CPU workflow, running up to the rechits on CPU.
+def customizeEcalOnlyForProfiling(process):
+
+ process.consumer = cms.EDAnalyzer("GenericConsumer",
+ eventProducts = cms.untracked.vstring('ecalRecHit')
+ )
+
+ process.consume_step = cms.EndPath(process.consumer)
+
+ process.schedule = cms.Schedule(process.raw2digi_step, process.reconstruction_step, process.consume_step)
+
+ return process
diff --git a/RecoLocalCalo/Configuration/python/ecalLocalRecoSequence_cff.py b/RecoLocalCalo/Configuration/python/ecalLocalRecoSequence_cff.py
index 06fecf4787baf..75ae5fc0c202f 100644
--- a/RecoLocalCalo/Configuration/python/ecalLocalRecoSequence_cff.py
+++ b/RecoLocalCalo/Configuration/python/ecalLocalRecoSequence_cff.py
@@ -1,4 +1,5 @@
import FWCore.ParameterSet.Config as cms
+from Configuration.ProcessModifiers.gpu_cff import gpu
# TPG condition needed by ecalRecHit producer if TT recovery is ON
from RecoLocalCalo.EcalRecProducers.ecalRecHitTPGConditions_cff import *
@@ -43,6 +44,61 @@
ecalOnlyLocalRecoSequence = cms.Sequence(ecalOnlyLocalRecoTask)
+# ECAL rechit calibrations on GPU
+from RecoLocalCalo.EcalRecProducers.ecalRechitADCToGeVConstantGPUESProducer_cfi import ecalRechitADCToGeVConstantGPUESProducer
+from RecoLocalCalo.EcalRecProducers.ecalRechitChannelStatusGPUESProducer_cfi import ecalRechitChannelStatusGPUESProducer
+from RecoLocalCalo.EcalRecProducers.ecalIntercalibConstantsGPUESProducer_cfi import ecalIntercalibConstantsGPUESProducer
+from RecoLocalCalo.EcalRecProducers.ecalLaserAPDPNRatiosGPUESProducer_cfi import ecalLaserAPDPNRatiosGPUESProducer
+from RecoLocalCalo.EcalRecProducers.ecalLaserAPDPNRatiosRefGPUESProducer_cfi import ecalLaserAPDPNRatiosRefGPUESProducer
+from RecoLocalCalo.EcalRecProducers.ecalLaserAlphasGPUESProducer_cfi import ecalLaserAlphasGPUESProducer
+from RecoLocalCalo.EcalRecProducers.ecalLinearCorrectionsGPUESProducer_cfi import ecalLinearCorrectionsGPUESProducer
+from RecoLocalCalo.EcalRecProducers.ecalRecHitParametersGPUESProducer_cfi import ecalRecHitParametersGPUESProducer
+
+# ECAL rechits running on GPU
+from RecoLocalCalo.EcalRecProducers.ecalRecHitGPU_cfi import ecalRecHitGPU as _ecalRecHitGPU
+ecalRecHitGPU = _ecalRecHitGPU.clone(
+ uncalibrecHitsInLabelEB = cms.InputTag('ecalMultiFitUncalibRecHitGPU', 'EcalUncalibRecHitsEB'),
+ uncalibrecHitsInLabelEE = cms.InputTag('ecalMultiFitUncalibRecHitGPU', 'EcalUncalibRecHitsEE')
+)
+
+# copy the rechits from GPU to CPU
+from RecoLocalCalo.EcalRecProducers.ecalCPURecHitProducer_cfi import ecalCPURecHitProducer as _ecalCPURecHitProducer
+ecalRecHitSoA = _ecalCPURecHitProducer.clone(
+ recHitsInLabelEB = cms.InputTag('ecalRecHitGPU', 'EcalRecHitsEB'),
+ recHitsInLabelEE = cms.InputTag('ecalRecHitGPU', 'EcalRecHitsEE')
+)
+
+# convert the rechits from SoA to legacy format
+from RecoLocalCalo.EcalRecProducers.ecalRecHitConvertGPU2CPUFormat_cfi import ecalRecHitConvertGPU2CPUFormat as _ecalRecHitConvertGPU2CPUFormat
+_ecalRecHit_gpu = _ecalRecHitConvertGPU2CPUFormat.clone(
+ recHitsLabelGPUEB = cms.InputTag('ecalRecHitSoA', 'EcalRecHitsEB'),
+ recHitsLabelGPUEE = cms.InputTag('ecalRecHitSoA', 'EcalRecHitsEE')
+)
+# TODO: the ECAL calibrated rechits produced on the GPU are not correct, yet.
+# When they are working and validated, remove this comment and uncomment the next line:
+#gpu.toReplaceWith(ecalRecHit, _ecalRecHit_gpu)
+
+# ECAL reconstruction on GPU
+gpu.toReplaceWith(ecalRecHitNoTPTask, cms.Task(
+ # ECAL rechit calibrations on GPU
+ ecalRechitADCToGeVConstantGPUESProducer,
+ ecalRechitChannelStatusGPUESProducer,
+ ecalIntercalibConstantsGPUESProducer,
+ ecalLaserAPDPNRatiosGPUESProducer,
+ ecalLaserAPDPNRatiosRefGPUESProducer,
+ ecalLaserAlphasGPUESProducer,
+ ecalLinearCorrectionsGPUESProducer,
+ ecalRecHitParametersGPUESProducer,
+ # ECAL rechits running on GPU
+ ecalRecHitGPU,
+ # copy the rechits from GPU to CPU
+ ecalRecHitSoA,
+ # convert the rechits from SoA to legacy format
+ ecalRecHit,
+ # ECAL preshower rechit legacy module
+ ecalPreshowerRecHit
+))
+
# Phase 2 modifications
from RecoLocalCalo.EcalRecProducers.ecalDetailedTimeRecHit_cfi import *
_phase2_timing_ecalRecHitTask = cms.Task( ecalRecHitTask.copy() , ecalDetailedTimeRecHit )
diff --git a/RecoLocalCalo/EcalRecAlgos/BuildFile.xml b/RecoLocalCalo/EcalRecAlgos/BuildFile.xml
index 2eaf053c342dd..c2858ae76d7bc 100644
--- a/RecoLocalCalo/EcalRecAlgos/BuildFile.xml
+++ b/RecoLocalCalo/EcalRecAlgos/BuildFile.xml
@@ -1,9 +1,13 @@
+
+
+
+
@@ -11,6 +15,7 @@
+
diff --git a/RecoLocalCalo/EcalRecAlgos/bin/BuildFile.xml b/RecoLocalCalo/EcalRecAlgos/bin/BuildFile.xml
new file mode 100644
index 0000000000000..4c98171091b84
--- /dev/null
+++ b/RecoLocalCalo/EcalRecAlgos/bin/BuildFile.xml
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/RecoLocalCalo/EcalRecAlgos/bin/makeEcalMultifitResultsGpuValidationPlots.cpp b/RecoLocalCalo/EcalRecAlgos/bin/makeEcalMultifitResultsGpuValidationPlots.cpp
new file mode 100644
index 0000000000000..f010e3afdbb18
--- /dev/null
+++ b/RecoLocalCalo/EcalRecAlgos/bin/makeEcalMultifitResultsGpuValidationPlots.cpp
@@ -0,0 +1,564 @@
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "DataFormats/Common/interface/Wrapper.h"
+#include "DataFormats/EcalRecHit/interface/EcalUncalibratedRecHit.h"
+#include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h"
+#include "CUDADataFormats/EcalRecHitSoA/interface/EcalUncalibratedRecHit.h"
+
+#include "TStyle.h"
+
+void setAxis(TH2D *histo) {
+ histo->GetXaxis()->SetTitle("cpu");
+ histo->GetYaxis()->SetTitle("gpu");
+}
+
+void setAxisDelta(TH2D *histo) {
+ histo->GetXaxis()->SetTitle("cpu");
+ histo->GetYaxis()->SetTitle("#Delta gpu-cpu");
+}
+
+int main(int argc, char *argv[]) {
+ if (argc < 3) {
+ std::cout << "run with: ./validateGPU