From a47505d04e85ac9953612051d6c0ab4e1588e8cc Mon Sep 17 00:00:00 2001 From: Alejandro Sanchez Date: Mon, 1 Nov 2021 03:16:05 +0100 Subject: [PATCH 1/2] Create new GpuTask module to monitor and compare CPU and GPU generated Ecal Rec Hits --- DQM/EcalMonitorTasks/interface/Collections.h | 10 +- DQM/EcalMonitorTasks/interface/GpuTask.h | 53 ++++++++ .../plugins/EcalDQMonitorTask2.cc | 4 + .../python/CollectionTags_cfi.py | 6 +- .../python/EcalMonitorTask_cfi.py | 7 +- DQM/EcalMonitorTasks/python/GpuTask_cfi.py | 127 ++++++++++++++++++ DQM/EcalMonitorTasks/src/GpuTask.cc | 114 ++++++++++++++++ 7 files changed, 317 insertions(+), 4 deletions(-) create mode 100644 DQM/EcalMonitorTasks/interface/GpuTask.h create mode 100644 DQM/EcalMonitorTasks/python/GpuTask_cfi.py create mode 100644 DQM/EcalMonitorTasks/src/GpuTask.cc diff --git a/DQM/EcalMonitorTasks/interface/Collections.h b/DQM/EcalMonitorTasks/interface/Collections.h index d4549389131c4..4c0809f306f6a 100644 --- a/DQM/EcalMonitorTasks/interface/Collections.h +++ b/DQM/EcalMonitorTasks/interface/Collections.h @@ -41,6 +41,10 @@ namespace ecaldqm { kEEBasicCluster, kEBSuperCluster, kEESuperCluster, + kEBCpuRecHit, + kEECpuRecHit, + kEBGpuRecHit, + kEEGpuRecHit, nCollections }; @@ -78,7 +82,11 @@ namespace ecaldqm { "EBBasicCluster", "EEBasicCluster", "EBSuperCluster", - "EESuperCluster"}; + "EESuperCluster", + "EBCpuRecHit", + "EECpuRecHit", + "EBGpuRecHit", + "EEGpuRecHit"}; } // namespace ecaldqm diff --git a/DQM/EcalMonitorTasks/interface/GpuTask.h b/DQM/EcalMonitorTasks/interface/GpuTask.h new file mode 100644 index 0000000000000..b27852b9295e0 --- /dev/null +++ b/DQM/EcalMonitorTasks/interface/GpuTask.h @@ -0,0 +1,53 @@ +#ifndef DQM_EcalMonitorTasks_GpuTask_H +#define DQM_EcalMonitorTasks_GpuTask_H + +#include "DQWorkerTask.h" + +#include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h" + +namespace ecaldqm { + + class GpuTask : public DQWorkerTask { + public: + GpuTask(); + ~GpuTask() override {} + + 
void addDependencies(DependencySet&) override; + + bool filterRunType(short const*) override; + + void beginEvent(edm::Event const&, edm::EventSetup const&, bool const&, bool&) override; + bool analyze(void const*, Collections) override; + + void runOnCpuRecHits(EcalRecHitCollection const&, Collections); + void runOnGpuRecHits(EcalRecHitCollection const&, Collections); + + private: + EcalRecHitCollection const* EBCpuRecHits_; + EcalRecHitCollection const* EECpuRecHits_; + }; + + inline bool GpuTask::analyze(void const* _p, Collections _collection) { + switch (_collection) { + case kEBCpuRecHit: + case kEECpuRecHit: + if (_p) + runOnCpuRecHits(*static_cast(_p), _collection); + return true; + break; + case kEBGpuRecHit: + case kEEGpuRecHit: + if (_p) + runOnGpuRecHits(*static_cast(_p), _collection); + return true; + break; + default: + break; + } + + return false; + } + +} // namespace ecaldqm + +#endif diff --git a/DQM/EcalMonitorTasks/plugins/EcalDQMonitorTask2.cc b/DQM/EcalMonitorTasks/plugins/EcalDQMonitorTask2.cc index 460df62ae446c..a550973967258 100644 --- a/DQM/EcalMonitorTasks/plugins/EcalDQMonitorTask2.cc +++ b/DQM/EcalMonitorTasks/plugins/EcalDQMonitorTask2.cc @@ -119,6 +119,10 @@ void EcalDQMonitorTask::formSchedule(std::vector const& _p case ecaldqm::kEBReducedRecHit: case ecaldqm::kEERecHit: case ecaldqm::kEEReducedRecHit: + case ecaldqm::kEBCpuRecHit: + case ecaldqm::kEECpuRecHit: + case ecaldqm::kEBGpuRecHit: + case ecaldqm::kEEGpuRecHit: collectionTokens_[*colItr] = edm::EDGetToken(consumes(tag)); sch.first = &EcalDQMonitorTask::runOnCollection; break; diff --git a/DQM/EcalMonitorTasks/python/CollectionTags_cfi.py b/DQM/EcalMonitorTasks/python/CollectionTags_cfi.py index ffb63bec0415b..798e37d499b28 100644 --- a/DQM/EcalMonitorTasks/python/CollectionTags_cfi.py +++ b/DQM/EcalMonitorTasks/python/CollectionTags_cfi.py @@ -44,5 +44,9 @@ EBBasicCluster = cms.untracked.InputTag("particleFlowClusterECAL"), EEBasicCluster = 
cms.untracked.InputTag("particleFlowClusterECAL"), EBSuperCluster = cms.untracked.InputTag("particleFlowSuperClusterECAL", "particleFlowSuperClusterECALBarrel"), - EESuperCluster = cms.untracked.InputTag("particleFlowSuperClusterECAL", "particleFlowSuperClusterECALEndcapWithPreshower") + EESuperCluster = cms.untracked.InputTag("particleFlowSuperClusterECAL", "particleFlowSuperClusterECALEndcapWithPreshower"), + EBCpuRecHit = cms.untracked.InputTag("ecalRecHit@cpu", "EcalRecHitsEB"), + EECpuRecHit = cms.untracked.InputTag("ecalRecHit@cpu", "EcalRecHitsEE"), + EBGpuRecHit = cms.untracked.InputTag("ecalRecHit@cuda", "EcalRecHitsEB"), + EEGpuRecHit = cms.untracked.InputTag("ecalRecHit@cuda", "EcalRecHitsEE") ) diff --git a/DQM/EcalMonitorTasks/python/EcalMonitorTask_cfi.py b/DQM/EcalMonitorTasks/python/EcalMonitorTask_cfi.py index 2ffd474763694..427eee1c85b82 100644 --- a/DQM/EcalMonitorTasks/python/EcalMonitorTask_cfi.py +++ b/DQM/EcalMonitorTasks/python/EcalMonitorTask_cfi.py @@ -14,6 +14,7 @@ from DQM.EcalMonitorTasks.SelectiveReadoutTask_cfi import ecalSelectiveReadoutTask from DQM.EcalMonitorTasks.TimingTask_cfi import ecalTimingTask from DQM.EcalMonitorTasks.TrigPrimTask_cfi import ecalTrigPrimTask +from DQM.EcalMonitorTasks.GpuTask_cfi import ecalGpuTask from DQMServices.Core.DQMEDAnalyzer import DQMEDAnalyzer ecalMonitorTask = DQMEDAnalyzer('EcalDQMonitorTask', @@ -28,7 +29,8 @@ "RawDataTask", "RecoSummaryTask", "TimingTask", - "TrigPrimTask" + "TrigPrimTask", + "GpuTask" ), # task parameters (included from indivitual cfis) workerParameters = cms.untracked.PSet( @@ -41,7 +43,8 @@ RecoSummaryTask = ecalRecoSummaryTask, SelectiveReadoutTask = ecalSelectiveReadoutTask, TimingTask = ecalTimingTask, - TrigPrimTask = ecalTrigPrimTask + TrigPrimTask = ecalTrigPrimTask, + GpuTask = ecalGpuTask ), commonParameters = ecalCommonParams, collectionTags = ecalDQMCollectionTags, diff --git a/DQM/EcalMonitorTasks/python/GpuTask_cfi.py 
b/DQM/EcalMonitorTasks/python/GpuTask_cfi.py new file mode 100644 index 0000000000000..da211ae8d4b9a --- /dev/null +++ b/DQM/EcalMonitorTasks/python/GpuTask_cfi.py @@ -0,0 +1,127 @@ +import FWCore.ParameterSet.Config as cms + +nHitsBins = 100 +nHitsMax = 5000 + +energyBins = 100 +energyMax = 2.0 + +timeBins = 100 +timeMax = 12.5 + +flagsBins = 40 +flagsMax = 1500 + +deltaBins = 101 +delta = 0.2 + +ecalGpuTask = cms.untracked.PSet( + MEs = cms.untracked.PSet( + # CPU Plots + RecHitCpu = cms.untracked.PSet( + path = cms.untracked.string('%(subdet)s/%(prefix)sGpuTask/%(prefix)sGT number of rec hits cpu'), + kind = cms.untracked.string('TH1F'), + otype = cms.untracked.string('Ecal2P'), + xaxis = cms.untracked.PSet( + high = cms.untracked.double(nHitsMax), + nbins = cms.untracked.int32(nHitsBins), + low = cms.untracked.double(0), + title = cms.untracked.string('Rec Hits per Event') + ), + btype = cms.untracked.string('User'), + description = cms.untracked.string('Number of total CPU Rec Hits per Event') + ), + RecHitCpuEnergy = cms.untracked.PSet( + path = cms.untracked.string('%(subdet)s/%(prefix)sGpuTask/%(prefix)sGT rec hit energy cpu'), + kind = cms.untracked.string('TH1F'), + otype = cms.untracked.string('Ecal2P'), + xaxis = cms.untracked.PSet( + high = cms.untracked.double(energyMax), + nbins = cms.untracked.int32(energyBins), + low = cms.untracked.double(0), + title = cms.untracked.string('Energy (GeV)') + ), + btype = cms.untracked.string('User'), + description = cms.untracked.string('CPU Rec Hit Energy (GeV)') + ), + RecHitCpuTime = cms.untracked.PSet( + path = cms.untracked.string('%(subdet)s/%(prefix)sGpuTask/%(prefix)sGT rec hit time cpu'), + kind = cms.untracked.string('TH1F'), + otype = cms.untracked.string('Ecal2P'), + xaxis = cms.untracked.PSet( + high = cms.untracked.double(timeMax), + nbins = cms.untracked.int32(timeBins), + low = cms.untracked.double(-timeMax), + title = cms.untracked.string('Time (ns)') + ), + btype = cms.untracked.string('User'), + 
description = cms.untracked.string('CPU Rec Hit Time') + ), + RecHitCpuFlags = cms.untracked.PSet( + path = cms.untracked.string('%(subdet)s/%(prefix)sGpuTask/%(prefix)sGT rec hit flags cpu'), + kind = cms.untracked.string('TH1F'), + otype = cms.untracked.string('Ecal2P'), + xaxis = cms.untracked.PSet( + high = cms.untracked.double(flagsMax), + nbins = cms.untracked.int32(flagsBins), + low = cms.untracked.double(0), + title = cms.untracked.string('Flags') + ), + btype = cms.untracked.string('User'), + description = cms.untracked.string('CPU Rec Hit Flags') + ), + # GPU-CPU Difference Plots + RecHitGpuCpu = cms.untracked.PSet( + path = cms.untracked.string('%(subdet)s/%(prefix)sGpuTask/%(prefix)sGT number of rec hits diff gpu-cpu'), + kind = cms.untracked.string('TH1F'), + otype = cms.untracked.string('Ecal2P'), + xaxis = cms.untracked.PSet( + high = cms.untracked.double(delta*nHitsMax), + nbins = cms.untracked.int32(deltaBins), + low = cms.untracked.double(-delta*nHitsMax), + title = cms.untracked.string('GPU-CPU Rec Hits per Event') + ), + btype = cms.untracked.string('User'), + description = cms.untracked.string('GPU-CPU difference of number of total Rec Hits per Event') + ), + RecHitGpuCpuEnergy = cms.untracked.PSet( + path = cms.untracked.string('%(subdet)s/%(prefix)sGpuTask/%(prefix)sGT rec hit energy diff gpu-cpu'), + kind = cms.untracked.string('TH1F'), + otype = cms.untracked.string('Ecal2P'), + xaxis = cms.untracked.PSet( + high = cms.untracked.double(delta*energyMax), + nbins = cms.untracked.int32(deltaBins), + low = cms.untracked.double(-delta*energyMax), + title = cms.untracked.string('GPU-CPU Energy (GeV)') + ), + btype = cms.untracked.string('User'), + description = cms.untracked.string('GPU-CPU difference of Rec Hit Energy (GeV)') + ), + RecHitGpuCpuTime = cms.untracked.PSet( + path = cms.untracked.string('%(subdet)s/%(prefix)sGpuTask/%(prefix)sGT rec hit time diff gpu-cpu'), + kind = cms.untracked.string('TH1F'), + otype = 
cms.untracked.string('Ecal2P'), + xaxis = cms.untracked.PSet( + high = cms.untracked.double(delta*timeMax), + nbins = cms.untracked.int32(deltaBins), + low = cms.untracked.double(-delta*timeMax), + title = cms.untracked.string('GPU-CPU Time (ns)') + ), + btype = cms.untracked.string('User'), + description = cms.untracked.string('GPU-CPU difference of Rec Hit Time') + ), + RecHitGpuCpuFlags = cms.untracked.PSet( + path = cms.untracked.string('%(subdet)s/%(prefix)sGpuTask/%(prefix)sGT rec hit flags diff gpu-cpu'), + kind = cms.untracked.string('TH1F'), + otype = cms.untracked.string('Ecal2P'), + xaxis = cms.untracked.PSet( + high = cms.untracked.double(delta*flagsMax), + nbins = cms.untracked.int32(deltaBins), + low = cms.untracked.double(-delta*flagsMax), + title = cms.untracked.string('GPU-CPU Flags') + ), + btype = cms.untracked.string('User'), + description = cms.untracked.string('GPU-CPU difference of Rec Hit Flags') + ) + ) +) diff --git a/DQM/EcalMonitorTasks/src/GpuTask.cc b/DQM/EcalMonitorTasks/src/GpuTask.cc new file mode 100644 index 0000000000000..ae97f2605bf2a --- /dev/null +++ b/DQM/EcalMonitorTasks/src/GpuTask.cc @@ -0,0 +1,114 @@ +#include "DQM/EcalMonitorTasks/interface/GpuTask.h" + +#include "DQM/EcalCommon/interface/EcalDQMCommonUtils.h" + +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" + +#include "DataFormats/EcalRawData/interface/EcalDCCHeaderBlock.h" + +namespace ecaldqm { + GpuTask::GpuTask() : DQWorkerTask() {} + + void GpuTask::addDependencies(DependencySet& _dependencies) { + // Ensure we run on CpuRecHits before GpuRecHits + _dependencies.push_back(Dependency(kEBGpuRecHit, kEBCpuRecHit)); + _dependencies.push_back(Dependency(kEEGpuRecHit, kEECpuRecHit)); + } + + bool GpuTask::filterRunType(short const* _runType) { + for (unsigned iFED(0); iFED != ecaldqm::nDCC; iFED++) { + if (_runType[iFED] == EcalDCCHeaderBlock::COSMIC || _runType[iFED] == EcalDCCHeaderBlock::MTCC || + 
_runType[iFED] == EcalDCCHeaderBlock::COSMICS_GLOBAL || + _runType[iFED] == EcalDCCHeaderBlock::PHYSICS_GLOBAL || _runType[iFED] == EcalDCCHeaderBlock::COSMICS_LOCAL || + _runType[iFED] == EcalDCCHeaderBlock::PHYSICS_LOCAL) + return true; + } + + return false; + } + + void GpuTask::beginEvent(edm::Event const&, edm::EventSetup const&, bool const&, bool&) { + EBCpuRecHits_ = nullptr; + EECpuRecHits_ = nullptr; + } + + void GpuTask::runOnCpuRecHits(EcalRecHitCollection const& _hits, Collections _collection) { + MESet& meRecHitCpu(MEs_.at("RecHitCpu")); + MESet& meRecHitCpuEnergy(MEs_.at("RecHitCpuEnergy")); + MESet& meRecHitCpuTime(MEs_.at("RecHitCpuTime")); + MESet& meRecHitCpuFlags(MEs_.at("RecHitCpuFlags")); + + int iSubdet(_collection == kEBCpuRecHit ? EcalBarrel : EcalEndcap); + + // Save CpuRecHits for comparison with GpuRecHits + if (iSubdet == EcalBarrel) + EBCpuRecHits_ = &_hits; + else + EECpuRecHits_ = &_hits; + + unsigned nCpuHits(_hits.size()); + meRecHitCpu.fill(getEcalDQMSetupObjects(), iSubdet, nCpuHits); + + for (EcalRecHitCollection::const_iterator hitItr(_hits.begin()); hitItr != _hits.end(); ++hitItr) { + float cpuEnergy(hitItr->energy()); + if (cpuEnergy < 0.) + continue; + + float cpuTime(hitItr->time()); + uint32_t cpuFlags(hitItr->flagsBits()); + + meRecHitCpuEnergy.fill(getEcalDQMSetupObjects(), iSubdet, cpuEnergy); + meRecHitCpuTime.fill(getEcalDQMSetupObjects(), iSubdet, cpuTime); + meRecHitCpuFlags.fill(getEcalDQMSetupObjects(), iSubdet, cpuFlags); + } + } + + // Should always run after GpuTask::runOnCpuRecHits(); differences are taken in double so GPU < CPU stays negative + void GpuTask::runOnGpuRecHits(EcalRecHitCollection const& _gpuHits, Collections _collection) { + MESet& meRecHitGpuCpu(MEs_.at("RecHitGpuCpu")); + MESet& meRecHitGpuCpuEnergy(MEs_.at("RecHitGpuCpuEnergy")); + MESet& meRecHitGpuCpuTime(MEs_.at("RecHitGpuCpuTime")); + MESet& meRecHitGpuCpuFlags(MEs_.at("RecHitGpuCpuFlags")); + + int iSubdet(_collection == kEBGpuRecHit ? 
EcalBarrel : EcalEndcap); + + // Get CpuRecHits saved from GpuTask::runOnCpuRecHits() for this event + // Note: _gpuHits is a collection and cpuHits is a pointer to a collection + EcalRecHitCollection const* cpuHits = (iSubdet == EcalBarrel) ? EBCpuRecHits_ : EECpuRecHits_; + if (!cpuHits) { + edm::LogWarning("EcalDQM") << "GpuTask: Did not find " << ((iSubdet == EcalBarrel) ? "EB" : "EE") + << "CpuRecHits Collection. Aborting runOnGpuRecHits\n"; + return; + } + + unsigned nGpuHits(_gpuHits.size()); + unsigned nCpuHits(cpuHits->size()); + meRecHitGpuCpu.fill(getEcalDQMSetupObjects(), iSubdet, static_cast<double>(nGpuHits) - nCpuHits); + + for (EcalRecHitCollection::const_iterator gpuItr(_gpuHits.begin()); gpuItr != _gpuHits.end(); ++gpuItr) { + // Find CpuRecHit with matching DetId + DetId gpuId(gpuItr->detid()); + EcalRecHitCollection::const_iterator cpuItr(cpuHits->find(gpuId)); + if (cpuItr == cpuHits->end()) { + edm::LogWarning("EcalDQM") << "GpuTask: Did not find DetId " << gpuId.rawId() << " in a CPU collection\n"; + continue; + } + + float gpuEnergy(gpuItr->energy()); + float cpuEnergy(cpuItr->energy()); + + float gpuTime(gpuItr->time()); + float cpuTime(cpuItr->time()); + + uint32_t gpuFlags(gpuItr->flagsBits()); + uint32_t cpuFlags(cpuItr->flagsBits()); + + meRecHitGpuCpuEnergy.fill(getEcalDQMSetupObjects(), iSubdet, gpuEnergy - cpuEnergy); + meRecHitGpuCpuTime.fill(getEcalDQMSetupObjects(), iSubdet, gpuTime - cpuTime); + meRecHitGpuCpuFlags.fill(getEcalDQMSetupObjects(), iSubdet, static_cast<double>(gpuFlags) - cpuFlags); + } + } + + DEFINE_ECALDQM_WORKER(GpuTask); +} // namespace ecaldqm From 5c364e9bf58f7d3d4238a94998888474476e42d1 Mon Sep 17 00:00:00 2001 From: Alejandro Sanchez Date: Wed, 3 Nov 2021 17:44:58 +0100 Subject: [PATCH 2/2] Add switch to GpuTask to prevent unnecessary warnings when not in use --- DQM/EcalMonitorTasks/interface/GpuTask.h | 7 +++++-- DQM/EcalMonitorTasks/python/GpuTask_cfi.py | 3 +++ DQM/EcalMonitorTasks/src/GpuTask.cc | 18 +++++++++++++++--- 3 files changed, 23
insertions(+), 5 deletions(-) diff --git a/DQM/EcalMonitorTasks/interface/GpuTask.h b/DQM/EcalMonitorTasks/interface/GpuTask.h index b27852b9295e0..383ff02af1ae0 100644 --- a/DQM/EcalMonitorTasks/interface/GpuTask.h +++ b/DQM/EcalMonitorTasks/interface/GpuTask.h @@ -23,6 +23,9 @@ namespace ecaldqm { void runOnGpuRecHits(EcalRecHitCollection const&, Collections); private: + void setParams(edm::ParameterSet const&) override; + + bool runOnGpu_; EcalRecHitCollection const* EBCpuRecHits_; EcalRecHitCollection const* EECpuRecHits_; }; @@ -37,9 +40,9 @@ namespace ecaldqm { break; case kEBGpuRecHit: case kEEGpuRecHit: - if (_p) + if (_p && runOnGpu_) runOnGpuRecHits(*static_cast(_p), _collection); - return true; + return runOnGpu_; break; default: break; diff --git a/DQM/EcalMonitorTasks/python/GpuTask_cfi.py b/DQM/EcalMonitorTasks/python/GpuTask_cfi.py index da211ae8d4b9a..69532e906c20f 100644 --- a/DQM/EcalMonitorTasks/python/GpuTask_cfi.py +++ b/DQM/EcalMonitorTasks/python/GpuTask_cfi.py @@ -16,6 +16,9 @@ delta = 0.2 ecalGpuTask = cms.untracked.PSet( + params = cms.untracked.PSet( + runOnGpu = cms.untracked.bool(False) + ), MEs = cms.untracked.PSet( # CPU Plots RecHitCpu = cms.untracked.PSet( diff --git a/DQM/EcalMonitorTasks/src/GpuTask.cc b/DQM/EcalMonitorTasks/src/GpuTask.cc index ae97f2605bf2a..2143795b679a4 100644 --- a/DQM/EcalMonitorTasks/src/GpuTask.cc +++ b/DQM/EcalMonitorTasks/src/GpuTask.cc @@ -8,12 +8,24 @@ #include "DataFormats/EcalRawData/interface/EcalDCCHeaderBlock.h" namespace ecaldqm { - GpuTask::GpuTask() : DQWorkerTask() {} + GpuTask::GpuTask() : DQWorkerTask(), runOnGpu_(false), EBCpuRecHits_(nullptr), EECpuRecHits_(nullptr) {} void GpuTask::addDependencies(DependencySet& _dependencies) { // Ensure we run on CpuRecHits before GpuRecHits - _dependencies.push_back(Dependency(kEBGpuRecHit, kEBCpuRecHit)); - _dependencies.push_back(Dependency(kEEGpuRecHit, kEECpuRecHit)); + if (runOnGpu_) { + _dependencies.push_back(Dependency(kEBGpuRecHit, 
kEBCpuRecHit)); + _dependencies.push_back(Dependency(kEEGpuRecHit, kEECpuRecHit)); + } + } + + void GpuTask::setParams(edm::ParameterSet const& _params) { + runOnGpu_ = _params.getUntrackedParameter<bool>("runOnGpu"); + if (!runOnGpu_) {  // GPU monitoring is off: drop the GPU-CPU comparison MEs + MEs_.erase(std::string("RecHitGpuCpu")); + MEs_.erase(std::string("RecHitGpuCpuEnergy")); + MEs_.erase(std::string("RecHitGpuCpuTime")); + MEs_.erase(std::string("RecHitGpuCpuFlags")); + } } bool GpuTask::filterRunType(short const* _runType) {