diff --git a/Configuration/ProcessModifiers/python/ecal_deepsc_cff.py b/Configuration/ProcessModifiers/python/ecal_deepsc_cff.py new file mode 100644 index 0000000000000..997decae5b524 --- /dev/null +++ b/Configuration/ProcessModifiers/python/ecal_deepsc_cff.py @@ -0,0 +1,6 @@ +import FWCore.ParameterSet.Config as cms + +# This modifier is for ECAL SuperCluster with ML studies + +ecal_deepsc = cms.Modifier() + diff --git a/Configuration/PyReleaseValidation/README.md b/Configuration/PyReleaseValidation/README.md index 4098491d9779c..e448bf053fd42 100644 --- a/Configuration/PyReleaseValidation/README.md +++ b/Configuration/PyReleaseValidation/README.md @@ -57,6 +57,7 @@ The offsets currently in use are: * 0.13: MLPF algorithm * 0.15: JME NanoAOD * 0.17: Run-3 deep core seeding for JetCore iteration +* 0.19: ECAL SuperClustering with DeepSC algorithm * 0.21: Production-like sequence * 0.24: 0 Tesla (Run-2, Run-3) * 0.31: Photon energy corrections with DRN architecture diff --git a/Configuration/PyReleaseValidation/python/relval_2017.py b/Configuration/PyReleaseValidation/python/relval_2017.py index cfb95356acc48..6fa76deeb654a 100644 --- a/Configuration/PyReleaseValidation/python/relval_2017.py +++ b/Configuration/PyReleaseValidation/python/relval_2017.py @@ -39,7 +39,8 @@ # (Patatrack HCAL-only: TTbar - on CPU) # (TTbar 0T, TTbar PU 0T) # (TTbar FastSim) -# (TTbar PU MLPF) +# (TTbar PU MLPF ecal_deepsc) +# (ZEE ecal_deesc) # (TTbar PU prod-like) # (QCD 1.8TeV DeepCore) # (TTbar DigiNoHLT) @@ -71,7 +72,8 @@ 11634.521, 11634.24,11834.24, 11634.301, - 11834.13, + 11834.13, 11834.19, + 11846.19, 11834.21, 11723.17, 11634.601, diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py index 3a1138c372193..7be1e7b0d1816 100644 --- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py +++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py @@ -517,6 +517,35 @@ def condition(self, fragment, stepList, key, hasHarvest): '--procModifiers': 'mlpf' } + +# ECAL DeepSC clustering studies workflow +class UpgradeWorkflow_ecalclustering(UpgradeWorkflow): + def setup_(self, step, stepName, stepDict, k, properties): + if 'Reco' in step: + stepDict[stepName][k] = merge([self.step3, stepDict[step][k]]) + def condition(self, fragment, stepList, key, hasHarvest): + return (fragment=="ZEE_14" or fragment=="TTbar_14TeV" or fragment=="WprimeTolNu_M3000_13TeV_pythia8" + or fragment=="DisplacedSUSY_stopToBottom_M_300_1000mm_13" or fragment=="RunEGamma2018D" ) + +upgradeWFs['ecalDeepSC'] = UpgradeWorkflow_ecalclustering( + steps = [ + 'Reco', + 'RecoNano', + ], + PU = [ + 'Reco', + 'RecoNano', + ], + suffix = '_ecalDeepSC', + offset = 0.19, +) +upgradeWFs['ecalDeepSC'].step3 = { + '--datatier': 'RECOSIM,MINIAODSIM,NANOAODSIM,DQMIO', + '--eventcontent': 'RECOSIM,MINIAODSIM,NANOEDMAODSIM,DQM', + '--procModifiers': 'ecal_deepsc' +} + + # photonDRN workflows class UpgradeWorkflow_photonDRN(UpgradeWorkflow): def setup_(self, step, stepName, stepDict, k, properties): @@ -543,6 +572,7 @@ def condition(self, fragment, stepList, key, hasHarvest): '--procModifiers': 'enableSonicTriton,photonDRN' } + # Patatrack workflows: # - 2018 conditions, TTbar # - 2018 conditions, Z->mumu, diff --git a/RecoEcal/EgammaClusterAlgos/BuildFile.xml b/RecoEcal/EgammaClusterAlgos/BuildFile.xml index 6334a637b8a2f..685870289393a 100644 --- a/RecoEcal/EgammaClusterAlgos/BuildFile.xml +++ b/RecoEcal/EgammaClusterAlgos/BuildFile.xml @@ -1,18 +1,24 @@ + + + + - + + + diff --git a/RecoEcal/EgammaClusterAlgos/interface/PFECALSuperClusterAlgo.h b/RecoEcal/EgammaClusterAlgos/interface/PFECALSuperClusterAlgo.h index 4e3558e89fe90..b311b99333ad8 100644 --- a/RecoEcal/EgammaClusterAlgos/interface/PFECALSuperClusterAlgo.h +++ b/RecoEcal/EgammaClusterAlgos/interface/PFECALSuperClusterAlgo.h @@ -13,11 +13,22 @@ #include "DataFormats/EgammaReco/interface/BasicCluster.h" #include "DataFormats/EgammaReco/interface/BasicClusterFwd.h" +#include "DataFormats/EcalDetId/interface/EBDetId.h" +#include "DataFormats/EcalDetId/interface/EEDetId.h" +#include "DataFormats/CaloRecHit/interface/CaloCluster.h" #include "DataFormats/EcalRecHit/interface/EcalRecHitCollections.h" +#include "Geometry/CaloTopology/interface/CaloTopology.h" +#include "Geometry/CaloTopology/interface/CaloSubdetectorTopology.h" +#include "Geometry/Records/interface/CaloTopologyRecord.h" +#include "Geometry/CaloGeometry/interface/CaloGeometry.h" +#include "Geometry/CaloGeometry/interface/CaloSubdetectorGeometry.h" + #include "RecoParticleFlow/PFClusterTools/interface/PFEnergyCalibration.h" #include "RecoEcal/EgammaClusterAlgos/interface/SCEnergyCorrectorSemiParm.h" +#include "RecoEcal/EgammaCoreTools/interface/EcalClustersGraph.h" +#include "RecoEcal/EgammaCoreTools/interface/CalibratedPFCluster.h" #include "FWCore/Framework/interface/Event.h" #include "FWCore/Framework/interface/ESHandle.h" @@ -37,6 +48,8 @@ #include "CondFormats/EcalObjects/interface/EcalSCDynamicDPhiParameters.h" #include "CondFormats/DataRecord/interface/EcalSCDynamicDPhiParametersRcd.h" +#include "RecoEcal/EgammaCoreTools/interface/SCProducerCache.h" + #include #include @@ -48,31 +61,16 @@ \date July 2012 */ +typedef std::shared_ptr CalibratedClusterPtr; +typedef std::vector CalibratedClusterPtrVector; + class PFECALSuperClusterAlgo { public: - enum clustering_type { kBOX = 1, kMustache = 2 }; + enum clustering_type { kBOX = 1, kMustache = 2, kDeepSC = 3 }; enum energy_weight { kRaw, kCalibratedNoPS, kCalibratedTotal }; - // simple class for associating calibrated energies - class CalibratedPFCluster { - public: - CalibratedPFCluster(const edm::Ptr& p) : cluptr(p) {} - - double energy() const { return cluptr->correctedEnergy(); } - double energy_nocalib() const { return cluptr->energy(); } - double eta() const { return cluptr->positionREP().eta(); } - double phi() const { return cluptr->positionREP().phi(); } - - edm::Ptr the_ptr() const { return cluptr; } - - private: - edm::Ptr cluptr; - }; - typedef std::shared_ptr CalibratedClusterPtr; - typedef std::vector CalibratedClusterPtrVector; - /// constructor - PFECALSuperClusterAlgo(); + PFECALSuperClusterAlgo(const reco::SCProducerCache* cache); void setVerbosityLevel(bool verbose) { verbose_ = verbose; } @@ -110,6 +108,7 @@ class PFECALSuperClusterAlgo { void setCrackCorrections(bool applyCrackCorrections) { applyCrackCorrections_ = applyCrackCorrections; } void setTokens(const edm::ParameterSet&, edm::ConsumesCollector&&); + void update(const edm::EventSetup&); void updateSCParams(const edm::EventSetup&); @@ -129,9 +128,17 @@ class PFECALSuperClusterAlgo { edm::ESGetToken esChannelStatusToken_; edm::ESGetToken ecalMustacheSCParametersToken_; edm::ESGetToken ecalSCDynamicDPhiParametersToken_; + edm::ESGetToken caloTopologyToken_; + edm::ESGetToken caloGeometryToken_; const reco::BeamSpot* beamSpot_; const ESChannelStatus* channelStatus_; + const CaloGeometry* geometry_; + const CaloSubdetectorGeometry* ebGeom_; + const CaloSubdetectorGeometry* eeGeom_; + const CaloSubdetectorGeometry* esGeom_; + const CaloTopology* topology_; + const EcalMustacheSCParameters* mustacheSCParams_; const EcalSCDynamicDPhiParameters* scDynamicDPhiParams_; @@ -144,7 +151,10 @@ class PFECALSuperClusterAlgo { clustering_type _clustype; energy_weight _eweight; void buildAllSuperClusters(CalibratedClusterPtrVector&, double seedthresh); - void buildSuperCluster(CalibratedClusterPtr&, CalibratedClusterPtrVector&); + void buildAllSuperClustersMustacheOrBox(CalibratedClusterPtrVector&, double seedthresh); + void buildAllSuperClustersDeepSC(CalibratedClusterPtrVector&, double seedthresh); + void buildSuperClusterMustacheOrBox(CalibratedClusterPtr&, CalibratedClusterPtrVector&); + void finalizeSuperCluster(CalibratedClusterPtr& seed, CalibratedClusterPtrVector& clustered, bool isEE); bool verbose_; @@ -173,6 +183,8 @@ class PFECALSuperClusterAlgo { bool applyCrackCorrections_; bool threshIsET_; + const reco::SCProducerCache* SCProducerCache_; + // OOT photons bool isOOTCollection_; edm::EDGetTokenT inputTagBarrelRecHits_; diff --git a/RecoEcal/EgammaClusterAlgos/src/PFECALSuperClusterAlgo.cc b/RecoEcal/EgammaClusterAlgos/src/PFECALSuperClusterAlgo.cc index 92b0ee63e7f6d..15c7a82c9325f 100644 --- a/RecoEcal/EgammaClusterAlgos/src/PFECALSuperClusterAlgo.cc +++ b/RecoEcal/EgammaClusterAlgos/src/PFECALSuperClusterAlgo.cc @@ -1,8 +1,11 @@ #include "RecoEcal/EgammaClusterAlgos/interface/PFECALSuperClusterAlgo.h" +#include "RecoEcal/EgammaCoreTools/interface/EcalClustersGraph.h" #include "CommonTools/ParticleFlow/interface/PFClusterWidthAlgo.h" #include "RecoParticleFlow/PFClusterTools/interface/LinkByRecHit.h" #include "DataFormats/ParticleFlowReco/interface/PFLayer.h" #include "DataFormats/ParticleFlowReco/interface/PFRecHit.h" +#include "DataFormats/EcalDetId/interface/EBDetId.h" +#include "DataFormats/EcalDetId/interface/EEDetId.h" #include "DataFormats/EcalDetId/interface/ESDetId.h" #include "DataFormats/HcalDetId/interface/HcalDetId.h" #include "RecoEcal/EgammaCoreTools/interface/Mustache.h" @@ -26,8 +29,6 @@ namespace { typedef edm::View PFClusterView; typedef edm::Ptr PFClusterPtr; typedef edm::PtrVector PFClusterPtrVector; - typedef PFECALSuperClusterAlgo::CalibratedClusterPtr CalibClusterPtr; - typedef PFECALSuperClusterAlgo::CalibratedClusterPtrVector CalibClusterPtrVector; typedef std::pair EEPSPair; bool sortByKey(const EEPSPair& a, const EEPSPair& b) { return a.first < b.first; } @@ -37,23 +38,23 @@ namespace { return energy * std::sqrt(v.perp2() / v.mag2()); } - bool greaterByEt(const CalibClusterPtr& x, const CalibClusterPtr& y) { + bool greaterByEt(const CalibratedClusterPtr& x, const CalibratedClusterPtr& y) { const math::XYZPoint zero(0, 0, 0); - const double xpt = ptFast(x->energy(), x->the_ptr()->position(), zero); - const double ypt = ptFast(y->energy(), y->the_ptr()->position(), zero); + const double xpt = ptFast(x->energy(), x->ptr()->position(), zero); + const double ypt = ptFast(y->energy(), y->ptr()->position(), zero); return xpt > ypt; } - bool isSeed(const CalibClusterPtr& x, double threshold, bool useETcut) { + bool isSeed(const CalibratedClusterPtr& x, double threshold, bool useETcut) { const math::XYZPoint zero(0, 0, 0); double e_or_et = x->energy(); if (useETcut) - e_or_et = ptFast(e_or_et, x->the_ptr()->position(), zero); + e_or_et = ptFast(e_or_et, x->ptr()->position(), zero); return e_or_et > threshold; } - bool isLinkedByRecHit(const CalibClusterPtr& x, - const CalibClusterPtr& seed, + bool isLinkedByRecHit(const CalibratedClusterPtr& x, + const CalibratedClusterPtr& seed, const double threshold, const double majority, const double maxDEta, @@ -67,8 +68,8 @@ namespace { return false; } // now see if the clusters overlap in rechits - const auto& seedHitsAndFractions = seed->the_ptr()->hitsAndFractions(); - const auto& xHitsAndFractions = x->the_ptr()->hitsAndFractions(); + const auto& seedHitsAndFractions = seed->ptr()->hitsAndFractions(); + const auto& xHitsAndFractions = x->ptr()->hitsAndFractions(); double x_rechits_tot = xHitsAndFractions.size(); double x_rechits_match = 0.0; for (const std::pair& seedHit : seedHitsAndFractions) { @@ -81,8 +82,8 @@ namespace { return x_rechits_match / x_rechits_tot > majority; } - bool isClustered(const CalibClusterPtr& x, - const CalibClusterPtr seed, + bool isClustered(const CalibratedClusterPtr& x, + const CalibratedClusterPtr seed, const PFECALSuperClusterAlgo::clustering_type type, const EcalMustacheSCParameters* mustache_params, const EcalSCDynamicDPhiParameters* dynamic_dphi_params, @@ -107,14 +108,15 @@ namespace { } // namespace -PFECALSuperClusterAlgo::PFECALSuperClusterAlgo() : beamSpot_(nullptr) {} +PFECALSuperClusterAlgo::PFECALSuperClusterAlgo(const reco::SCProducerCache* cache) + : beamSpot_(nullptr), SCProducerCache_(cache) {} void PFECALSuperClusterAlgo::setPFClusterCalibration(const std::shared_ptr& calib) { _pfEnergyCalibration = calib; } void PFECALSuperClusterAlgo::setTokens(const edm::ParameterSet& iConfig, edm::ConsumesCollector&& cc) { - inputTagPFClusters_ = cc.consumes >(iConfig.getParameter("PFClusters")); + inputTagPFClusters_ = cc.consumes>(iConfig.getParameter("PFClusters")); inputTagPFClustersES_ = cc.consumes(iConfig.getParameter("ESAssociation")); inputTagBeamSpot_ = cc.consumes(iConfig.getParameter("BeamSpot")); @@ -137,10 +139,14 @@ void PFECALSuperClusterAlgo::setTokens(const edm::ParameterSet& iConfig, edm::Co regr_->setTokens(regconf, cc); } - if (isOOTCollection_) { // OOT photons only + if (isOOTCollection_ || _clustype == PFECALSuperClusterAlgo::kDeepSC) { // OOT photons and DeepSC uses rechits inputTagBarrelRecHits_ = cc.consumes(iConfig.getParameter("barrelRecHits")); inputTagEndcapRecHits_ = cc.consumes(iConfig.getParameter("endcapRecHits")); } + if (_clustype == PFECALSuperClusterAlgo::kDeepSC) { // DeepSC uses geometry also + caloTopologyToken_ = cc.esConsumes(); + caloGeometryToken_ = cc.esConsumes(); + } } void PFECALSuperClusterAlgo::update(const edm::EventSetup& setup) { @@ -153,6 +159,17 @@ void PFECALSuperClusterAlgo::update(const edm::EventSetup& setup) { edm::ESHandle esChannelStatusHandle_ = setup.getHandle(esChannelStatusToken_); channelStatus_ = esChannelStatusHandle_.product(); + + if (_clustype == PFECALSuperClusterAlgo::kDeepSC) { // DeepSC uses geometry + edm::ESHandle caloGeometryHandle_ = setup.getHandle(caloGeometryToken_); + geometry_ = caloGeometryHandle_.product(); + ebGeom_ = caloGeometryHandle_->getSubdetectorGeometry(DetId::Ecal, EcalBarrel); + eeGeom_ = caloGeometryHandle_->getSubdetectorGeometry(DetId::Ecal, EcalEndcap); + esGeom_ = caloGeometryHandle_->getSubdetectorGeometry(DetId::Ecal, EcalPreshower); + + edm::ESHandle caloTopologyHandle_ = setup.getHandle(caloTopologyToken_); + topology_ = caloTopologyHandle_.product(); + } } void PFECALSuperClusterAlgo::updateSCParams(const edm::EventSetup& setup) { @@ -167,7 +184,7 @@ void PFECALSuperClusterAlgo::updateSCParams(const edm::EventSetup& setup) { void PFECALSuperClusterAlgo::loadAndSortPFClusters(const edm::Event& iEvent) { //load input collections //Load the pfcluster collections - edm::Handle > pfclustersHandle; + edm::Handle> pfclustersHandle; iEvent.getByToken(inputTagPFClusters_, pfclustersHandle); edm::Handle psAssociationHandle; @@ -196,7 +213,7 @@ void PFECALSuperClusterAlgo::loadAndSortPFClusters(const edm::Event& iEvent) { //Select PF clusters available for the clustering for (size_t i = 0; i < clusters.size(); ++i) { auto cluster = clusters.ptrAt(i); - LogDebug("PFClustering") << "Loading PFCluster i=" << cluster.key() << " energy=" << cluster->energy() << std::endl; + LogDebug("PFClustering") << "Loading PFCluster i=" << cluster.key() << " energy=" << cluster->energy(); // protection for sim clusters if (cluster->caloID().detectors() == 0 && cluster->hitsAndFractions().empty()) @@ -224,13 +241,13 @@ void PFECALSuperClusterAlgo::loadAndSortPFClusters(const edm::Event& iEvent) { std::sort(_clustersEB.begin(), _clustersEB.end(), greaterByEt); std::sort(_clustersEE.begin(), _clustersEE.end(), greaterByEt); - // set recHit collections for OOT photons - if (isOOTCollection_) { + // set recHit collections for OOT photons and DeepSC + if (isOOTCollection_ || _clustype == PFECALSuperClusterAlgo::kDeepSC) { edm::Handle barrelRecHitsHandle; iEvent.getByToken(inputTagBarrelRecHits_, barrelRecHitsHandle); if (!barrelRecHitsHandle.isValid()) { throw cms::Exception("PFECALSuperClusterAlgo") - << "If you use OOT photons, need to specify proper barrel rec hit collection"; + << "If you use OOT photons or DeepSC, need to specify proper barrel rec hit collection"; } barrelRecHits_ = barrelRecHitsHandle.product(); @@ -238,7 +255,7 @@ void PFECALSuperClusterAlgo::loadAndSortPFClusters(const edm::Event& iEvent) { iEvent.getByToken(inputTagEndcapRecHits_, endcapRecHitsHandle); if (!endcapRecHitsHandle.isValid()) { throw cms::Exception("PFECALSuperClusterAlgo") - << "If you use OOT photons, need to specify proper endcap rec hit collection"; + << "If you use OOT photons or DeepSC, need to specify proper endcap rec hit collection"; } endcapRecHits_ = endcapRecHitsHandle.product(); } @@ -251,24 +268,70 @@ void PFECALSuperClusterAlgo::run() { buildAllSuperClusters(_clustersEE, threshPFClusterSeedEndcap_); } -void PFECALSuperClusterAlgo::buildAllSuperClusters(CalibClusterPtrVector& clusters, double seedthresh) { +void PFECALSuperClusterAlgo::buildAllSuperClusters(CalibratedClusterPtrVector& clusters, double seedthresh) { + if (_clustype == PFECALSuperClusterAlgo::kMustache || _clustype == PFECALSuperClusterAlgo::kBOX) + buildAllSuperClustersMustacheOrBox(clusters, seedthresh); + else if (_clustype == PFECALSuperClusterAlgo::kDeepSC) + buildAllSuperClustersDeepSC(clusters, seedthresh); +} + +void PFECALSuperClusterAlgo::buildAllSuperClustersMustacheOrBox(CalibratedClusterPtrVector& clusters, + double seedthresh) { auto seedable = std::bind(isSeed, _1, seedthresh, threshIsET_); + // make sure only seeds appear at the front of the list of clusters std::stable_partition(clusters.begin(), clusters.end(), seedable); + // in each iteration we are working on a list that is already sorted // in the cluster energy and remains so through each iteration // NB: since clusters is sorted in loadClusters any_of has O(1) // timing until you run out of seeds! while (std::any_of(clusters.cbegin(), clusters.cend(), seedable)) { - buildSuperCluster(clusters.front(), clusters); + buildSuperClusterMustacheOrBox(clusters.front(), clusters); } } -void PFECALSuperClusterAlgo::buildSuperCluster(CalibClusterPtr& seed, CalibClusterPtrVector& clusters) { +void PFECALSuperClusterAlgo::buildAllSuperClustersDeepSC(CalibratedClusterPtrVector& clusters, double seedthresh) { + auto seedable = std::bind(isSeed, _1, seedthresh, threshIsET_); + // EcalClustersGraph utility class for DeepSC algorithm application + // make sure only seeds appear at the front of the list of clusters + auto last_seed = std::stable_partition(clusters.begin(), clusters.end(), seedable); + + reco::EcalClustersGraph ecalClusterGraph_{clusters, + static_cast(std::distance(clusters.begin(), last_seed)), + topology_, + ebGeom_, + eeGeom_, + barrelRecHits_, + endcapRecHits_, + SCProducerCache_}; + // Build sub-regions of the detector where the DeepSC algo will be run + ecalClusterGraph_.initWindows(); + // For each sub-region, prepare the DeepSC input tensors + ecalClusterGraph_.fillVariables(); + // Evaluate the DeepSC algorithm and save the scores + ecalClusterGraph_.evaluateScores(); + // Select the final SuperCluster using the CollectionStrategy defined in the cfi + ecalClusterGraph_.setThresholds(); + ecalClusterGraph_.selectClusters(); + // Extract the final SuperCluster collection + reco::EcalClustersGraph::EcalGraphOutput windows = ecalClusterGraph_.getGraphOutput(); + for (auto& [seed, clustered] : windows) { + bool isEE = false; + if (seed->ptr()->layer() == PFLayer::ECAL_ENDCAP) + isEE = true; + finalizeSuperCluster(seed, clustered, isEE); + } +} + +void PFECALSuperClusterAlgo::buildSuperClusterMustacheOrBox(CalibratedClusterPtr& seed, + CalibratedClusterPtrVector& clusters) { + CalibratedClusterPtrVector clustered; + double etawidthSuperCluster = 0.0; double phiwidthSuperCluster = 0.0; bool isEE = false; - switch (seed->the_ptr()->layer()) { + switch (seed->ptr()->layer()) { case PFLayer::ECAL_BARREL: phiwidthSuperCluster = phiwidthSuperClusterBarrel_; etawidthSuperCluster = etawidthSuperClusterBarrel_; @@ -286,6 +349,7 @@ void PFECALSuperClusterAlgo::buildSuperCluster(CalibClusterPtr& seed, CalibClust default: break; } + auto isClusteredWithSeed = std::bind(isClustered, _1, seed, @@ -295,6 +359,7 @@ void PFECALSuperClusterAlgo::buildSuperCluster(CalibClusterPtr& seed, CalibClust useDynamicDPhi_, etawidthSuperCluster, phiwidthSuperCluster); + auto matchesSeedByRecHit = std::bind(isLinkedByRecHit, _1, seed, satelliteThreshold_, fractionForMajority_, 0.1, 0.2); // this function shuffles the list of clusters into a list @@ -337,28 +402,37 @@ void PFECALSuperClusterAlgo::buildSuperCluster(CalibClusterPtr& seed, CalibClust // move the clustered clusters out of available cluster list // and into a temporary vector for building the SC - CalibratedClusterPtrVector clustered(clusters.begin(), not_clustered); + CalibratedClusterPtrVector clustered_tmp(clusters.begin(), not_clustered); + clustered = clustered_tmp; clusters.erase(clusters.begin(), not_clustered); + + // Finalize the SuperCluster passing the list of clustered clusters + finalizeSuperCluster(seed, clustered, isEE); +} + +void PFECALSuperClusterAlgo::finalizeSuperCluster(CalibratedClusterPtr& seed, + CalibratedClusterPtrVector& clustered, + bool isEE) { // need the vector of raw pointers for a PF width class std::vector bare_ptrs; // calculate necessary parameters and build the SC double posX(0), posY(0), posZ(0), corrSCEnergy(0), corrPS1Energy(0), corrPS2Energy(0), energyweight(0), energyweighttot(0); - for (auto& clus : clustered) { + for (const auto& clus : clustered) { double ePS1 = 0.0; double ePS2 = 0.0; energyweight = clus->energy_nocalib(); - bare_ptrs.push_back(clus->the_ptr().get()); + bare_ptrs.push_back(clus->ptr().get()); // update EE calibrated super cluster energies if (isEE) { - auto ee_key_val = std::make_pair(clus->the_ptr().key(), edm::Ptr()); + auto ee_key_val = std::make_pair(clus->ptr().key(), edm::Ptr()); const auto clustops = std::equal_range(EEtoPS_->begin(), EEtoPS_->end(), ee_key_val, sortByKey); std::vector psClusterPointers; for (auto i_ps = clustops.first; i_ps != clustops.second; ++i_ps) { psClusterPointers.push_back(i_ps->second.get()); } auto calibratedEnergies = _pfEnergyCalibration->calibrateEndcapClusterEnergies( - *(clus->the_ptr()), psClusterPointers, *channelStatus_, applyCrackCorrections_); + *(clus->ptr()), psClusterPointers, *channelStatus_, applyCrackCorrections_); ePS1 = calibratedEnergies.ps1Energy; ePS2 = calibratedEnergies.ps2Energy; } @@ -380,7 +454,7 @@ void PFECALSuperClusterAlgo::buildSuperCluster(CalibClusterPtr& seed, CalibClust default: break; } - const math::XYZPoint& cluspos = clus->the_ptr()->position(); + const math::XYZPoint& cluspos = clus->ptr()->position(); posX += energyweight * cluspos.X(); posY += energyweight * cluspos.Y(); posZ += energyweight * cluspos.Z(); @@ -397,19 +471,19 @@ void PFECALSuperClusterAlgo::buildSuperCluster(CalibClusterPtr& seed, CalibClust // now build the supercluster reco::SuperCluster new_sc(corrSCEnergy, math::XYZPoint(posX, posY, posZ)); new_sc.setCorrectedEnergy(corrSCEnergy); - new_sc.setSeed(clustered.front()->the_ptr()); + new_sc.setSeed(clustered.front()->ptr()); new_sc.setPreshowerEnergy(corrPS1Energy + corrPS2Energy); new_sc.setPreshowerEnergyPlane1(corrPS1Energy); new_sc.setPreshowerEnergyPlane2(corrPS2Energy); for (const auto& clus : clustered) { - new_sc.addCluster(clus->the_ptr()); + new_sc.addCluster(clus->ptr()); - auto& hits_and_fractions = clus->the_ptr()->hitsAndFractions(); + auto& hits_and_fractions = clus->ptr()->hitsAndFractions(); for (auto& hit_and_fraction : hits_and_fractions) { new_sc.addHitAndFraction(hit_and_fraction.first, hit_and_fraction.second); } if (isEE) { - auto ee_key_val = std::make_pair(clus->the_ptr().key(), edm::Ptr()); + auto ee_key_val = std::make_pair(clus->ptr().key(), edm::Ptr()); const auto clustops = std::equal_range(EEtoPS_->begin(), EEtoPS_->end(), ee_key_val, sortByKey); // EE rechits should be uniquely matched to sets of pre-shower // clusters at this point, so we throw an exception if otherwise @@ -456,7 +530,7 @@ void PFECALSuperClusterAlgo::buildSuperCluster(CalibClusterPtr& seed, CalibClust double scEtBS = ptFast(new_sc.energy(), new_sc.position(), beamSpot_->position()); if (scEtBS > threshSuperClusterEt_) { - switch (seed->the_ptr()->layer()) { + switch (seed->ptr()->layer()) { case PFLayer::ECAL_BARREL: if (isOOTCollection_) { DetId seedId = new_sc.seed()->seed(); diff --git a/RecoEcal/EgammaClusterProducers/python/particleFlowSuperClusterECAL_cfi.py b/RecoEcal/EgammaClusterProducers/python/particleFlowSuperClusterECAL_cfi.py index 2c81cf48e0e4f..e15dda00ca50a 100644 --- a/RecoEcal/EgammaClusterProducers/python/particleFlowSuperClusterECAL_cfi.py +++ b/RecoEcal/EgammaClusterProducers/python/particleFlowSuperClusterECAL_cfi.py @@ -1,10 +1,22 @@ import FWCore.ParameterSet.Config as cms from RecoEcal.EgammaClusterProducers.particleFlowSuperClusterECALMustache_cfi import particleFlowSuperClusterECALMustache as _particleFlowSuperClusterECALMustache - -# define the default ECAL clustering (Mustache or Box) +# define the default ECAL clustering (Mustache or Box or DeepSC) particleFlowSuperClusterECAL = _particleFlowSuperClusterECALMustache.clone() +from Configuration.ProcessModifiers.ecal_deepsc_cff import ecal_deepsc +_particleFlowSuperClusterECALDeepSC = _particleFlowSuperClusterECALMustache.clone( + ClusteringType = "DeepSC", + deepSuperClusterConfig = cms.PSet( + modelFile = cms.string("RecoEcal/EgammaClusterProducers/data/DeepSCModels/EOY_2018/model.pb"), + configFileClusterFeatures = cms.string("RecoEcal/EgammaClusterProducers/data/DeepSCModels/EOY_2018/config_clusters_inputs.txt"), + configFileWindowFeatures = cms.string("RecoEcal/EgammaClusterProducers/data/DeepSCModels/EOY_2018/config_window_inputs.txt"), + configFileHitsFeatures = cms.string("RecoEcal/EgammaClusterProducers/data/DeepSCModels/EOY_2018/config_hits_inputs.txt"), + collectionStrategy = cms.string("Cascade") + ) +) +ecal_deepsc.toReplaceWith(particleFlowSuperClusterECAL, _particleFlowSuperClusterECALDeepSC) + from Configuration.ProcessModifiers.pp_on_AA_cff import pp_on_AA pp_on_AA.toModify(particleFlowSuperClusterECAL, useDynamicDPhiWindow = False, phiwidth_SuperClusterBarrel = 0.20, @@ -15,3 +27,4 @@ thresh_SCEt = 1.0, thresh_PFClusterSeedBarrel = 0.5, thresh_PFClusterSeedEndcap = 0.5) + diff --git a/RecoEcal/EgammaClusterProducers/python/particleFlowSuperClusteringSequence_cff.py b/RecoEcal/EgammaClusterProducers/python/particleFlowSuperClusteringSequence_cff.py index cc9c95c3780ac..53ddabd9cdf1e 100644 --- a/RecoEcal/EgammaClusterProducers/python/particleFlowSuperClusteringSequence_cff.py +++ b/RecoEcal/EgammaClusterProducers/python/particleFlowSuperClusteringSequence_cff.py @@ -35,4 +35,3 @@ _phase2_hgcal_particleFlowSuperClusteringTask.add(particleFlowSuperClusterHGCal) phase2_hgcal.toReplaceWith( particleFlowSuperClusteringTask, _phase2_hgcal_particleFlowSuperClusteringTask ) - diff --git a/RecoEcal/EgammaClusterProducers/src/PFECALSuperClusterProducer.cc b/RecoEcal/EgammaClusterProducers/src/PFECALSuperClusterProducer.cc index 83f8440cd3b7e..92a7e7bc435b4 100644 --- a/RecoEcal/EgammaClusterProducers/src/PFECALSuperClusterProducer.cc +++ b/RecoEcal/EgammaClusterProducers/src/PFECALSuperClusterProducer.cc @@ -1,9 +1,3 @@ -/**\class PFECALSuperClusterProducer - -\author Nicolas Chanon -Additional authors for Mustache: Y. Gershtein, R. Patel, L. Gray -\date July 2012 -*/ #include "CondFormats/DataRecord/interface/GBRWrapperRcd.h" #include "CondFormats/GBRForest/interface/GBRForest.h" @@ -30,25 +24,41 @@ Additional authors for Mustache: Y. Gershtein, R. Patel, L. Gray #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/ParameterSet/interface/EmptyGroupDescription.h" #include "Geometry/CaloTopology/interface/CaloTopology.h" #include "Geometry/Records/interface/CaloTopologyRecord.h" #include "RecoEcal/EgammaClusterAlgos/interface/PFECALSuperClusterAlgo.h" #include "RecoEcal/EgammaClusterAlgos/interface/SCEnergyCorrectorSemiParm.h" #include "RecoEcal/EgammaCoreTools/interface/EcalClusterTools.h" - +#include "RecoEcal/EgammaCoreTools/interface/SCProducerCache.h" #include "TVector2.h" #include #include -class PFECALSuperClusterProducer : public edm::stream::EDProducer<> { +/* + * class PFECALSuperClusterProducer + * author Nicolas Chanon + * Additional authors for Mustache: Y. Gershtein, R. Patel, L. Gray + * Additional authors for DeepSC: D.Valsecchi, B.Marzocchi + * date July 2012 + * updates Feb 2022 + */ + +class PFECALSuperClusterProducer : public edm::stream::EDProducer> { public: - explicit PFECALSuperClusterProducer(const edm::ParameterSet&); + explicit PFECALSuperClusterProducer(const edm::ParameterSet&, const reco::SCProducerCache* gcache); ~PFECALSuperClusterProducer() override; void beginLuminosityBlock(const edm::LuminosityBlock&, const edm::EventSetup&) override; void produce(edm::Event&, const edm::EventSetup&) override; + static std::unique_ptr initializeGlobalCache(const edm::ParameterSet& config) { + return std::make_unique(config); + } + + static void globalEndJob(const reco::SCProducerCache*){}; + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); private: @@ -81,18 +91,22 @@ class PFECALSuperClusterProducer : public edm::stream::EDProducer<> { DEFINE_FWK_MODULE(PFECALSuperClusterProducer); using namespace std; + using namespace edm; namespace { const std::string ClusterType__BOX("Box"); const std::string ClusterType__Mustache("Mustache"); + const std::string ClusterType__DeepSC("DeepSC"); const std::string EnergyWeight__Raw("Raw"); const std::string EnergyWeight__CalibratedNoPS("CalibratedNoPS"); const std::string EnergyWeight__CalibratedTotal("CalibratedTotal"); } // namespace -PFECALSuperClusterProducer::PFECALSuperClusterProducer(const edm::ParameterSet& iConfig) { +PFECALSuperClusterProducer::PFECALSuperClusterProducer(const edm::ParameterSet& iConfig, + const reco::SCProducerCache* gcache) + : superClusterAlgo_(gcache) { verbose_ = iConfig.getUntrackedParameter("verbose", false); superClusterAlgo_.setUseRegression(iConfig.getParameter("useRegression")); @@ -105,14 +119,12 @@ PFECALSuperClusterProducer::PFECALSuperClusterProducer(const edm::ParameterSet& _theclusteringtype = PFECALSuperClusterAlgo::kBOX; } else if (_typename == ClusterType__Mustache) { _theclusteringtype = PFECALSuperClusterAlgo::kMustache; + } else if (_typename == ClusterType__DeepSC) { + _theclusteringtype = PFECALSuperClusterAlgo::kDeepSC; } else { throw cms::Exception("InvalidClusteringType") << "You have not chosen a valid clustering type," - << " please choose from \"Box\" or \"Mustache\"!"; + << " please choose from \"Box\" or \"Mustache\" or \"DeepSC\"!"; } - superClusterAlgo_.setClusteringType(_theclusteringtype); - superClusterAlgo_.setUseDynamicDPhi(iConfig.getParameter("useDynamicDPhiWindow")); - // clusteringType and useDynamicDPhi need to be defined before setting the tokens in order to esConsume only the necessary records - superClusterAlgo_.setTokens(iConfig, consumesCollector()); std::string _weightname = iConfig.getParameter("EnergyWeight"); if (_weightname == EnergyWeight__Raw) { @@ -130,6 +142,8 @@ PFECALSuperClusterProducer::PFECALSuperClusterProducer(const edm::ParameterSet& // parameters for clustering bool seedThresholdIsET = iConfig.getParameter("seedThresholdIsET"); + bool useDynamicDPhi = iConfig.getParameter("useDynamicDPhiWindow"); + double threshPFClusterSeedBarrel = iConfig.getParameter("thresh_PFClusterSeedBarrel"); double threshPFClusterBarrel = iConfig.getParameter("thresh_PFClusterBarrel"); @@ -147,6 +161,11 @@ PFECALSuperClusterProducer::PFECALSuperClusterProducer(const edm::ParameterSet& double satelliteMajorityFraction = iConfig.getParameter("satelliteMajorityFraction"); bool dropUnseedable = iConfig.getParameter("dropUnseedable"); + superClusterAlgo_.setClusteringType(_theclusteringtype); + superClusterAlgo_.setUseDynamicDPhi(useDynamicDPhi); + // clusteringType and useDynamicDPhi need to be defined before setting the tokens in order to esConsume only the necessary records + superClusterAlgo_.setTokens(iConfig, consumesCollector()); + superClusterAlgo_.setVerbosityLevel(verbose_); superClusterAlgo_.setEnergyWeighting(_theenergyweight); superClusterAlgo_.setUseETForSeeding(seedThresholdIsET); @@ -354,7 +373,6 @@ void PFECALSuperClusterProducer::fillDescriptions(edm::ConfigurationDescriptions desc.add("PFBasicClusterCollectionEndcap", "particleFlowBasicClusterECALEndcap"); desc.add("PFClusters", edm::InputTag("particleFlowClusterECAL")); desc.add("thresh_PFClusterSeedBarrel", 1.0); - desc.add("ClusteringType", "Mustache"); desc.add("EnergyWeight", "Raw"); desc.add("BeamSpot", edm::InputTag("offlineBeamSpot")); desc.add("thresh_PFClusterSeedEndcap", 1.0); @@ -367,5 +385,29 @@ void PFECALSuperClusterProducer::fillDescriptions(edm::ConfigurationDescriptions desc.add("PFSuperClusterCollectionEndcapWithPreshower", "particleFlowSuperClusterECALEndcapWithPreshower"); desc.add("dropUnseedable", false); + + edm::ParameterSetDescription deepSCParams; + deepSCParams.add("modelFile", ""); + deepSCParams.add("configFileClusterFeatures", ""); + deepSCParams.add("configFileWindowFeatures", ""); + deepSCParams.add("configFileHitsFeatures", ""); + deepSCParams.add("nClusterFeatures", 12); + deepSCParams.add("nWindowFeatures", 18); + deepSCParams.add("nHitsFeatures", 4); + deepSCParams.add("maxNClusters", 40); + deepSCParams.add("maxNRechits", 40); + deepSCParams.add("batchSize", 64); + deepSCParams.add("collectionStrategy", "Cascade"); + + EmptyGroupDescription emptyGroup; + + // Add DeepSC parameters only to the specific ClusteringType + edm::ParameterSwitch switchNode( + edm::ParameterDescription("ClusteringType", ClusterType__Mustache, true), + ClusterType__Mustache >> emptyGroup or ClusterType__BOX >> emptyGroup or + ClusterType__DeepSC >> + edm::ParameterDescription("deepSuperClusterConfig", deepSCParams, true)); + desc.addNode(switchNode); + descriptions.add("particleFlowSuperClusterECALMustache", desc); } diff --git a/RecoEcal/EgammaCoreTools/BuildFile.xml b/RecoEcal/EgammaCoreTools/BuildFile.xml index 82cb929f52e52..3287680adf306 100644 --- a/RecoEcal/EgammaCoreTools/BuildFile.xml +++ b/RecoEcal/EgammaCoreTools/BuildFile.xml @@ -1,15 +1,21 @@ + + + + + - + + diff --git a/RecoEcal/EgammaCoreTools/interface/CalibratedPFCluster.h b/RecoEcal/EgammaCoreTools/interface/CalibratedPFCluster.h new file mode 100644 index 0000000000000..ed38058688237 --- /dev/null +++ b/RecoEcal/EgammaCoreTools/interface/CalibratedPFCluster.h @@ -0,0 +1,24 @@ +#ifndef RecoEcal_EgammaCoreTools_CalibratedPFCluster_h +#define RecoEcal_EgammaCoreTools_CalibratedPFCluster_h + +#include "DataFormats/ParticleFlowReco/interface/PFCluster.h" +#include "DataFormats/ParticleFlowReco/interface/PFClusterFwd.h" +#include "DataFormats/CaloRecHit/interface/CaloCluster.h" + +// simple class for associating calibrated energies +class CalibratedPFCluster { +public: + CalibratedPFCluster(const edm::Ptr& p) : ptr_(p) {} + + double energy() const { return ptr_->correctedEnergy(); } + double energy_nocalib() const { return ptr_->energy(); } + double eta() const { return ptr_->positionREP().eta(); } + double phi() const { return ptr_->positionREP().phi(); } + + edm::Ptr ptr() const { return ptr_; } + +private: + edm::Ptr ptr_; +}; + +#endif diff --git a/RecoEcal/EgammaCoreTools/interface/DeepSCGraphEvaluation.h b/RecoEcal/EgammaCoreTools/interface/DeepSCGraphEvaluation.h new file mode 100644 index 0000000000000..e93b3b70e5972 --- /dev/null +++ b/RecoEcal/EgammaCoreTools/interface/DeepSCGraphEvaluation.h @@ -0,0 +1,98 @@ +#ifndef RecoEcal_EgammaCoreTools_DeepSCGraphEvaluation_h +#define RecoEcal_EgammaCoreTools_DeepSCGraphEvaluation_h + +#include "PhysicsTools/TensorFlow/interface/TensorFlow.h" +#include +#include +#include +#include +#include +#include + +//author: Davide Valsecchi +//description: +// Handles Tensorflow DNN graphs and variables scaler configuration. +// To be used for DeepSC. + +namespace reco { + + struct DeepSCConfiguration { + std::string modelFile; + std::string configFileClusterFeatures; + std::string configFileWindowFeatures; + std::string configFileHitsFeatures; + uint nClusterFeatures; + uint nWindowFeatures; + uint nHitsFeatures; + uint maxNClusters; + uint maxNRechits; + uint batchSize; + std::string collectionStrategy; + }; + + /* + * Structure representing the detector windows of a single events, to be evaluated with the DeepSC model. + * The index structure is described in the following + */ + + namespace DeepSCInputs { + enum ScalerType { + MeanRms, // scale as (var - mean)/rms + MinMax, // scale as (var - min) (max-min) + None // do nothing + }; + struct InputConfig { + // Each input variable is represented by the tuple + std::string varName; + ScalerType type; + float par1; + float par2; + }; + typedef std::vector InputConfigs; + typedef std::map FeaturesMap; + + struct Inputs { + std::vector>> clustersX; + std::vector>>> hitsX; + std::vector> windowX; + std::vector> isSeed; + }; + + }; // namespace DeepSCInputs + + class DeepSCGraphEvaluation { + public: + DeepSCGraphEvaluation(const DeepSCConfiguration&); + ~DeepSCGraphEvaluation(); + + std::vector getScaledInputs(const DeepSCInputs::FeaturesMap& variables, + const DeepSCInputs::InputConfigs& config) const; + + std::vector> evaluate(const DeepSCInputs::Inputs& inputs) const; + + // List of input variables names used to check the variables request as + // inputs in a dynamic way from configuration file. + // If an input variables is not found at construction time an expection is thrown. + static const std::vector availableClusterInputs; + static const std::vector availableWindowInputs; + static const std::vector availableHitsInputs; + + // Configuration of the input variables including the scaling parameters. + // The list is used to define the vector of input features passed to the tensorflow model. + DeepSCInputs::InputConfigs inputFeaturesClusters; + DeepSCInputs::InputConfigs inputFeaturesWindows; + DeepSCInputs::InputConfigs inputFeaturesHits; + + private: + void initTensorFlowGraphAndSession(); + DeepSCInputs::InputConfigs readInputFeaturesConfig(std::string file, + const std::vector& availableInputs) const; + + const DeepSCConfiguration cfg_; + std::unique_ptr graphDef_; + tensorflow::Session* session_; + }; + +}; // namespace reco + +#endif diff --git a/RecoEcal/EgammaCoreTools/interface/EcalClustersGraph.h b/RecoEcal/EgammaCoreTools/interface/EcalClustersGraph.h new file mode 100644 index 0000000000000..5a22442fd4721 --- /dev/null +++ b/RecoEcal/EgammaCoreTools/interface/EcalClustersGraph.h @@ -0,0 +1,123 @@ +#ifndef RecoEcal_EgammaCoreTools_EcalClustersGraph_h +#define RecoEcal_EgammaCoreTools_EcalClustersGraph_h + +/** + \file + Tools for manipulating ECAL Clusters as graphs + \author Davide Valsecchi, Badder Marzocchi + \date 05 October 2020 +*/ + +#include +#include +#include + +#include "PhysicsTools/TensorFlow/interface/TensorFlow.h" +#include "FWCore/Utilities/interface/isFinite.h" + +#include "DataFormats/CaloRecHit/interface/CaloCluster.h" +#include "DataFormats/CaloRecHit/interface/CaloClusterFwd.h" +#include "DataFormats/EgammaReco/interface/SuperCluster.h" +#include "DataFormats/ParticleFlowReco/interface/PFCluster.h" +#include "DataFormats/ParticleFlowReco/interface/PFLayer.h" +#include "DataFormats/Math/interface/deltaPhi.h" + +#include "RecoEcal/EgammaCoreTools/interface/EcalClusterTools.h" +#include "DataFormats/EcalDetId/interface/EBDetId.h" +#include "DataFormats/EcalDetId/interface/EEDetId.h" + +#include "Geometry/CaloTopology/interface/CaloTopology.h" +#include "Geometry/CaloGeometry/interface/CaloGeometry.h" +#include "Geometry/CaloGeometry/interface/CaloSubdetectorGeometry.h" +#include "Geometry/Records/interface/CaloTopologyRecord.h" +#include "Geometry/CaloTopology/interface/CaloSubdetectorTopology.h" +#include "DataFormats/GeometryVector/interface/GlobalPoint.h" +#include "Geometry/EcalAlgo/interface/EcalBarrelGeometry.h" +#include "Geometry/EcalAlgo/interface/EcalEndcapGeometry.h" + +#include "RecoEcal/EgammaCoreTools/interface/CalibratedPFCluster.h" +#include "RecoEcal/EgammaCoreTools/interface/GraphMap.h" +#include "RecoEcal/EgammaCoreTools/interface/SCProducerCache.h" + +/* + * class: EcalClustersGraph + * Authors: D.Valsecchi, B.Marzocchi + * Date: January 2022 + * + * Utility class to handle all the PFClusters in ECAL as a graph. + * The DeepSC algorithm is applied on sub-graphs of clusters to form SuperCluster. + */ + +namespace reco { + + class EcalClustersGraph { + public: + typedef std::shared_ptr CalibratedClusterPtr; + typedef std::vector CalibratedClusterPtrVector; + typedef std::vector> EcalGraphOutput; + + EcalClustersGraph(CalibratedClusterPtrVector clusters, + int nSeeds, + const CaloTopology* topology, + const CaloSubdetectorGeometry* ebGeom, + const CaloSubdetectorGeometry* eeGeom, + const EcalRecHitCollection* recHitsEB, + const EcalRecHitCollection* recHitsEE, + const reco::SCProducerCache* cache); + + void fillVariables(); + + double scoreThreshold(const CaloCluster* cluster); + void initWindows(); + + void setThresholds(); + void evaluateScores(); + void selectClusters(); + + EcalGraphOutput getGraphOutput(); + + private: + std::array clusterPosition(const CaloCluster* cluster) const; + + // Sign flip deltaEta as in the Mustache + double deltaEta(double seed_eta, double cluster_eta) const { + return (1 - 2 * (seed_eta < 0)) * (cluster_eta - seed_eta); + } + + // The dEta-dPhi detector window dimension is chosen to that the algorithm is always larger than + // the Mustache dimension + std::array dynamicWindow(double seedEta) const; + + DeepSCInputs::FeaturesMap computeVariables(const CaloCluster* seed, const CaloCluster* cluster) const; + std::vector> fillHits(const CaloCluster* cluster) const; + DeepSCInputs::FeaturesMap computeWindowVariables(const std::vector& clusters) const; + + std::pair computeCovariances(const CaloCluster* cluster); + std::vector computeShowerShapes(const CaloCluster* cluster, bool full5x5); + + CalibratedClusterPtrVector clusters_; + uint nSeeds_; + uint nCls_; + + std::array locCov_; + std::pair widths_; + + //To compute the input variables + const CaloTopology* topology_; + const CaloSubdetectorGeometry* ebGeom_; + const CaloSubdetectorGeometry* eeGeom_; + const EcalRecHitCollection* recHitsEB_; + const EcalRecHitCollection* recHitsEE_; + const reco::SCProducerCache* scProducerCache_; + + // GraphMap for handling all the windows and scores + reco::GraphMap graphMap_; + reco::GraphMap::CollectionStrategy strategy_; + + // Raw input for the tensorflow DeepSCGraphEvaluation object + reco::DeepSCInputs::Inputs inputs_; + float threshold_; + }; + +} // namespace reco +#endif diff --git a/RecoEcal/EgammaCoreTools/interface/GraphMap.h b/RecoEcal/EgammaCoreTools/interface/GraphMap.h new file mode 100644 index 0000000000000..9d5914ea6664b --- /dev/null +++ b/RecoEcal/EgammaCoreTools/interface/GraphMap.h @@ -0,0 +1,92 @@ +#ifndef RecoEcal_EgammaCoreTools_GraphMap_h +#define RecoEcal_EgammaCoreTools_GraphMap_h + +#include +#include +#include +#include + +/* + * Class handling a sparse graph of clusters. + * + * Author: D. Valsecchi + * Date: 08-02-2022 + */ + +namespace reco { + + class GraphMap { + public: + GraphMap(uint nNodes); + + enum NodeCategory { kNode, kSeed, kNcategories }; + + void addNode(const uint index, const NodeCategory category); + void addNodes(const std::vector &indices, const std::vector &categories); + void addEdge(const uint i, const uint j); + void setAdjMatrix(const uint i, const uint j, const float score); + void setAdjMatrixSym(const uint i, const uint j, const float score); + void printGraphMap(); + + //Getters + const std::vector &getOutEdges(const uint i) const; + const std::vector &getInEdges(const uint i) const; + uint getAdjMatrix(const uint i, const uint j) const; + std::vector getAdjMatrixRow(const uint i) const; + std::vector getAdjMatrixCol(const uint j) const; + + enum CollectionStrategy { + Cascade, // Starting from the highest energy seed, collect all the nodes. + // Other seeds collected by higher energy seeds are ignored + CollectAndMerge, // First, for each simple node keep only the edge with the highest score. + // Then collect all the simple nodes around the other seeds. + // Edges between the seeds nodes are ignored. + // Finally, starting from the first seed, look for linked secondary seeds + // and if they pass the threshold, merge their noded. + SeedsFirst, // Like strategy D, but after solving the edges between the seeds, + // the simple nodes edges are cleaned to keep only the highest score link. + // Then proceed as strategy B. + CascadeHighest // First, for each simple node keep only the edge with the highest score. + // Then proceed as strategy A, from the first seed node cascading to the others. + // Secondary seeds linked are absorbed and ignored in the next iteration: + // this implies that nodes connected to these seed are lost. + }; + + // Output of the collection [{seed, [list of clusters]}] + typedef std::vector>> GraphOutput; + typedef std::map> GraphOutputMap; + // Apply the collection algorithms + void collectNodes(GraphMap::CollectionStrategy strategy, float threshold); + const GraphOutput &getGraphOutput() { return graphOutput_; }; + + private: + uint nNodes_; + // Map with list of indices of nodes for each category + std::map> nodesCategories_; + // Count of nodes for each category + std::map nodesCount_; + // Incoming edges, one list for each node (no distinction between type) + std::vector> edgesIn_; + // Outcoming edges, one list for each node + std::vector> edgesOut_; + // Adjacency matrix (i,j) --> score + // Rows are interpreted as OUT edges + // Columns are interpreted as IN edges + std::map, float> adjMatrix_; + + // Store for the graph collection result + GraphOutput graphOutput_; + + // Functions for the collection strategies + void collectCascading(float threshold); + void assignHighestScoreEdge(); + // Return both the output graph with only seedss and a GraphOutputMap + // of the collected simple nodes from each seed. + std::pair collectSeparately(float threshold); + void mergeSubGraphs(float threshold, GraphOutput seedsGraph, GraphOutputMap nodesGraphMap); + void resolveSuperNodesEdges(float threshold); + }; + +} // namespace reco + +#endif diff --git a/RecoEcal/EgammaCoreTools/interface/SCProducerCache.h b/RecoEcal/EgammaCoreTools/interface/SCProducerCache.h new file mode 100644 index 0000000000000..4406b2c44e14c --- /dev/null +++ b/RecoEcal/EgammaCoreTools/interface/SCProducerCache.h @@ -0,0 +1,38 @@ +#ifndef RecoEcal_EgammaCoreTools_SCProducerCache_h +#define RecoEcal_EgammaCoreTools_SCProducerCache_h + +#include "RecoEcal/EgammaCoreTools/interface/DeepSCGraphEvaluation.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +namespace reco { + + class SCProducerCache { + public: + // Cache for SuperCluster Producer containing Tensorflow objects + SCProducerCache(const edm::ParameterSet& conf) { + // Here we will have to load the DNN PFID if present in the config + auto clustering_type = conf.getParameter("ClusteringType"); + + if (clustering_type == "DeepSC") { + const auto& pset_dnn = conf.getParameter("deepSuperClusterConfig"); + config.modelFile = pset_dnn.getParameter("modelFile"); + config.configFileClusterFeatures = pset_dnn.getParameter("configFileClusterFeatures"); + config.configFileWindowFeatures = pset_dnn.getParameter("configFileWindowFeatures"); + config.configFileHitsFeatures = pset_dnn.getParameter("configFileHitsFeatures"); + config.nClusterFeatures = pset_dnn.getParameter("nClusterFeatures"); + config.nWindowFeatures = pset_dnn.getParameter("nWindowFeatures"); + config.nHitsFeatures = pset_dnn.getParameter("nHitsFeatures"); + config.maxNClusters = pset_dnn.getParameter("maxNClusters"); + config.maxNRechits = pset_dnn.getParameter("maxNRechits"); + config.batchSize = pset_dnn.getParameter("batchSize"); + config.collectionStrategy = pset_dnn.getParameter("collectionStrategy"); + deepSCEvaluator = std::make_unique(config); + } + }; + + std::unique_ptr deepSCEvaluator; + reco::DeepSCConfiguration config; + }; +} // namespace reco + +#endif diff --git a/RecoEcal/EgammaCoreTools/src/DeepSCGraphEvaluation.cc b/RecoEcal/EgammaCoreTools/src/DeepSCGraphEvaluation.cc new file mode 100644 index 0000000000000..9fae05b9b64d0 --- /dev/null +++ b/RecoEcal/EgammaCoreTools/src/DeepSCGraphEvaluation.cc @@ -0,0 +1,250 @@ +#include "RecoEcal/EgammaCoreTools/interface/DeepSCGraphEvaluation.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" +#include "TMath.h" +#include +#include +using namespace reco; + +const std::vector DeepSCGraphEvaluation::availableClusterInputs = {"cl_energy", + "cl_et", + "cl_eta", + "cl_phi", + "cl_ieta", + "cl_iphi", + "cl_iz", + "cl_seed_dEta", + "cl_seed_dPhi", + "cl_seed_dEnergy", + "cl_seed_dEt", + "cl_nxtals"}; +const std::vector DeepSCGraphEvaluation::availableWindowInputs = { + "max_cl_energy", "max_cl_et", "max_cl_eta", "max_cl_phi", "max_cl_ieta", "max_cl_iphi", + "max_cl_iz", "max_cl_seed_dEta", "max_cl_seed_dPhi", "max_cl_seed_dEnergy", "max_cl_seed_dEt", "max_cl_nxtals", + "min_cl_energy", "min_cl_et", "min_cl_eta", "min_cl_phi", "min_cl_ieta", "min_cl_iphi", + "min_cl_iz", "min_cl_seed_dEta", "min_cl_seed_dPhi", "min_cl_seed_dEnergy", "min_cl_seed_dEt", "min_cl_nxtals", + "avg_cl_energy", "avg_cl_et", "avg_cl_eta", "avg_cl_phi", "avg_cl_ieta", "avg_cl_iphi", + "avg_cl_iz", "avg_cl_seed_dEta", "avg_cl_seed_dPhi", "avg_cl_seed_dEnergy", "avg_cl_seed_dEt", "avg_cl_nxtals"}; +const std::vector DeepSCGraphEvaluation::availableHitsInputs = {"ieta", "iphi", "iz", "en_withfrac"}; + +DeepSCGraphEvaluation::DeepSCGraphEvaluation(const DeepSCConfiguration& cfg) : cfg_(cfg) { + tensorflow::setLogging("0"); + // Init TF graph and session objects + initTensorFlowGraphAndSession(); + // Init scaler configs + inputFeaturesClusters = + readInputFeaturesConfig(cfg_.configFileClusterFeatures, DeepSCGraphEvaluation::availableClusterInputs); + if (inputFeaturesClusters.size() != cfg_.nClusterFeatures) { + throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and " + << "parameters in the scaler file."; + } + inputFeaturesWindows = + readInputFeaturesConfig(cfg_.configFileWindowFeatures, DeepSCGraphEvaluation::availableWindowInputs); + if (inputFeaturesWindows.size() != cfg_.nWindowFeatures) { + throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and " + << "parameters in the scaler file."; + } + inputFeaturesHits = readInputFeaturesConfig(cfg_.configFileHitsFeatures, DeepSCGraphEvaluation::availableHitsInputs); + if (inputFeaturesHits.size() != cfg_.nHitsFeatures) { + throw cms::Exception("WrongConfiguration") << "Mismatch between number of input features for Clusters and " + << "parameters in the scaler file."; + } +} + +DeepSCGraphEvaluation::~DeepSCGraphEvaluation() { + if (session_ != nullptr) + tensorflow::closeSession(session_); +} + +void DeepSCGraphEvaluation::initTensorFlowGraphAndSession() { + // load the graph definition + LogDebug("DeepSCGraphEvaluation") << "Loading graph"; + graphDef_ = + std::unique_ptr(tensorflow::loadGraphDef(edm::FileInPath(cfg_.modelFile).fullPath())); + LogDebug("DeepSCGraphEvaluation") << "Starting TF sessions"; + session_ = tensorflow::createSession(graphDef_.get()); + LogDebug("DeepSCGraphEvaluation") << "TF ready"; +} + +DeepSCInputs::InputConfigs DeepSCGraphEvaluation::readInputFeaturesConfig( + std::string file, const std::vector& availableInputs) const { + DeepSCInputs::InputConfigs features; + LogDebug("DeepSCGraphEvaluation") << "Reading scaler file: " << edm::FileInPath(file).fullPath(); + std::ifstream inputfile{edm::FileInPath(file).fullPath()}; + if (inputfile.fail()) { + throw cms::Exception("MissingFile") << "Input features config file not found: " << file; + } else { + // Now read mean, scale factors for each variable + float par1, par2; + std::string varName, type_str; + DeepSCInputs::ScalerType type; + while (inputfile >> varName >> type_str >> par1 >> par2) { + if (type_str == "MeanRms") + type = DeepSCInputs::ScalerType::MeanRms; + else if (type_str == "MinMax") + type = DeepSCInputs::ScalerType::MinMax; + else + type = DeepSCInputs::ScalerType::None; //do nothing + features.push_back(DeepSCInputs::InputConfig{.varName = varName, .type = type, .par1 = par1, .par2 = par2}); + // Protection for mismatch between requested variables and the available ones + auto match = std::find(availableInputs.begin(), availableInputs.end(), varName); + if (match == std::end(availableInputs)) { + throw cms::Exception("MissingInput") << "Requested input (" << varName << ") not available between DNN inputs"; + } + LogDebug("DeepSCGraphEvalutation") << "Registered input feature: " << varName << ", scaler=" << type_str; + } + } + return features; +} + +std::vector DeepSCGraphEvaluation::getScaledInputs(const DeepSCInputs::FeaturesMap& variables, + const DeepSCInputs::InputConfigs& config) const { + std::vector inputs; + inputs.reserve(config.size()); + // Loop on the list of requested variables and scaling values + // Different type of scaling are available: 0=no scaling, 1=standard scaler, 2=minmax + for (auto& [varName, type, par1, par2] : config) { + if (type == DeepSCInputs::ScalerType::MeanRms) + inputs.push_back((variables.at(varName) - par1) / par2); + else if (type == DeepSCInputs::ScalerType::MinMax) + inputs.push_back((variables.at(varName) - par1) / (par2 - par1)); + else if (type == DeepSCInputs::ScalerType::None) { + inputs.push_back(variables.at(varName)); // Do nothing on the variable + } + //Protection for mismatch between requested variables and the available ones + // have been added when the scaler config are loaded --> here we know that the variables are available + } + return inputs; +} + +std::vector> DeepSCGraphEvaluation::evaluate(const DeepSCInputs::Inputs& inputs) const { + LogDebug("DeepSCGraphEvaluation") << "Starting evaluation"; + + // Final output + std::vector> outputs_clustering; + + // We need to split the total inputs in N batches of size batchSize (configured in the producer) + // being careful with the last batch which will have less than batchSize elements + size_t nInputs = inputs.clustersX.size(); + uint iB = -1; // batch index + while (nInputs > 0) { + iB++; // go to next batch + size_t nItems; + if (nInputs >= cfg_.batchSize) { + nItems = cfg_.batchSize; + nInputs -= cfg_.batchSize; + } else { + nItems = nInputs; + nInputs = 0; + } + + // Inputs + tensorflow::Tensor clsX_{tensorflow::DT_FLOAT, + {static_cast(nItems), cfg_.maxNClusters, cfg_.nClusterFeatures}}; + tensorflow::Tensor windX_{tensorflow::DT_FLOAT, {static_cast(nItems), cfg_.nWindowFeatures}}; + tensorflow::Tensor hitsX_{tensorflow::DT_FLOAT, + {static_cast(nItems), cfg_.maxNClusters, cfg_.maxNRechits, cfg_.nHitsFeatures}}; + tensorflow::Tensor isSeedX_{tensorflow::DT_FLOAT, {static_cast(nItems), cfg_.maxNClusters, 1}}; + tensorflow::Tensor nClsSize_{tensorflow::DT_FLOAT, {static_cast(nItems)}}; + + // Look on batch dim + for (size_t b = 0; b < nItems; b++) { + const auto& cls_data = inputs.clustersX[iB * cfg_.batchSize + b]; + // Loop on clusters + for (size_t k = 0; k < cfg_.maxNClusters; k++) { + // Loop on features + for (size_t z = 0; z < cfg_.nClusterFeatures; z++) { + if (k < cls_data.size()) { + clsX_.tensor()(b, k, z) = float(cls_data[k][z]); + } else { + clsX_.tensor()(b, k, z) = 0.; + } + } + } + } + + // Look on batch dim + for (size_t b = 0; b < nItems; b++) { + const auto& wind_features = inputs.windowX[iB * cfg_.batchSize + b]; + // Loop on features + for (size_t k = 0; k < cfg_.nWindowFeatures; k++) { + windX_.matrix()(b, k) = float(wind_features[k]); + } + } + + // Look on batch dim + for (size_t b = 0; b < nItems; b++) { + const auto& hits_data = inputs.hitsX[iB * cfg_.batchSize + b]; + size_t ncls_in_window = hits_data.size(); + // Loop on clusters + for (size_t k = 0; k < cfg_.maxNClusters; k++) { + // Check padding + size_t nhits_in_cluster; + if (k < ncls_in_window) + nhits_in_cluster = hits_data[k].size(); + else + nhits_in_cluster = 0; + + // Loop on hits + for (size_t j = 0; j < cfg_.maxNRechits; j++) { + // Check the number of clusters and hits for padding + bool ok = j < nhits_in_cluster; + // Loop on rechits features + for (size_t z = 0; z < cfg_.nHitsFeatures; z++) { + if (ok) + hitsX_.tensor()(b, k, j, z) = float(hits_data[k][j][z]); + else + hitsX_.tensor()(b, k, j, z) = 0.; + } + } + } + } + + // Look on batch dim + for (size_t b = 0; b < nItems; b++) { + const auto& isSeed_data = inputs.isSeed[iB * cfg_.batchSize + b]; + // Loop on clusters + for (size_t k = 0; k < cfg_.maxNClusters; k++) { + if (k < isSeed_data.size()) { + isSeedX_.tensor()(b, k, 0) = float(isSeed_data[k]); + } else { + isSeedX_.tensor()(b, k, 0) = 0.; + } + } + } + + for (size_t b = 0; b < nItems; b++) { + nClsSize_.vec()(b) = float(inputs.clustersX[iB * cfg_.batchSize + b].size()); + } + + std::vector> feed_dict = { + {"input_1", clsX_}, {"input_2", windX_}, {"input_3", hitsX_}, {"input_4", isSeedX_}, {"input_5", nClsSize_}}; + + // prepare tensorflow outputs + std::vector outputs_tf; + // // Define the output and run + // // Run the models + LogDebug("DeepSCGraphEvaluation") << "Run model"; + tensorflow::run(session_, feed_dict, {"cl_class", "wind_class"}, &outputs_tf); + // Reading the 1st output: clustering probability + const auto& y_cl = outputs_tf[0].tensor(); + // Iterate on the clusters for each window + for (size_t b = 0; b < nItems; b++) { + uint ncls = inputs.clustersX[iB * cfg_.batchSize + b].size(); + std::vector cl_output(ncls); + for (size_t iC = 0; iC < ncls; iC++) { + if (iC < cfg_.maxNClusters) { + float y = y_cl(b, iC, 0); + // Applying sigmoid to logit + cl_output[iC] = 1 / (1 + std::exp(-y)); + } else { + // The number of clusters is over the padding max dim + cl_output[iC] = 0; + } + } + outputs_clustering.push_back(cl_output); + } + } + + return outputs_clustering; +} diff --git a/RecoEcal/EgammaCoreTools/src/EcalClustersGraph.cc b/RecoEcal/EgammaCoreTools/src/EcalClustersGraph.cc new file mode 100644 index 0000000000000..a9fa485603dca --- /dev/null +++ b/RecoEcal/EgammaCoreTools/src/EcalClustersGraph.cc @@ -0,0 +1,438 @@ +#include "RecoEcal/EgammaCoreTools/interface/EcalClustersGraph.h" +#include + +using namespace std; +using namespace reco; +using namespace reco::DeepSCInputs; + +typedef std::shared_ptr CalibratedClusterPtr; +typedef std::vector CalibratedClusterPtrVector; + +EcalClustersGraph::EcalClustersGraph(CalibratedClusterPtrVector clusters, + int nSeeds, + const CaloTopology* topology, + const CaloSubdetectorGeometry* ebGeom, + const CaloSubdetectorGeometry* eeGeom, + const EcalRecHitCollection* recHitsEB, + const EcalRecHitCollection* recHitsEE, + const SCProducerCache* cache) + : clusters_(clusters), + nSeeds_(nSeeds), + nCls_(clusters_.size()), + topology_(topology), + ebGeom_(ebGeom), + eeGeom_(eeGeom), + recHitsEB_(recHitsEB), + recHitsEE_(recHitsEE), + scProducerCache_(cache), + graphMap_(clusters.size()) { + // Prepare the batch size of the tensor inputs == number of windows + inputs_.clustersX.resize(nSeeds_); + inputs_.windowX.resize(nSeeds_); + inputs_.hitsX.resize(nSeeds_); + inputs_.isSeed.resize(nSeeds_); + + // Init the graph nodes + for (size_t i = 0; i < nCls_; i++) { + if (i < nSeeds_) + graphMap_.addNode(i, GraphMap::NodeCategory::kSeed); + else + graphMap_.addNode(i, GraphMap::NodeCategory::kNode); + } + + // Select the collection strategy from the config + if (scProducerCache_->config.collectionStrategy == "Cascade") { + strategy_ = GraphMap::CollectionStrategy::Cascade; + } else if (scProducerCache_->config.collectionStrategy == "CollectAndMerge") { + strategy_ = GraphMap::CollectionStrategy::CollectAndMerge; + } else if (scProducerCache_->config.collectionStrategy == "SeedsFirst") { + strategy_ = GraphMap::CollectionStrategy::SeedsFirst; + } else if (scProducerCache_->config.collectionStrategy == "CascadeHighest") { + strategy_ = GraphMap::CollectionStrategy::CascadeHighest; + } else { + edm::LogWarning("EcalClustersGraph") << "GraphMap::CollectionStrategy not recognized. Default to Cascade"; + strategy_ = GraphMap::CollectionStrategy::Cascade; + } + + LogTrace("EcalClustersGraph") << "EcalClustersGraph created. nSeeds " << nSeeds_ << ", nClusters " << nCls_ << endl; +} + +std::array EcalClustersGraph::clusterPosition(const CaloCluster* cluster) const { + std::array coordinates; + int ieta = -999; + int iphi = -999; + int iz = -99; + + const math::XYZPoint& caloPos = cluster->position(); + if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_BARREL) { + EBDetId eb_id(ebGeom_->getClosestCell(GlobalPoint(caloPos.x(), caloPos.y(), caloPos.z()))); + ieta = eb_id.ieta(); + iphi = eb_id.iphi(); + iz = 0; + } else if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_ENDCAP) { + EEDetId ee_id(eeGeom_->getClosestCell(GlobalPoint(caloPos.x(), caloPos.y(), caloPos.z()))); + ieta = ee_id.ix(); + iphi = ee_id.iy(); + if (ee_id.zside() < 0) + iz = -1; + if (ee_id.zside() > 0) + iz = 1; + } + + coordinates[0] = ieta; + coordinates[1] = iphi; + coordinates[2] = iz; + return coordinates; +} + +std::array EcalClustersGraph::dynamicWindow(double seedEta) const { + // The dEta-dPhi detector window dimension is chosen to that the algorithm is always larger than + // the Mustache dimension + std::array window; + + double eta = std::abs(seedEta); + double deta_down = 0.; + double deta_up = 0.; + double dphi = 0.; + + //deta_down + constexpr float deta_down_bins[2] = {2.1, 2.5}; + if (eta < deta_down_bins[0]) + deta_down = -0.075; + else if (eta >= deta_down_bins[0] && eta < deta_down_bins[1]) + deta_down = -0.1875 * eta + 0.31875; + else if (eta >= deta_down_bins[1]) + deta_down = -0.15; + + //deta_up + constexpr float deta_up_bins[4] = {0.1, 1.3, 1.7, 1.9}; + if (eta < deta_up_bins[0]) + deta_up = 0.075; + else if (eta >= deta_up_bins[0] && eta < deta_up_bins[1]) + deta_up = 0.0758929 - 0.0178571 * eta + 0.0892857 * (eta * eta); + else if (eta >= deta_up_bins[1] && eta < deta_up_bins[2]) + deta_up = 0.2; + else if (eta >= deta_up_bins[2] && eta < deta_up_bins[3]) + deta_up = 0.625 - 0.25 * eta; + else if (eta >= deta_up_bins[3]) + deta_up = 0.15; + + //dphi + constexpr float dphi_bins[2] = {1.9, 2.7}; + if (eta < dphi_bins[0]) + dphi = 0.6; + else if (eta >= dphi_bins[0] && eta < dphi_bins[1]) + dphi = 1.075 - 0.25 * eta; + else if (eta >= dphi_bins[1]) + dphi = 0.4; + + window[0] = deta_down; + window[1] = deta_up; + window[2] = dphi; + + return window; +} + +void EcalClustersGraph::initWindows() { + for (uint is = 0; is < nSeeds_; is++) { + const auto& seedLocal = clusterPosition((*clusters_[is]).ptr().get()); + double seed_eta = clusters_[is]->eta(); + double seed_phi = clusters_[is]->phi(); + const auto& width = dynamicWindow(seed_eta); + // Add a self loop on the seed node + graphMap_.addEdge(is, is); + + // The graph associated to each seed includes only other seeds if they have a smaller energy. + // This is imposed to be consistent with the current trained model, which has been training on "non-overalapping windows". + // The effect of connecting all the seeds, and not only the smaller energy ones has been already tested: the reconstruction + // differences are negligible (tested with Cascade collection algo). + // In the next version of the training this requirement will be relaxed to have a model that fully matches the reconstruction + // mechanism in terms of overlapping seeds. + for (uint icl = is + 1; icl < nCls_; icl++) { + if (is == icl) + continue; + const auto& clusterLocal = clusterPosition((*clusters_[icl]).ptr().get()); + double cl_eta = clusters_[icl]->eta(); + double cl_phi = clusters_[icl]->phi(); + double dphi = deltaPhi(seed_phi, cl_phi); + double deta = deltaEta(seed_eta, cl_eta); + + if (seedLocal[2] == clusterLocal[2] && deta >= width[0] && deta <= width[1] && std::abs(dphi) <= width[2]) { + graphMap_.addEdge(is, icl); + } + } + } +} + +std::vector> EcalClustersGraph::fillHits(const CaloCluster* cluster) const { + const std::vector>& hitsAndFractions = cluster->hitsAndFractions(); + std::vector> out(hitsAndFractions.size()); + if (hitsAndFractions.empty()) { + edm::LogError("EcalClustersGraph") << "No hits in cluster!!"; + } + // Map containing the available features for the rechits + DeepSCInputs::FeaturesMap rechitsFeatures; + for (unsigned int i = 0; i < hitsAndFractions.size(); i++) { + rechitsFeatures.clear(); + if (hitsAndFractions[i].first.subdetId() == EcalBarrel) { + double energy = (*recHitsEB_->find(hitsAndFractions[i].first)).energy(); + EBDetId eb_id(hitsAndFractions[i].first); + rechitsFeatures["ieta"] = eb_id.ieta(); //ieta + rechitsFeatures["iphi"] = eb_id.iphi(); //iphi + rechitsFeatures["iz"] = 0.; //iz + rechitsFeatures["en_withfrac"] = energy * hitsAndFractions[i].second; //energy * fraction + } else if (hitsAndFractions[i].first.subdetId() == EcalEndcap) { + double energy = (*recHitsEE_->find(hitsAndFractions[i].first)).energy(); + EEDetId ee_id(hitsAndFractions[i].first); + rechitsFeatures["ieta"] = ee_id.ix(); //ix + rechitsFeatures["iphi"] = ee_id.iy(); //iy + if (ee_id.zside() < 0) + rechitsFeatures["iz"] = -1.; //iz + if (ee_id.zside() > 0) + rechitsFeatures["iz"] = +1.; //iz + rechitsFeatures["en_withfrac"] = energy * hitsAndFractions[i].second; //energy * fraction + } else { + edm::LogError("EcalClustersGraph") << "Rechit is not either EB or EE!!"; + } + // Use the method in DeepSCGraphEvaluation to get only the requested variables and possible a rescaling + // (depends on configuration) + out[i] = scProducerCache_->deepSCEvaluator->getScaledInputs(rechitsFeatures, + scProducerCache_->deepSCEvaluator->inputFeaturesHits); + } + return out; +} + +DeepSCInputs::FeaturesMap EcalClustersGraph::computeVariables(const CaloCluster* seed, + const CaloCluster* cluster) const { + DeepSCInputs::FeaturesMap clFeatures; + const auto& clusterLocal = clusterPosition(cluster); + double cl_energy = cluster->energy(); + double cl_eta = cluster->eta(); + double cl_phi = cluster->phi(); + double seed_energy = seed->energy(); + double seed_eta = seed->eta(); + double seed_phi = seed->phi(); + clFeatures["cl_energy"] = cl_energy; //cl_energy + clFeatures["cl_et"] = cl_energy / std::cosh(cl_eta); //cl_et + clFeatures["cl_eta"] = cl_eta; //cl_eta + clFeatures["cl_phi"] = cl_phi; //cl_phi + clFeatures["cl_ieta"] = clusterLocal[0]; //cl_ieta/ix + clFeatures["cl_iphi"] = clusterLocal[1]; //cl_iphi/iy + clFeatures["cl_iz"] = clusterLocal[2]; //cl_iz + clFeatures["cl_seed_dEta"] = deltaEta(seed_eta, cl_eta); //cl_dEta + clFeatures["cl_seed_dPhi"] = deltaPhi(seed_phi, cl_phi); //cl_dPhi + clFeatures["cl_seed_dEnergy"] = seed_energy - cl_energy; //cl_dEnergy + clFeatures["cl_seed_dEt"] = (seed_energy / std::cosh(seed_eta)) - (cl_energy / std::cosh(cl_eta)); //cl_dEt + clFeatures["cl_nxtals"] = cluster->hitsAndFractions().size(); // nxtals + return clFeatures; +} + +DeepSCInputs::FeaturesMap EcalClustersGraph::computeWindowVariables( + const std::vector& clusters) const { + size_t nCls = clusters.size(); + std::map min; + std::map max; + std::map avg; + for (const auto& clFeatures : clusters) { + for (auto const& [key, val] : clFeatures) { + avg[key] += (val / nCls); + if (val < min[key]) + min[key] = val; + if (val > max[key]) + max[key] = val; + } + } + DeepSCInputs::FeaturesMap windFeatures; + for (auto const& el : clusters.front()) { + windFeatures["max_" + el.first] = max[el.first]; + windFeatures["min_" + el.first] = min[el.first]; + windFeatures["avg_" + el.first] = avg[el.first]; + } + return windFeatures; +} + +std::pair EcalClustersGraph::computeCovariances(const CaloCluster* cluster) { + double numeratorEtaWidth = 0; + double numeratorPhiWidth = 0; + double denominator = cluster->energy(); + double clEta = cluster->position().eta(); + double clPhi = cluster->position().phi(); + std::shared_ptr this_cell; + EcalRecHitCollection::const_iterator rHit; + + const std::vector>& detId = cluster->hitsAndFractions(); + // Loop over recHits associated with the given SuperCluster + for (const auto& hit : detId) { + if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_BARREL) { + rHit = recHitsEB_->find(hit.first); + if (rHit == recHitsEB_->end()) { + continue; + } + this_cell = ebGeom_->getGeometry(rHit->id()); + } else if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_ENDCAP) { + rHit = recHitsEE_->find(hit.first); + if (rHit == recHitsEE_->end()) { + continue; + } + this_cell = eeGeom_->getGeometry(rHit->id()); + } else { + continue; + } + + GlobalPoint position = this_cell->getPosition(); + //take into account energy fractions + double energyHit = rHit->energy() * hit.second; + //form differences + double dPhi = deltaPhi(position.phi(), clPhi); + double dEta = position.eta() - clEta; + if (energyHit > 0) { + numeratorEtaWidth += energyHit * dEta * dEta; + numeratorPhiWidth += energyHit * dPhi * dPhi; + } + } + double etaWidth = sqrt(numeratorEtaWidth / denominator); + double phiWidth = sqrt(numeratorPhiWidth / denominator); + + return std::make_pair(etaWidth, phiWidth); +} + +std::vector EcalClustersGraph::computeShowerShapes(const CaloCluster* cluster, bool full5x5 = false) { + std::vector showerVars_; + showerVars_.resize(8); + widths_ = computeCovariances(cluster); + float e1 = 1.; + float e4 = 0.; + float r9 = 0.; + + if (full5x5) { + if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_BARREL) { + locCov_ = noZS::EcalClusterTools::localCovariances(*cluster, recHitsEB_, topology_); + e1 = noZS::EcalClusterTools::eMax(*cluster, recHitsEB_); + e4 = noZS::EcalClusterTools::eTop(*cluster, recHitsEB_, topology_) + + noZS::EcalClusterTools::eRight(*cluster, recHitsEB_, topology_) + + noZS::EcalClusterTools::eBottom(*cluster, recHitsEB_, topology_) + + noZS::EcalClusterTools::eLeft(*cluster, recHitsEB_, topology_); + r9 = noZS::EcalClusterTools::e3x3(*cluster, recHitsEB_, topology_) / cluster->energy(); //r9 + + } else if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_ENDCAP) { + locCov_ = noZS::EcalClusterTools::localCovariances(*cluster, recHitsEE_, topology_); + e1 = noZS::EcalClusterTools::eMax(*cluster, recHitsEE_); + e4 = noZS::EcalClusterTools::eTop(*cluster, recHitsEE_, topology_) + + noZS::EcalClusterTools::eRight(*cluster, recHitsEE_, topology_) + + noZS::EcalClusterTools::eBottom(*cluster, recHitsEE_, topology_) + + noZS::EcalClusterTools::eLeft(*cluster, recHitsEE_, topology_); + r9 = noZS::EcalClusterTools::e3x3(*cluster, recHitsEE_, topology_) / cluster->energy(); //r9 + } + } else { + if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_BARREL) { + locCov_ = EcalClusterTools::localCovariances(*cluster, recHitsEB_, topology_); + e1 = EcalClusterTools::eMax(*cluster, recHitsEB_); + e4 = EcalClusterTools::eTop(*cluster, recHitsEB_, topology_) + + EcalClusterTools::eRight(*cluster, recHitsEB_, topology_) + + EcalClusterTools::eBottom(*cluster, recHitsEB_, topology_) + + EcalClusterTools::eLeft(*cluster, recHitsEB_, topology_); + r9 = EcalClusterTools::e3x3(*cluster, recHitsEB_, topology_) / cluster->energy(); //r9 + } else if (PFLayer::fromCaloID(cluster->caloID()) == PFLayer::ECAL_ENDCAP) { + locCov_ = EcalClusterTools::localCovariances(*cluster, recHitsEE_, topology_); + e1 = EcalClusterTools::eMax(*cluster, recHitsEE_); + e4 = EcalClusterTools::eTop(*cluster, recHitsEE_, topology_) + + EcalClusterTools::eRight(*cluster, recHitsEE_, topology_) + + EcalClusterTools::eBottom(*cluster, recHitsEE_, topology_) + + EcalClusterTools::eLeft(*cluster, recHitsEE_, topology_); + r9 = EcalClusterTools::e3x3(*cluster, recHitsEE_, topology_) / cluster->energy(); + } + } + showerVars_[0] = r9; + showerVars_[1] = sqrt(locCov_[0]); //sigmaietaieta + showerVars_[2] = locCov_[1]; //sigmaietaiphi + showerVars_[3] = (!edm::isFinite(locCov_[2])) ? 0. : sqrt(locCov_[2]); //sigmaiphiiphi + showerVars_[4] = (e1 != 0.) ? 1. - e4 / e1 : -999.; //swiss_cross + showerVars_[5] = cluster->hitsAndFractions().size(); //nXtals + showerVars_[6] = widths_.first; //etaWidth + showerVars_[7] = widths_.second; //phiWidth + + return showerVars_; +} + +void EcalClustersGraph::fillVariables() { + LogDebug("EcalClustersGraph") << "Fill tensorflow input vector"; + const auto& deepSCEval = scProducerCache_->deepSCEvaluator; + // Reserving the batch dimension + inputs_.clustersX.reserve(nSeeds_); + inputs_.hitsX.reserve(nSeeds_); + inputs_.windowX.reserve(nSeeds_); + inputs_.isSeed.reserve(nSeeds_); + + // Looping on all the seeds (window) + for (uint is = 0; is < nSeeds_; is++) { + const auto seedPointer = (*clusters_[is]).ptr().get(); + std::vector unscaledClusterFeatures; + const auto& outEdges = graphMap_.getOutEdges(is); + size_t ncls = outEdges.size(); + // Reserve the vector size + inputs_.clustersX[is].reserve(ncls); + inputs_.hitsX[is].reserve(ncls); + inputs_.isSeed[is].reserve(ncls); + unscaledClusterFeatures.reserve(ncls); + // Loop on all the clusters + for (const auto ic : outEdges) { + LogTrace("EcalClustersGraph") << "seed: " << is << ", out edge --> " << ic; + const auto clPointer = (*clusters_[ic]).ptr().get(); + const auto& clusterFeatures = computeVariables(seedPointer, clPointer); + for (const auto& [key, val] : clusterFeatures) { + LogTrace("EcalCluster") << key << "=" << val; + } + unscaledClusterFeatures.push_back(clusterFeatures); + // Select and scale only the requested variables for the tensorflow model input + inputs_.clustersX[is].push_back(deepSCEval->getScaledInputs(clusterFeatures, deepSCEval->inputFeaturesClusters)); + // The scaling and feature selection on hits is performed inside the function for each hit + inputs_.hitsX[is].push_back(fillHits(clPointer)); + inputs_.isSeed[is].push_back(ic == is); + } + // For window we need the unscaled cluster features and then we select them + inputs_.windowX[is] = + deepSCEval->getScaledInputs(computeWindowVariables(unscaledClusterFeatures), deepSCEval->inputFeaturesWindows); + } + LogTrace("EcalClustersGraph") << "N. Windows: " << inputs_.clustersX.size(); +} + +void EcalClustersGraph::evaluateScores() { + // Evaluate model + const auto& scores = scProducerCache_->deepSCEvaluator->evaluate(inputs_); + for (uint i = 0; i < nSeeds_; ++i) { + uint k = 0; + LogTrace("EcalClustersGraph") << "Score) seed: " << i << ":"; + for (auto const& j : graphMap_.getOutEdges(i)) { + // Fill the scores from seed --> node (i --> j) + // Not symmetrically, in order to save multiple values for seeds in other + // seeds windows. + graphMap_.setAdjMatrix(i, j, scores[i][k]); + LogTrace("EcalClustersGraph") << "\t" << i << "-->" << j << ": " << scores[i][k]; + k++; + } + } +} + +void EcalClustersGraph::setThresholds() { + // Simple global threshold for the moment + threshold_ = 0.5; +} + +void EcalClustersGraph::selectClusters() { + // Collect the final superClusters as subgraphs + graphMap_.collectNodes(strategy_, threshold_); +} + +EcalClustersGraph::EcalGraphOutput EcalClustersGraph::getGraphOutput() { + EcalClustersGraph::EcalGraphOutput finalWindows_; + const auto& finalSuperClusters_ = graphMap_.getGraphOutput(); + for (const auto& [is, cls] : finalSuperClusters_) { + CalibratedClusterPtr seed = clusters_[is]; + CalibratedClusterPtrVector clusters_inWindow; + for (const auto& ic : cls) { + clusters_inWindow.push_back(clusters_[ic]); + } + finalWindows_.push_back({seed, clusters_inWindow}); + } + return finalWindows_; +} diff --git a/RecoEcal/EgammaCoreTools/src/GraphMap.cc b/RecoEcal/EgammaCoreTools/src/GraphMap.cc new file mode 100644 index 0000000000000..f7a0dc463eb39 --- /dev/null +++ b/RecoEcal/EgammaCoreTools/src/GraphMap.cc @@ -0,0 +1,305 @@ +#include "RecoEcal/EgammaCoreTools/interface/GraphMap.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" + +#include +#include + +using namespace reco; + +GraphMap::GraphMap(uint nNodes) : nNodes_(nNodes) { + // One entry for node in the edges_ list + edgesIn_.resize(nNodes); + edgesOut_.resize(nNodes); +} + +void GraphMap::addNode(const uint index, const NodeCategory category) { + nodesCategories_[category].push_back(index); + nodesCount_[category] += 1; +} + +void GraphMap::addNodes(const std::vector &indices, const std::vector &categories) { + for (size_t i = 0; i < indices.size(); i++) { + addNode(indices[i], categories[i]); + } +} + +void GraphMap::addEdge(const uint i, const uint j) { + // The first index is the starting node of the outcoming edge. + edgesOut_.at(i).push_back(j); + edgesIn_.at(j).push_back(i); + // Adding a connection in the adjacency matrix only in one direction + adjMatrix_[{i, j}] = 1.; +} + +void GraphMap::setAdjMatrix(const uint i, const uint j, const float score) { adjMatrix_[{i, j}] = score; }; + +void GraphMap::setAdjMatrixSym(const uint i, const uint j, const float score) { + adjMatrix_[{i, j}] = score; + adjMatrix_[{j, i}] = score; +}; + +const std::vector &GraphMap::getOutEdges(const uint i) const { return edgesOut_.at(i); }; + +const std::vector &GraphMap::getInEdges(const uint i) const { return edgesIn_.at(i); }; + +uint GraphMap::getAdjMatrix(const uint i, const uint j) const { return adjMatrix_.at({i, j}); }; + +std::vector GraphMap::getAdjMatrixRow(const uint i) const { + std::vector out; + for (const auto &j : getOutEdges(i)) { + out.push_back(adjMatrix_.at({i, j})); + } + return out; +}; + +std::vector GraphMap::getAdjMatrixCol(const uint j) const { + std::vector out; + for (const auto &i : getInEdges(j)) { + out.push_back(adjMatrix_.at({i, j})); + } + return out; +}; + +//================================================= +// Debugging info +void GraphMap::printGraphMap() { + edm::LogVerbatim("GraphMap") << "OUT edges" << std::endl; + uint seed = 0; + for (const auto &s : edgesOut_) { + edm::LogVerbatim("GraphMap") << "cl: " << seed << " --> "; + for (const auto &e : s) { + edm::LogVerbatim("GraphMap") << e << " (" << adjMatrix_[{seed, e}] << ") "; + } + edm::LogVerbatim("GraphMap") << std::endl; + seed++; + } + edm::LogVerbatim("GraphMap") << std::endl << "IN edges" << std::endl; + seed = 0; + for (const auto &s : edgesIn_) { + edm::LogVerbatim("GraphMap") << "cl: " << seed << " <-- "; + for (const auto &e : s) { + edm::LogVerbatim("GraphMap") << e << " (" << adjMatrix_[{e, seed}] << ") "; + } + edm::LogVerbatim("GraphMap") << std::endl; + seed++; + } + edm::LogVerbatim("GraphMap") << std::endl << "AdjMatrix" << std::endl; + for (const auto &s : nodesCategories_[NodeCategory::kSeed]) { + for (size_t n = 0; n < nNodes_; n++) { + edm::LogVerbatim("GraphMap") << std::setprecision(2) << adjMatrix_[{s, n}] << " "; + } + edm::LogVerbatim("GraphMap") << std::endl; + } +} + +//-------------------------------------------------------------- +// Nodes collection algorithms +void GraphMap::collectNodes(GraphMap::CollectionStrategy strategy, float threshold) { + // Clear any stored graph output + graphOutput_.clear(); + + if (strategy == GraphMap::CollectionStrategy::Cascade) { + // Starting from the highest energy seed, collect all the nodes. + // Other seeds collected by higher energy seeds are ignored + collectCascading(threshold); + } else if (strategy == GraphMap::CollectionStrategy::CollectAndMerge) { + // First, for each simple node (no seed) keep only the edge with the highest score. + // Then collect all the simple nodes around the seeds. + // Edges between the seed are ignored. + // Finally, starting from the first seed (highest pt), look for linked secondary seed + // and if they pass the threshold, merge their noded. + assignHighestScoreEdge(); + const auto &[seedsGraph, simpleNodesMap] = collectSeparately(threshold); + mergeSubGraphs(threshold, seedsGraph, simpleNodesMap); + } else if (strategy == GraphMap::CollectionStrategy::SeedsFirst) { + // Like strategy D, but after solving the edges between the seeds, + // the simple nodes edges are cleaned to keep only the highest score link. + // Then proceed as strategy B. + resolveSuperNodesEdges(threshold); + assignHighestScoreEdge(); + collectCascading(threshold); + } else if (strategy == GraphMap::CollectionStrategy::CascadeHighest) { + // First, for each simple node keep only the edge with the highest score. + // Then proceed as strategy A, from the first seed cascading to the others. + // Secondary seeds that are linked, are absorbed and ignored in the next iteration: + // this implies that nodes connected to these seeds are lost. + assignHighestScoreEdge(); + collectCascading(threshold); + } +} + +//---------------------------------------- +// Implementation of single actions + +void GraphMap::collectCascading(float threshold) { + // Starting from the highest energy seed, collect all the nodes. + // Other seeds collected by higher energy seeds are ignored + const auto &seeds = nodesCategories_[NodeCategory::kSeed]; + // seeds are already included in order + LogDebug("GraphMap") << "Cascading..."; + for (const auto &s : seeds) { + LogTrace("GraphMap") << "seed: " << s; + std::vector collectedNodes; + // Check if the seed if still available + if (adjMatrix_[{s, s}] < threshold) + continue; + // Loop on the out-coming edges + for (const auto &out : edgesOut_[s]) { + // Check the threshold for association + if (adjMatrix_[{s, out}] >= threshold) { + LogTrace("GraphMap") << "\tOut edge: " << s << " --> " << out; + // Save the node + collectedNodes.push_back(out); + // Remove all incoming edges to the selected node + // So that it cannot be taken from other SuperNodes + for (const auto &out_in : edgesIn_[out]) { + // There can be 4 cases: + // 1) out == s, out_in can be an incoming edge from other seed: to be removed + // 2) out == s, out_in==s (self-loop): zeroing will remove the current node from the available ones + // 3) out == r, out_in==s (current link): keep this + // 4) out == r, out_in==r (self-loop on other seeds): remove this making the other seed not accessible + if (out != s && out_in == s) + continue; // No need to remove the edge we are using + adjMatrix_[{out_in, out}] = 0.; + LogTrace("GraphMap") << "\t\t Deleted edge: " << out << " <-- " << out_in; + } + } + } + graphOutput_.push_back({s, collectedNodes}); + } +} + +void GraphMap::assignHighestScoreEdge() { + // First for each simple node (no seed) keep only the highest score link + // Then perform strategy A. + LogTrace("GraphMap") << "Keep only highest score edge"; + for (const auto &cl : nodesCategories_[NodeCategory::kNode]) { + std::pair maxPair{0, 0}; + bool found = false; + for (const auto &seed : edgesIn_[cl]) { + float score = adjMatrix_[{seed, cl}]; + if (score > maxPair.second) { + maxPair = {seed, score}; + found = true; + } + } + if (!found) + continue; + LogTrace("GraphMap") << "cluster: " << cl << " edge from " << maxPair.first; + // Second loop to remove all the edges apart from the max + for (const auto &seed : edgesIn_[cl]) { + if (seed != maxPair.first) { + adjMatrix_[{seed, cl}] = 0.; + } + } + } +} + +std::pair GraphMap::collectSeparately(float threshold) { + // Save a subgraph of only seeds, without self-loops + GraphOutput seedsGraph; + // Collect all the nodes around seeds, but not other seeds + GraphOutputMap simpleNodesGraphMap; + LogDebug("GraphMap") << "Collecting separately each seed..."; + // seeds are already included in order + for (const auto &s : nodesCategories_[NodeCategory::kSeed]) { + LogTrace("GraphMap") << "seed: " << s; + std::vector collectedNodes; + std::vector collectedSeeds; + // Check if the seed if still available + if (adjMatrix_[{s, s}] < threshold) + continue; + // Loop on the out-coming edges + for (const auto &out : edgesOut_[s]) { + // Check if it is another seed + // if out is a seed adjMatrix[self-loop] > 0 + if (out != s && adjMatrix_[{out, out}] > 0) { + // DO NOT CHECK the score of the edge, it will be checked during the merging + collectedSeeds.push_back(out); + // No self-loops are saved in the seed graph output + // Then continue and do not work on this edgeOut + continue; + } + // Check the threshold for association + if (adjMatrix_[{s, out}] >= threshold) { + LogTrace("GraphMap") << "\tOut edge: " << s << " --> " << out << " (" << adjMatrix_[{s, out}] << " )"; + // Save the node + collectedNodes.push_back(out); + // The links of the node to other seeds are not touched + // IF the function is called after assignHighestScoreEdge + // the other links have been already removed. + // IF not: the same node can be assigned to more subgraphs. + // The self-loop is included in this case in order to save the seed node + // in its own sub-graph. + } + } + simpleNodesGraphMap[s] = collectedNodes; + seedsGraph.push_back({s, collectedSeeds}); + } + return std::make_pair(seedsGraph, simpleNodesGraphMap); +} + +void GraphMap::mergeSubGraphs(float threshold, GraphOutput seedsGraph, GraphOutputMap nodesGraphMap) { + // We have the graph between the seed and a map of + // simple nodes connected to each seed separately. + // Now we link them and build superGraphs starting from the first seed + LogTrace("GraphMap") << "Starting merging"; + for (const auto &[s, other_seeds] : seedsGraph) { + LogTrace("GraphMap") << "seed: " << s; + // Check if the seed is still available + if (adjMatrix_[{s, s}] < threshold) + continue; + // If it is, we collect the final list of nodes + std::vector collectedNodes; + // Take the previously connected simple nodes to the current seed one + const auto &simpleNodes = nodesGraphMap[s]; + collectedNodes.insert(std::end(collectedNodes), std::begin(simpleNodes), std::end(simpleNodes)); + // Check connected seeds + for (const auto &out_s : other_seeds) { + // Check the score of the edge for the merging + // and check if the other seed has not been taken already + if (adjMatrix_[{out_s, out_s}] > threshold && adjMatrix_[{s, out_s}] > threshold) { + LogTrace("GraphMap") << "\tMerging nodes from seed: " << out_s; + // Take the nodes already linked to this seed + const auto &otherNodes = nodesGraphMap[out_s]; + // We don't check for duplicates because assignHighestScoreEdge() should + // have been called already + collectedNodes.insert(std::end(collectedNodes), std::begin(otherNodes), std::end(otherNodes)); + // Now let's disable the secondary seed + adjMatrix_[{out_s, out_s}] = 0.; + // This makes the strategy NOT RECURSIVE + // Other seeds linked to the disable seed won't be collected, but analyzed independently. + } + } + // Now remove the current seed from the available ones, + // if not other seeds could take it and we would have a double use of objects. + adjMatrix_[{s, s}] = 0; + graphOutput_.push_back({s, collectedNodes}); + } +} + +void GraphMap::resolveSuperNodesEdges(float threshold) { + LogTrace("GraphMap") << "Resolving seeds"; + for (const auto &s : nodesCategories_[NodeCategory::kSeed]) { + LogTrace("GraphMap") << "seed: " << s; + // Check if the seed if still available + if (adjMatrix_[{s, s}] < threshold) + continue; + // Loop on the out-coming edges + for (const auto &out : edgesOut_[s]) { + if (out != s && adjMatrix_[{out, out}] > 0 && adjMatrix_[{s, out}] > threshold) { + // This is a link to another still available seed + // If the edge score is good the other seed node is not disabled --> remove it + LogTrace("GraphMap") << "\tdisable seed: " << out; + adjMatrix_[{out, out}] = 0.; + // Remove the edges from that seed + // This is needed in order to be able to use the + // assignHighestScoreEdge after this function correctly + for (const auto &c : edgesOut_[out]) { + adjMatrix_[{out, c}] = 0.; + // We don't touch the edgesIn and edgesOut collections but we zero the adjmatrix --> more efficient + } + } + } + } +}