diff --git a/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTHgcalTiclPFClusteringForEgammaL1SeededSequence_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTHgcalTiclPFClusteringForEgammaL1SeededSequence_cfi.py index b4564dbd4a566..365463a73a7a2 100644 --- a/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTHgcalTiclPFClusteringForEgammaL1SeededSequence_cfi.py +++ b/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTHgcalTiclPFClusteringForEgammaL1SeededSequence_cfi.py @@ -48,8 +48,8 @@ linkingPSet = cms.PSet( type=cms.string("SuperClusteringDNN"), algo_verbosity=cms.int32(0), - onnxModelPath = cms.FileInPath("RecoHGCal/TICL/data/superclustering/supercls_v2p1.onnx"), - nnWorkingPoint=cms.double(0.3), + onnxModelPath = cms.FileInPath("RecoHGCal/TICL/data/superclustering/supercls_v3.onnx"), + nnWorkingPoint=cms.double(0.57247), ), tracksters_collections = [cms.InputTag("hltTiclTrackstersCLUE3DHighL1Seeded")], # to be changed to ticlTrackstersCLUE3DEM once separate CLUE3D iterations are introduced ) diff --git a/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTHgcalTiclPFClusteringForEgammaUnseededSequence_cfi.py b/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTHgcalTiclPFClusteringForEgammaUnseededSequence_cfi.py index 27be134b941cb..78fe399cbc696 100644 --- a/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTHgcalTiclPFClusteringForEgammaUnseededSequence_cfi.py +++ b/HLTrigger/Configuration/python/HLT_75e33/sequences/HLTHgcalTiclPFClusteringForEgammaUnseededSequence_cfi.py @@ -61,8 +61,8 @@ linkingPSet = cms.PSet( type=cms.string("SuperClusteringDNN"), algo_verbosity=cms.int32(0), - onnxModelPath = cms.FileInPath("RecoHGCal/TICL/data/superclustering/supercls_v2p1.onnx"), - nnWorkingPoint=cms.double(0.3), + onnxModelPath = cms.FileInPath("RecoHGCal/TICL/data/superclustering/supercls_v3.onnx"), + nnWorkingPoint=cms.double(0.57247), ), tracksters_collections = [cms.InputTag("hltTiclTrackstersCLUE3DHigh")], # to be changed to 
ticlTrackstersCLUE3DEM once separate CLUE3D iterations are introduced ) diff --git a/RecoHGCal/TICL/interface/SuperclusteringDNNInputs.h b/RecoHGCal/TICL/interface/SuperclusteringDNNInputs.h index 90ed787066022..bf7bddc762b22 100644 --- a/RecoHGCal/TICL/interface/SuperclusteringDNNInputs.h +++ b/RecoHGCal/TICL/interface/SuperclusteringDNNInputs.h @@ -2,6 +2,10 @@ // Author: Theo Cuisset - theo.cuisset@cern.ch // Date: 11/2023 +// Modified by Gamze Sokmen - gamze.sokmen@cern.ch +// Changes: Implementation of the delta time feature under a new DNN input version (v3) for the superclustering DNN and correcting the seed pT calculation. +// Date: 07/2025 + #ifndef __RecoHGCal_TICL_SuperclusteringDNNInputs_H__ #define __RecoHGCal_TICL_SuperclusteringDNNInputs_H__ @@ -10,6 +14,11 @@ #include namespace ticl { + + // any raw_dt outside +/- kDeltaTimeDefault is considered bad + static constexpr float kDeltaTimeDefault = 50.f; + static constexpr float kBadDeltaTime = -5.f; + class Trackster; // Abstract base class for DNN input preparation. @@ -87,7 +96,39 @@ namespace ticl { } }; + /* Third version of DNN by Gamze Sokmen and Shamik Ghosh, making use of time information as new variables. 
+ Uses features : ['DeltaEta', 'DeltaPhi', 'multi_en', 'multi_eta', 'multi_pt', 'seedEta', 'seedPhi', 'seedEn', 'seedPt', 'theta', 'theta_xz_seedFrame', 'theta_yz_seedFrame', 'theta_xy_cmsFrame', 'theta_yz_cmsFrame', 'theta_xz_cmsFrame', 'explVar', 'explVarRatio', 'mod_deltaTime'] + */ + + class SuperclusteringDNNInputV3 : public AbstractSuperclusteringDNNInput { + public: + unsigned int featureCount() const override { return 18; } + + std::vector computeVector(ticl::Trackster const& ts_base, ticl::Trackster const& ts_toCluster) override; + + std::vector featureNames() const override { + return {"DeltaEtaBaryc", + "DeltaPhiBaryc", + "multi_en", + "multi_eta", + "multi_pt", + "seedEta", + "seedPhi", + "seedEn", + "seedPt", + "theta", + "theta_xz_seedFrame", + "theta_yz_seedFrame", + "theta_xy_cmsFrame", + "theta_yz_cmsFrame", + "theta_xz_cmsFrame", + "explVar", + "explVarRatio", + "mod_deltaTime"}; + } + }; + std::unique_ptr makeSuperclusteringDNNInputFromString(std::string dnnVersion); } // namespace ticl -#endif \ No newline at end of file +#endif diff --git a/RecoHGCal/TICL/plugins/SuperclusteringSampleDumper.cc b/RecoHGCal/TICL/plugins/SuperclusteringSampleDumper.cc index 7208a3c923a86..1600b9f9249f1 100644 --- a/RecoHGCal/TICL/plugins/SuperclusteringSampleDumper.cc +++ b/RecoHGCal/TICL/plugins/SuperclusteringSampleDumper.cc @@ -265,8 +265,8 @@ void SuperclusteringSampleDumper::fillDescriptions(edm::ConfigurationDescription ->setComment("Input trackster collection, same as what is used for superclustering inference."); desc.add("recoToSimAssociatorCP", edm::InputTag("tracksterSimTracksterAssociationLinkingbyCLUE3D", "recoToSim")); - desc.ifValue(edm::ParameterDescription("dnnInputsVersion", "v2", true), - edm::allowedValues("v1", "v2")) + desc.ifValue(edm::ParameterDescription("dnnInputsVersion", "v3", true), + edm::allowedValues("v1", "v2", "v3")) ->setComment( "DNN inputs version tag. Defines which set of features is fed to the DNN. 
Must match with the actual DNN."); // Cuts are intentionally looser than those used for inference in TracksterLinkingBySuperClustering.cpp diff --git a/RecoHGCal/TICL/plugins/TracksterLinkingbySuperClusteringDNN.cc b/RecoHGCal/TICL/plugins/TracksterLinkingbySuperClusteringDNN.cc index 49ad7f0c6b9e3..4bcf0c1200af6 100644 --- a/RecoHGCal/TICL/plugins/TracksterLinkingbySuperClusteringDNN.cc +++ b/RecoHGCal/TICL/plugins/TracksterLinkingbySuperClusteringDNN.cc @@ -1,6 +1,6 @@ /* TICL plugin for electron superclustering in HGCAL using a DNN. -DNN designed by Alessandro Tarabini. +DNN designed by Alessandro Tarabini, Florian Beaudette, Gamze Sokmen, Shamik Ghosh, Theo Cuisset. Inputs are CLUE3D EM tracksters. Outputs are superclusters (as vectors of IDs of trackster) "Seed trackster" : seed of supercluster, always highest pT trackster of supercluster, normally should be an electron @@ -18,6 +18,9 @@ The loop is first on candidate, then on seeds as it is more efficient for step 4 Authors : Theo Cuisset , Shamik Ghosh Date : 11/2023 + +Updates : Fixed supercluster building (a seed that already has a supercluster can still be chosen by later candidates; only tracksters already used as candidates are excluded as seeds) and switched the default DNN inputs version to v3 (Shamik) +Date: 07/2025 */ #include @@ -146,7 +149,8 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( Trackster const& ts_cand = inputTracksters[trackstersIndicesPt[ts_cand_idx_pt]]; if (ts_cand.raw_energy() < candidateEnergyThreshold_ || - !checkExplainedVarianceRatioCut(ts_cand)) // || !trackstersPassesPIDCut(ts_cand) + // !checkExplainedVarianceRatioCut(ts_cand)) // || !trackstersPassesPIDCut(ts_cand) + !checkExplainedVarianceRatioCut(ts_cand)) // || !trackstersPassesPIDCut(ts_cand)) continue; auto& tracksterTiles = tracksterTilesBothEndcaps_pt[ts_cand.barycenter().eta() > 0]; @@ -245,37 +249,37 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( Also mask seeds (only needed to add tracksters not in a supercluster to the output). 
*/ std::vector tracksterMask(tracksterCount, false); - /* Index of the seed trackster of the previous iteration - Initialized with an id that cannot be obtained in input */ + /////////////////////////////////////////////////////////////////////////TRKBUILDINGMOD + unsigned int previousCandTrackster_idx = std::numeric_limits::max(); unsigned int bestSeedForCurrentCandidate_idx = std::numeric_limits::max(); float bestSeedForCurrentCandidate_dnnScore = nnWorkingPoint_; - // Lambda to be called when there is a transition from one candidate to the next (as well as after the last iteration) - // Does the actual supercluster creation + // Track which tracksters were ever used as candidates + std::vector usedAsCandidate(tracksterCount, false); + auto onCandidateTransition = [&](unsigned ts_cand_idx) { - if (bestSeedForCurrentCandidate_idx < - std::numeric_limits::max()) { // At least one seed can be superclustered with the candidate - tracksterMask[ts_cand_idx] = true; // Mask the candidate so it is not considered as seed in later iterations - - // Look for a supercluster of the seed - std::vector>::iterator seed_supercluster_it = - std::find_if(outputSuperclusters.begin(), - outputSuperclusters.end(), - [bestSeedForCurrentCandidate_idx](std::vector const& sc) { - return sc[0] == bestSeedForCurrentCandidate_idx; - }); - - if (seed_supercluster_it == outputSuperclusters.end()) { // No supercluster exists yet for the seed. Create one. 
+ if (bestSeedForCurrentCandidate_idx < std::numeric_limits::max()) { + tracksterMask[ts_cand_idx] = true; // Mask the candidate so it’s not reused as a seed + usedAsCandidate[ts_cand_idx] = true; + + // Find the supercluster the seed belongs to (even if it's already used in another supercluster) + // Find existing supercluster for the seed + auto seed_supercluster_it = std::find_if(outputSuperclusters.begin(), + outputSuperclusters.end(), + [bestSeedForCurrentCandidate_idx](const std::vector& sc) { + return sc[0] == bestSeedForCurrentCandidate_idx; + }); + if (seed_supercluster_it == outputSuperclusters.end()) { + // No supercluster exists for this seed, create one outputSuperclusters.emplace_back(std::initializer_list{bestSeedForCurrentCandidate_idx}); resultTracksters.emplace_back(inputTracksters[bestSeedForCurrentCandidate_idx]); linkedTracksterIdToInputTracksterId.emplace_back( std::initializer_list{bestSeedForCurrentCandidate_idx}); seed_supercluster_it = outputSuperclusters.end() - 1; - tracksterMask[bestSeedForCurrentCandidate_idx] = - true; // mask the seed as well (needed to find tracksters not in any supercluster) + tracksterMask[bestSeedForCurrentCandidate_idx] = true; } - // Index of the supercluster into resultTracksters, outputSuperclusters and linkedTracksterIdToInputTracksterId collections (the indices are the same) + unsigned int indexIntoOutputTracksters = seed_supercluster_it - outputSuperclusters.begin(); seed_supercluster_it->push_back(ts_cand_idx); resultTracksters[indexIntoOutputTracksters].mergeTracksters(inputTracksters[ts_cand_idx]); @@ -290,10 +294,10 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( } }; - //Iterate over minibatches + // Iterate over minibatches for (unsigned int batchIndex = 0; batchIndex < batchOutputs.size(); batchIndex++) { - std::vector const& currentBatchOutputs = batchOutputs[batchIndex]; // DNN score outputs - // Iterate over seed-candidate pairs inside current minibatch + std::vector const& 
currentBatchOutputs = batchOutputs[batchIndex]; + for (unsigned int indexInBatch = 0; indexInBatch < tracksterIndicesUsedInDNN[batchIndex].size(); indexInBatch++) { assert(indexInBatch < static_cast(batchOutputs[batchIndex].size())); @@ -303,21 +307,21 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( if (previousCandTrackster_idx != std::numeric_limits::max() && ts_cand_idx != previousCandTrackster_idx) { - // There is a transition from one seed to the next (don't make a transition for the first iteration) onCandidateTransition(previousCandTrackster_idx); } - if (currentDnnScore > bestSeedForCurrentCandidate_dnnScore && !tracksterMask[ts_seed_idx]) { - // Check that the DNN suggests superclustering, that this seed-candidate assoc is better than previous ones, and that the seed is not already in a supercluster as candidate + // Ignore seed if it was previously used as a candidate + if (currentDnnScore > bestSeedForCurrentCandidate_dnnScore && !usedAsCandidate[ts_seed_idx]) { bestSeedForCurrentCandidate_idx = ts_seed_idx; bestSeedForCurrentCandidate_dnnScore = currentDnnScore; } + previousCandTrackster_idx = ts_cand_idx; } } onCandidateTransition(previousCandTrackster_idx); - // Adding one-trackster superclusters for all tracksters not in a supercluster already that pass the seed threshold + // Create singleton superclusters for unused tracksters with enough pt for (unsigned int ts_id = 0; ts_id < tracksterCount; ts_id++) { if (!tracksterMask[ts_id] && inputTracksters[ts_id].raw_pt() >= seedPtThreshold_) { outputSuperclusters.emplace_back(std::initializer_list{ts_id}); @@ -326,6 +330,8 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( } } + /////////////////////////////////////////////////////////////////////////TRKBUILDINGMOD + #ifdef EDM_ML_DEBUG for (std::vector const& sc : outputSuperclusters) { std::ostringstream s; @@ -340,8 +346,8 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( void 
TracksterLinkingbySuperClusteringDNN::fillPSetDescription(edm::ParameterSetDescription& desc) { TracksterLinkingAlgoBase::fillPSetDescription(desc); // adds algo_verbosity desc.add("onnxModelPath")->setComment("Path to DNN (as ONNX model)"); - desc.ifValue(edm::ParameterDescription("dnnInputsVersion", "v2", true), - edm::allowedValues("v1", "v2")) + desc.ifValue(edm::ParameterDescription("dnnInputsVersion", "v3", true), + edm::allowedValues("v1", "v2", "v3")) ->setComment( "DNN inputs version tag. Defines which set of features is fed to the DNN. Must match with the actual DNN."); desc.add("inferenceBatchSize", 1e5) @@ -379,4 +385,4 @@ void TracksterLinkingbySuperClusteringDNN::fillPSetDescription(edm::ParameterSet {static_cast(Trackster::ParticleType::photon), static_cast(Trackster::ParticleType::electron)}) ->setComment("List of PID particle types (ticl::Trackster::ParticleType enum) to consider for PID filtering"); desc.add("PIDThreshold", 0.8)->setComment("PID score threshold"); -} +} \ No newline at end of file diff --git a/RecoHGCal/TICL/python/superclustering_cff.py b/RecoHGCal/TICL/python/superclustering_cff.py index 6f561223fd808..58d13a05a30f7 100644 --- a/RecoHGCal/TICL/python/superclustering_cff.py +++ b/RecoHGCal/TICL/python/superclustering_cff.py @@ -13,8 +13,8 @@ linkingPSet = cms.PSet( type=cms.string("SuperClusteringDNN"), algo_verbosity=cms.int32(0), - onnxModelPath = cms.FileInPath("RecoHGCal/TICL/data/superclustering/supercls_v2p1.onnx"), - nnWorkingPoint=cms.double(0.3), + onnxModelPath = cms.FileInPath("RecoHGCal/TICL/data/superclustering/supercls_v3.onnx"), + nnWorkingPoint=cms.double(0.57247), ), tracksters_collections = [cms.InputTag("ticlTrackstersCLUE3DHigh")], # to be changed to ticlTrackstersCLUE3DEM once separate CLUE3D iterations are introduced ) diff --git a/RecoHGCal/TICL/src/SuperclusteringDNNInputs.cc b/RecoHGCal/TICL/src/SuperclusteringDNNInputs.cc index be58724e8f5c9..560e0106ef554 100644 --- 
a/RecoHGCal/TICL/src/SuperclusteringDNNInputs.cc +++ b/RecoHGCal/TICL/src/SuperclusteringDNNInputs.cc @@ -1,6 +1,11 @@ /** Computation of input features for superclustering DNN. Used by plugins/TracksterLinkingBySuperClustering.cc and plugins/SuperclusteringSampleDumper.cc */ // Author: Theo Cuisset - theo.cuisset@cern.ch // Date: 11/2023 + +// Modified by Gamze Sokmen - gamze.sokmen@cern.ch +// Changes: Implementation of the delta time feature under a new DNN input version (v3) for the superclustering DNN and correcting the seed pT calculation. +// Date: 07/2025 + #include "RecoHGCal/TICL/interface/SuperclusteringDNNInputs.h" #include @@ -27,11 +32,11 @@ namespace ticl { ts_toCluster.barycenter().Phi() - ts_base.barycenter().phi(), //DeltaPhiBaryc ts_toCluster.raw_energy(), //multi_en ts_toCluster.barycenter().Eta(), //multi_eta - (ts_toCluster.raw_energy() * std::sin(ts_toCluster.barycenter().Theta())), //multi_pt + ts_toCluster.raw_pt(), //multi_pt ts_base.barycenter().Eta(), //seedEta ts_base.barycenter().Phi(), //seedPhi ts_base.raw_energy(), //seedEn - (ts_base.raw_energy() * std::sin(ts_toCluster.barycenter().Theta())), //seedPt + ts_base.raw_pt(), //seedPt }}; } @@ -86,11 +91,11 @@ namespace ticl { ts_toCluster.barycenter().Phi() - ts_base.barycenter().phi(), //DeltaPhiBaryc ts_toCluster.raw_energy(), //multi_en ts_toCluster.barycenter().Eta(), //multi_eta - (ts_toCluster.raw_energy() * std::sin(ts_toCluster.barycenter().Theta())), //multi_pt + ts_toCluster.raw_pt(), //multi_pt ts_base.barycenter().Eta(), //seedEta ts_base.barycenter().Phi(), //seedPhi ts_base.raw_energy(), //seedEn - (ts_base.raw_energy() * std::sin(ts_toCluster.barycenter().Theta())), //seedPt + ts_base.raw_pt(), //seedPt static_cast(Angle(pca_cand_cmsFrame, pca_seed_cmsFrame)), // theta : angle between seed and candidate Angle2D(XYVectorF(pca_cand_seedFrame.x(), pca_cand_seedFrame.z()), XYVectorF(0, 1)), // theta_xz_seedFrame Angle2D(XYVectorF(pca_cand_seedFrame.y(), 
pca_cand_seedFrame.z()), XYVectorF(0, 1)), // theta_yz_seedFrame @@ -105,12 +110,66 @@ namespace ticl { }}; } + std::vector SuperclusteringDNNInputV3::computeVector(Trackster const& ts_base, Trackster const& ts_toCluster) { + using ROOT::Math::XYVectorF; + using ROOT::Math::XYZVectorF; + using ROOT::Math::VectorUtil::Angle; + XYZVectorF const& pca_seed_cmsFrame(ts_base.eigenvectors(0)); + XYZVectorF const& pca_cand_cmsFrame(ts_toCluster.eigenvectors(0)); + XYZVectorF xs(pca_seed_cmsFrame.Cross(XYZVectorF(0, 0, 1)).Unit()); + ROOT::Math::Rotation3D rot(xs, xs.Cross(pca_seed_cmsFrame).Unit(), pca_seed_cmsFrame); + + XYZVectorF pca_cand_seedFrame = rot(pca_cand_cmsFrame); // seed coordinates + + float explVar_denominator = std::accumulate( + std::begin(ts_toCluster.eigenvalues()), std::end(ts_toCluster.eigenvalues()), 0.f, std::plus()); + float explVarRatio = 0.f; + if (explVar_denominator != 0.f) { + explVarRatio = ts_toCluster.eigenvalues()[0] / explVar_denominator; + } else { + edm::LogWarning("HGCalTICLSuperclustering") + << "Sum of eigenvalues was zero for trackster. Could not compute explained variance ratio."; + } + + // mod_deltaTime: candidate time minus seed time; a raw_dt outside [-kDeltaTimeDefault, +kDeltaTimeDefault] is replaced by kBadDeltaTime (presumably flags tracksters without a valid time - TODO confirm) + float raw_dt = ts_toCluster.time() - ts_base.time(); + float mod_deltaTime = (raw_dt < -kDeltaTimeDefault || raw_dt > kDeltaTimeDefault) ? 
kBadDeltaTime : raw_dt; + + return {{ + std::abs(ts_toCluster.barycenter().Eta()) - std::abs(ts_base.barycenter().Eta()), // DeltaEtaBaryc + ts_toCluster.barycenter().Phi() - ts_base.barycenter().phi(), // DeltaPhiBaryc + ts_toCluster.raw_energy(), // multi_en + ts_toCluster.barycenter().Eta(), // multi_eta + ts_toCluster.raw_pt(), // multi_pt + ts_base.barycenter().Eta(), // seedEta + ts_base.barycenter().Phi(), // seedPhi + ts_base.raw_energy(), // seedEn + ts_base.raw_pt(), // seedPt + static_cast(Angle(pca_cand_cmsFrame, pca_seed_cmsFrame)), // theta + Angle2D(XYVectorF(pca_cand_seedFrame.x(), pca_cand_seedFrame.z()), // theta_xz_seedFrame + XYVectorF(0, 1)), + Angle2D(XYVectorF(pca_cand_seedFrame.y(), pca_cand_seedFrame.z()), // theta_yz_seedFrame + XYVectorF(0, 1)), + Angle2D(XYVectorF(pca_cand_cmsFrame.x(), pca_cand_cmsFrame.y()), // theta_xy_cmsFrame + XYVectorF(pca_seed_cmsFrame.x(), pca_seed_cmsFrame.y())), + Angle2D(XYVectorF(pca_cand_cmsFrame.y(), pca_cand_cmsFrame.z()), // theta_yz_cmsFrame + XYVectorF(pca_seed_cmsFrame.y(), pca_seed_cmsFrame.z())), + Angle2D(XYVectorF(pca_cand_cmsFrame.x(), pca_cand_cmsFrame.z()), // theta_xz_cmsFrame + XYVectorF(pca_seed_cmsFrame.x(), pca_seed_cmsFrame.z())), + ts_toCluster.eigenvalues()[0], // explVar + explVarRatio, // explVarRatio + mod_deltaTime // mod_deltaTime + }}; + } + std::unique_ptr makeSuperclusteringDNNInputFromString(std::string dnnInputVersion) { if (dnnInputVersion == "v1") return std::make_unique(); else if (dnnInputVersion == "v2") return std::make_unique(); + else if (dnnInputVersion == "v3") + return std::make_unique(); assert(false); } -} // namespace ticl \ No newline at end of file +} // namespace ticl