-
Notifications
You must be signed in to change notification settings - Fork 4.6k
Updated SuperclusterDNN v3 for superclustering in TICLv5 #48811
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
ad12e52
e48b573
cb9937e
65e32db
6307d9d
1526f0d
fa854df
75a461b
f70dfb5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -151,7 +151,8 @@ void SuperclusteringSampleDumper::analyze(const edm::Event& evt, const edm::Even | |
| std::iota(trackstersIndicesPt.begin(), trackstersIndicesPt.end(), 0); | ||
| std::stable_sort( | ||
| trackstersIndicesPt.begin(), trackstersIndicesPt.end(), [&inputTracksters](unsigned int i1, unsigned int i2) { | ||
| return (*inputTracksters)[i1].raw_pt() > (*inputTracksters)[i2].raw_pt(); | ||
| return (*inputTracksters)[i1].raw_energy() * std::sin((*inputTracksters)[i1].barycenter().Theta()) > | ||
| (*inputTracksters)[i2].raw_energy() * std::sin((*inputTracksters)[i2].barycenter().Theta()); | ||
|
||
| }); | ||
|
|
||
| // The order of the loops is reversed compared to SuperclusteringProducer (here the outer loop is over seeds, the inner loop over candidates), for performance reasons. | ||
|
|
@@ -162,7 +163,7 @@ void SuperclusteringSampleDumper::analyze(const edm::Event& evt, const edm::Even | |
| trackstersIndicesPt[ts_seed_idx_pt]; // Index of seed trackster in input collection (not in pT sorted collection) | ||
| Trackster const& ts_seed = (*inputTracksters)[ts_seed_idx_input]; | ||
|
|
||
| if (ts_seed.raw_pt() < seedPtThreshold_) | ||
| if (ts_seed.raw_energy() * std::sin(ts_seed.barycenter().Theta()) < seedPtThreshold_) | ||
|
||
| break; // All further seeds will have lower pT than threshold (due to pT sorting) | ||
|
|
||
| if (!checkExplainedVarianceRatioCut(ts_seed)) | ||
|
|
@@ -265,8 +266,8 @@ void SuperclusteringSampleDumper::fillDescriptions(edm::ConfigurationDescription | |
| ->setComment("Input trackster collection, same as what is used for superclustering inference."); | ||
| desc.add<edm::InputTag>("recoToSimAssociatorCP", | ||
| edm::InputTag("tracksterSimTracksterAssociationLinkingbyCLUE3D", "recoToSim")); | ||
| desc.ifValue(edm::ParameterDescription<std::string>("dnnInputsVersion", "v2", true), | ||
| edm::allowedValues<std::string>("v1", "v2")) | ||
| desc.ifValue(edm::ParameterDescription<std::string>("dnnInputsVersion", "v3", true), | ||
| edm::allowedValues<std::string>("v1", "v2", "v3")) | ||
| ->setComment( | ||
| "DNN inputs version tag. Defines which set of features is fed to the DNN. Must match with the actual DNN."); | ||
| // Cuts are intentionally looser than those used for inference in TracksterLinkingBySuperClustering.cpp | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| /* | ||
| TICL plugin for electron superclustering in HGCAL using a DNN. | ||
| DNN designed by Alessandro Tarabini. | ||
| DNN designed by Alessandro Tarabini, Florian Beaudette, Gamze Sokmen, Shamik Ghosh, Theo Cuisset. | ||
|
|
||
| Inputs are CLUE3D EM tracksters. Outputs are superclusters (as vectors of IDs of trackster) | ||
| "Seed trackster" : seed of supercluster, always highest pT trackster of supercluster, normally should be an electron | ||
|
|
@@ -18,6 +18,9 @@ The loop is first on candidate, then on seeds as it is more efficient for step 4 | |
|
|
||
| Authors : Theo Cuisset <theo.cuisset@cern.ch>, Shamik Ghosh <shamik.ghosh@cern.ch> | ||
| Date : 11/2023 | ||
|
|
||
| Updates : Supercluster-building logic corrected; default DNN inputs version switched to v3 (Shamik) | ||
| Date: 07/2025 | ||
| */ | ||
|
|
||
| #include <string> | ||
|
|
@@ -116,7 +119,8 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( | |
| std::iota(trackstersIndicesPt.begin(), trackstersIndicesPt.end(), 0); | ||
| std::stable_sort( | ||
| trackstersIndicesPt.begin(), trackstersIndicesPt.end(), [&inputTracksters](unsigned int i1, unsigned int i2) { | ||
| return inputTracksters[i1].raw_pt() > inputTracksters[i2].raw_pt(); | ||
| return inputTracksters[i1].raw_energy() * std::sin(inputTracksters[i1].barycenter().Theta()) > | ||
| inputTracksters[i2].raw_energy() * std::sin(inputTracksters[i2].barycenter().Theta()); | ||
|
||
| }); | ||
|
|
||
| /* Evaluate in minibatches since running with trackster count = 3000 leads to a short-lived ~15GB memory allocation | ||
|
|
@@ -146,7 +150,8 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( | |
| Trackster const& ts_cand = inputTracksters[trackstersIndicesPt[ts_cand_idx_pt]]; | ||
|
|
||
| if (ts_cand.raw_energy() < candidateEnergyThreshold_ || | ||
| !checkExplainedVarianceRatioCut(ts_cand)) // || !trackstersPassesPIDCut(ts_cand) | ||
| // !checkExplainedVarianceRatioCut(ts_cand)) // || !trackstersPassesPIDCut(ts_cand) | ||
| !checkExplainedVarianceRatioCut(ts_cand)) // || !trackstersPassesPIDCut(ts_cand)) | ||
| continue; | ||
|
|
||
| auto& tracksterTiles = tracksterTilesBothEndcaps_pt[ts_cand.barycenter().eta() > 0]; | ||
|
|
@@ -164,7 +169,7 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( | |
|
|
||
| Trackster const& ts_seed = inputTracksters[trackstersIndicesPt[ts_seed_idx_pt]]; | ||
|
|
||
| if (ts_seed.raw_pt() < seedPtThreshold_) | ||
| if (ts_seed.raw_energy() * std::sin(ts_seed.barycenter().Theta()) < seedPtThreshold_) | ||
|
||
| break; // All further seeds will have lower pT than threshold (due to pT sorting) | ||
|
|
||
| if (!checkExplainedVarianceRatioCut(ts_seed) || !trackstersPassesPIDCut(ts_seed)) | ||
|
|
@@ -245,37 +250,37 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( | |
| Also mask seeds (only needed to add tracksters not in a supercluster to the output). */ | ||
| std::vector<bool> tracksterMask(tracksterCount, false); | ||
|
|
||
| /* Index of the seed trackster of the previous iteration | ||
| Initialized with an id that cannot be obtained in input */ | ||
| /////////////////////////////////////////////////////////////////////////TRKBUILDINGMOD | ||
|
|
||
| unsigned int previousCandTrackster_idx = std::numeric_limits<unsigned int>::max(); | ||
| unsigned int bestSeedForCurrentCandidate_idx = std::numeric_limits<unsigned int>::max(); | ||
| float bestSeedForCurrentCandidate_dnnScore = nnWorkingPoint_; | ||
|
|
||
| // Lambda to be called when there is a transition from one candidate to the next (as well as after the last iteration) | ||
| // Does the actual supercluster creation | ||
| // Track which tracksters were ever used as candidates | ||
| std::vector<bool> usedAsCandidate(tracksterCount, false); | ||
|
|
||
| auto onCandidateTransition = [&](unsigned ts_cand_idx) { | ||
| if (bestSeedForCurrentCandidate_idx < | ||
| std::numeric_limits<unsigned int>::max()) { // At least one seed can be superclustered with the candidate | ||
| tracksterMask[ts_cand_idx] = true; // Mask the candidate so it is not considered as seed in later iterations | ||
|
|
||
| // Look for a supercluster of the seed | ||
| std::vector<std::vector<unsigned int>>::iterator seed_supercluster_it = | ||
| std::find_if(outputSuperclusters.begin(), | ||
| outputSuperclusters.end(), | ||
| [bestSeedForCurrentCandidate_idx](std::vector<unsigned int> const& sc) { | ||
| return sc[0] == bestSeedForCurrentCandidate_idx; | ||
| }); | ||
|
|
||
| if (seed_supercluster_it == outputSuperclusters.end()) { // No supercluster exists yet for the seed. Create one. | ||
| if (bestSeedForCurrentCandidate_idx < std::numeric_limits<unsigned int>::max()) { | ||
| tracksterMask[ts_cand_idx] = true; // Mask the candidate so it’s not reused as a seed | ||
| usedAsCandidate[ts_cand_idx] = true; | ||
|
|
||
| // Find the supercluster the seed belongs to (even if it's already used in another supercluster) | ||
| // Find existing supercluster for the seed | ||
| auto seed_supercluster_it = std::find_if(outputSuperclusters.begin(), | ||
| outputSuperclusters.end(), | ||
| [bestSeedForCurrentCandidate_idx](const std::vector<unsigned int>& sc) { | ||
| return sc[0] == bestSeedForCurrentCandidate_idx; | ||
| }); | ||
| if (seed_supercluster_it == outputSuperclusters.end()) { | ||
| // No supercluster exists for this seed, create one | ||
| outputSuperclusters.emplace_back(std::initializer_list<unsigned int>{bestSeedForCurrentCandidate_idx}); | ||
| resultTracksters.emplace_back(inputTracksters[bestSeedForCurrentCandidate_idx]); | ||
| linkedTracksterIdToInputTracksterId.emplace_back( | ||
| std::initializer_list<unsigned int>{bestSeedForCurrentCandidate_idx}); | ||
| seed_supercluster_it = outputSuperclusters.end() - 1; | ||
| tracksterMask[bestSeedForCurrentCandidate_idx] = | ||
| true; // mask the seed as well (needed to find tracksters not in any supercluster) | ||
| tracksterMask[bestSeedForCurrentCandidate_idx] = true; | ||
| } | ||
| // Index of the supercluster into resultTracksters, outputSuperclusters and linkedTracksterIdToInputTracksterId collections (the indices are the same) | ||
|
|
||
| unsigned int indexIntoOutputTracksters = seed_supercluster_it - outputSuperclusters.begin(); | ||
| seed_supercluster_it->push_back(ts_cand_idx); | ||
| resultTracksters[indexIntoOutputTracksters].mergeTracksters(inputTracksters[ts_cand_idx]); | ||
|
|
@@ -290,10 +295,10 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( | |
| } | ||
| }; | ||
|
|
||
| //Iterate over minibatches | ||
| // Iterate over minibatches | ||
| for (unsigned int batchIndex = 0; batchIndex < batchOutputs.size(); batchIndex++) { | ||
| std::vector<float> const& currentBatchOutputs = batchOutputs[batchIndex]; // DNN score outputs | ||
| // Iterate over seed-candidate pairs inside current minibatch | ||
| std::vector<float> const& currentBatchOutputs = batchOutputs[batchIndex]; | ||
|
|
||
| for (unsigned int indexInBatch = 0; indexInBatch < tracksterIndicesUsedInDNN[batchIndex].size(); indexInBatch++) { | ||
| assert(indexInBatch < static_cast<unsigned int>(batchOutputs[batchIndex].size())); | ||
|
|
||
|
|
@@ -303,29 +308,33 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( | |
|
|
||
| if (previousCandTrackster_idx != std::numeric_limits<unsigned int>::max() && | ||
| ts_cand_idx != previousCandTrackster_idx) { | ||
| // There is a transition from one seed to the next (don't make a transition for the first iteration) | ||
| onCandidateTransition(previousCandTrackster_idx); | ||
| } | ||
|
|
||
| if (currentDnnScore > bestSeedForCurrentCandidate_dnnScore && !tracksterMask[ts_seed_idx]) { | ||
| // Check that the DNN suggests superclustering, that this seed-candidate assoc is better than previous ones, and that the seed is not already in a supercluster as candidate | ||
| // Ignore seed if it was previously used as a candidate | ||
| if (currentDnnScore > bestSeedForCurrentCandidate_dnnScore && !usedAsCandidate[ts_seed_idx]) { | ||
| bestSeedForCurrentCandidate_idx = ts_seed_idx; | ||
| bestSeedForCurrentCandidate_dnnScore = currentDnnScore; | ||
| } | ||
|
|
||
| previousCandTrackster_idx = ts_cand_idx; | ||
| } | ||
| } | ||
| onCandidateTransition(previousCandTrackster_idx); | ||
|
|
||
| // Adding one-trackster superclusters for all tracksters not in a supercluster already that pass the seed threshold | ||
| // Create singleton superclusters for unused tracksters with enough pt | ||
| for (unsigned int ts_id = 0; ts_id < tracksterCount; ts_id++) { | ||
| if (!tracksterMask[ts_id] && inputTracksters[ts_id].raw_pt() >= seedPtThreshold_) { | ||
| if (!tracksterMask[ts_id] && | ||
| inputTracksters[ts_id].raw_energy() * std::sin(inputTracksters[ts_id].barycenter().Theta()) >= | ||
|
||
| seedPtThreshold_) { | ||
| outputSuperclusters.emplace_back(std::initializer_list<unsigned int>{ts_id}); | ||
| resultTracksters.emplace_back(inputTracksters[ts_id]); | ||
| linkedTracksterIdToInputTracksterId.emplace_back(std::initializer_list<unsigned int>{ts_id}); | ||
| } | ||
| } | ||
|
|
||
| /////////////////////////////////////////////////////////////////////////TRKBUILDINGMOD | ||
|
|
||
| #ifdef EDM_ML_DEBUG | ||
| for (std::vector<unsigned int> const& sc : outputSuperclusters) { | ||
| std::ostringstream s; | ||
|
|
@@ -340,8 +349,8 @@ void TracksterLinkingbySuperClusteringDNN::linkTracksters( | |
| void TracksterLinkingbySuperClusteringDNN::fillPSetDescription(edm::ParameterSetDescription& desc) { | ||
| TracksterLinkingAlgoBase::fillPSetDescription(desc); // adds algo_verbosity | ||
| desc.add<edm::FileInPath>("onnxModelPath")->setComment("Path to DNN (as ONNX model)"); | ||
| desc.ifValue(edm::ParameterDescription<std::string>("dnnInputsVersion", "v2", true), | ||
| edm::allowedValues<std::string>("v1", "v2")) | ||
| desc.ifValue(edm::ParameterDescription<std::string>("dnnInputsVersion", "v3", true), | ||
| edm::allowedValues<std::string>("v1", "v2", "v3")) | ||
| ->setComment( | ||
| "DNN inputs version tag. Defines which set of features is fed to the DNN. Must match with the actual DNN."); | ||
| desc.add<unsigned int>("inferenceBatchSize", 1e5) | ||
|
|
@@ -379,4 +388,4 @@ void TracksterLinkingbySuperClusteringDNN::fillPSetDescription(edm::ParameterSet | |
| {static_cast<int>(Trackster::ParticleType::photon), static_cast<int>(Trackster::ParticleType::electron)}) | ||
| ->setComment("List of PID particle types (ticl::Trackster::ParticleType enum) to consider for PID filtering"); | ||
| desc.add<double>("PIDThreshold", 0.8)->setComment("PID score threshold"); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
General comment on computing the pT: #48184 fixes the calculation of the pT by using the barycenter position. Can we just revert these changes and go back to using
trackster.raw_pt()?