-
Notifications
You must be signed in to change notification settings - Fork 4.6k
bug fix in vertex selection of L2TauTagNNProducer #37291
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
f1e90e9
6981053
7d0eef9
2475aeb
d8d790c
18448f4
c74d9c0
6b5abf0
074a706
c76cfb5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -183,9 +183,10 @@ class L2TauNNProducer : public edm::stream::EDProducer<edm::GlobalCache<L2TauNNP | |||||||||||||
| const ZVertexSoA& patavtx_soa, | ||||||||||||||
| const reco::BeamSpot& beamspot, | ||||||||||||||
| const MagneticField* magfi); | ||||||||||||||
| std::vector<int> selectGoodVertices(const ZVertexSoA& patavtx_soa, | ||||||||||||||
| const pixelTrack::TrackSoA& patatracks_tsoa, | ||||||||||||||
| const std::vector<int>& TrackGood); | ||||||||||||||
| void selectGoodTracksAndVertices(const ZVertexSoA& patavtx_soa, | ||||||||||||||
| const pixelTrack::TrackSoA& patatracks_tsoa, | ||||||||||||||
| std::vector<int>& TrkGood, | ||||||||||||||
| std::vector<int>& VtxGood); | ||||||||||||||
| std::pair<float, float> impactParameter(int it, | ||||||||||||||
| const pixelTrack::TrackSoA& patatracks_tsoa, | ||||||||||||||
| float patatrackPhi, | ||||||||||||||
|
|
@@ -340,21 +341,21 @@ void L2TauNNProducer::checknan(tensorflow::Tensor& tensor, int debugLevel) { | |||||||||||||
| edm::LogWarning("InputVar") << "var is nan \nvar name= " | ||||||||||||||
| << L2TauTagNNv1::varNameMap.at(static_cast<L2TauTagNNv1::NNInputs>(var_idx)) | ||||||||||||||
| << "\t var_idx = " << var_idx << "\t eta_idx = " << eta_idx | ||||||||||||||
| << "\t phi_idx = " << phi_idx << "\t tau_idx = " << tau_idx << std::endl; | ||||||||||||||
| << "\t phi_idx = " << phi_idx << "\t tau_idx = " << tau_idx; | ||||||||||||||
| if (debugLevel > 2) { | ||||||||||||||
| edm::LogWarning("InputVar") << "other vars in same cell \n"; | ||||||||||||||
| if (var_idx + 1 < tensor_shape.at(3)) | ||||||||||||||
| edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(var_idx + 1)) | ||||||||||||||
| << "\t = " << getCell(static_cast<NNInputs>(var_idx + 1)) << std::endl; | ||||||||||||||
| << "\t = " << getCell(static_cast<NNInputs>(var_idx + 1)); | ||||||||||||||
| if (var_idx + 2 < tensor_shape.at(3)) | ||||||||||||||
| edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(var_idx + 2)) | ||||||||||||||
| << "\t = " << getCell(static_cast<NNInputs>(var_idx + 2)) << std::endl; | ||||||||||||||
| << "\t = " << getCell(static_cast<NNInputs>(var_idx + 2)); | ||||||||||||||
| if (var_idx + 3 < tensor_shape.at(3)) | ||||||||||||||
| edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(var_idx + 3)) | ||||||||||||||
| << "\t = " << getCell(static_cast<NNInputs>(var_idx + 3)) << std::endl; | ||||||||||||||
| << "\t = " << getCell(static_cast<NNInputs>(var_idx + 3)); | ||||||||||||||
| if (var_idx + 4 < tensor_shape.at(3)) | ||||||||||||||
| edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(var_idx + 4)) | ||||||||||||||
| << "\t = " << getCell(static_cast<NNInputs>(var_idx + 4)) << std::endl; | ||||||||||||||
| << "\t = " << getCell(static_cast<NNInputs>(var_idx + 4)); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
@@ -566,69 +567,52 @@ void L2TauNNProducer::fillCaloRecHits(tensorflow::Tensor& cellGridMatrix, | |||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| std::vector<int> L2TauNNProducer::selectGoodVertices(const ZVertexSoA& patavtx_soa, | ||||||||||||||
| const pixelTrack::TrackSoA& patatracks_tsoa, | ||||||||||||||
| const std::vector<int>& TrackGood) { | ||||||||||||||
| auto maxTracks = patatracks_tsoa.stride(); | ||||||||||||||
| void L2TauNNProducer::selectGoodTracksAndVertices(const ZVertexSoA& patavtx_soa, | ||||||||||||||
| const pixelTrack::TrackSoA& patatracks_tsoa, | ||||||||||||||
| std::vector<int>& TrkGood, | ||||||||||||||
| std::vector<int>& VtxGood) { | ||||||||||||||
| const auto maxTracks = patatracks_tsoa.stride(); | ||||||||||||||
| const int nv = patavtx_soa.nvFinal; | ||||||||||||||
| std::vector<int> VtxGood; | ||||||||||||||
| if (nv == 0) | ||||||||||||||
| return VtxGood; | ||||||||||||||
| VtxGood.reserve(nv); | ||||||||||||||
|
|
||||||||||||||
| std::vector<double> maxChi2_; | ||||||||||||||
| std::vector<double> pTSquaredSum(nv); | ||||||||||||||
|
|
||||||||||||||
| for (int j = nv - 1; j >= 0; --j) { | ||||||||||||||
| std::vector<int> trk_ass_to_vtx; | ||||||||||||||
| auto vtx_idx = patavtx_soa.sortInd[j]; | ||||||||||||||
| assert(vtx_idx < nv); | ||||||||||||||
| for (int trk_idx = 0; trk_idx < maxTracks; trk_idx++) { | ||||||||||||||
| int vtx_ass_to_track = patavtx_soa.idv[trk_idx]; | ||||||||||||||
| if (vtx_ass_to_track == int16_t(vtx_idx)) | ||||||||||||||
| trk_ass_to_vtx.push_back(trk_idx); | ||||||||||||||
| } | ||||||||||||||
| auto nt = trk_ass_to_vtx.size(); | ||||||||||||||
| if (nt == 0) { | ||||||||||||||
| continue; | ||||||||||||||
| } | ||||||||||||||
| if (nt < 2) { | ||||||||||||||
| trk_ass_to_vtx.clear(); | ||||||||||||||
| continue; | ||||||||||||||
| auto const* quality = patatracks_tsoa.qualityData(); | ||||||||||||||
|
|
||||||||||||||
| // No need to sort either as the algorithms is just using the max (not even the location, just the max value of pt2sum). | ||||||||||||||
| std::vector<double> pTSquaredSum(nv, 0); | ||||||||||||||
|
||||||||||||||
| std::vector<int> nTrkAssociated(nv, 0); | ||||||||||||||
|
|
||||||||||||||
| for (int32_t trk_idx = 0; trk_idx < maxTracks; ++trk_idx) { | ||||||||||||||
| auto nHits = patatracks_tsoa.nHits(trk_idx); | ||||||||||||||
| if (nHits == 0) { | ||||||||||||||
| break; | ||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would have expected
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this WAS the bug. it will not be required anymore once #37281 is merged
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, thanks for clarifying. |
||||||||||||||
| } | ||||||||||||||
| for (const auto& trk_idx : trk_ass_to_vtx) { | ||||||||||||||
| int vtx_ass_to_track = patavtx_soa.idv[trk_idx]; | ||||||||||||||
| if (vtx_ass_to_track != vtx_idx) | ||||||||||||||
| continue; | ||||||||||||||
| int vtx_ass_to_track = patavtx_soa.idv[trk_idx]; | ||||||||||||||
| if (vtx_ass_to_track >= 0 && vtx_ass_to_track <= nv) { | ||||||||||||||
| double patatrackPt = patatracks_tsoa.pt[trk_idx]; | ||||||||||||||
|
||||||||||||||
| double patatrackPt = patatracks_tsoa.pt[trk_idx]; | |
| auto patatrackPt = patatracks_tsoa.pt[trk_idx]; |
auto would give float rather than double, afaiu. Or was double intentional?
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| ++nTrkAssociated.at(vtx_ass_to_track); | |
| ++nTrkAssociated[vtx_ass_to_track]; |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| pTSquaredSum.at(vtx_ass_to_track) += patatrackPt * patatrackPt; | |
| pTSquaredSum[vtx_ass_to_track] += patatrackPt * patatrackPt; |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, these requirements apply to the selection of good tracks, but they do not apply to the calculation of pTSquaredSum, correct? Related to the 2nd check, can nHits actually be negative?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The upper limit from maxVtx_ has been removed. Either re-instate it, or remove the corresponding configuration parameter (and class data member).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm generally confused about the role of vtxGood, but obviously I'm not the expert here.
Is the order of the elements in vtxGood important?
- If not, why do you need to use the sorting from the input vertices (
soa.sortInd[j])? - If yes, since the sorting from the input vertices is used, shouldn't one use the f.o.m. that was used for that sorting? Is that the purpose of
pTSquaredSum(to match exactly the f.o.m. used for the original sorting)? If so, wouldn't that value already be available insoa.ptv2[sortInd[j]]?
Is vtxGood meant to mimic the "trimmed" pixel vertices? If yes, why not simply use those as (additional) input to this producer, without having to recompute all this and having to keep this whole source code aligned with the pixel reco?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the order of the elements in
vtxGoodimportant?
- If not, why do you need to use the sorting from the input vertices (
soa.sortInd[j])?
No, the order is not important. patavtx_soa.sotrInd[vtx_idx] is needed to make track-vertex association, because patavtx_soa.idv[trk_idx] refers to sortInd instead of vtx_idx.
Is
vtxGoodmeant to mimic the "trimmed" pixel vertices? If yes, why not simply use those as (additional) input to this producer, without having to recompute all this and having to keep this whole source code aligned with the pixel reco?
In L2TauTag producer we prefer to avoid the use of legacy formats to eliminate unnecessary overhead from conversion and creation of the new collection. While indeed in the current version, the vertex selection mimics "trimmed" vertex producer logic, we plan to change the selection for future training (e.g. use soa.ptv2, review cuts applied to trkGood and vtxGood).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, the order is not important. [..]
In this particular loop, I don't see .idv, so it's not clear to me how [1] would differ from [2] if the order of vtxGood is not important.
[1]
for (int j = nv - 1; j >= 0; --j) {
auto vtx_idx = patavtx_soa.sortInd[j];
assert(vtx_idx < nv);
if (nTrkAssociated[vtx_idx] >= 2 && pTSquaredSum[vtx_idx] >= minFOM_fromFrac &&
pTSquaredSum[vtx_idx] > minSumPt2_) {
vtxGood.push_back(vtx_idx);
}
}[2]
for (int vtx_idx = nv - 1; vtx_idx >= 0; --vtx_idx) {
if (nTrkAssociated[vtx_idx] >= 2 && pTSquaredSum[vtx_idx] >= minFOM_fromFrac &&
pTSquaredSum[vtx_idx] > minSumPt2_) {
vtxGood.push_back(vtx_idx);
}
}There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I've misunderstood your original comment. Yes, in this particular loop [1] and [2] will result in equivalent NN inputs. So let's use [2] since it is simpler.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hm.. actually, as @valeriadamante pointed me out, after adding back maxVtx_ [1] and [2] will no longer be equivalent because if the number of potentially good vertices is greater than maxVtx_, we would want to keep the most energetic ones. However, the order in which they will be stored in vtxGood is not important.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, I see. In principle, one could also fill vtxGood then use if (vtxGood.size > maxVtx_) vtxGood.resize(maxVtx_) afterwards (without having the if (vtxGood.size > maxVtx_) check inside the for loop). I don't know what's better and faster, it might depend on the typical value of vtxGood.size and maxVtx_. You are free to use the implementation you prefer, of course.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| if (nTrkAssociated.at(vtx_idx) >= 2 && pTSquaredSum[vtx_idx] >= minFOM_fromFrac && | |
| if (nTrkAssociated[vtx_idx] >= 2 && pTSquaredSum[vtx_idx] >= minFOM_fromFrac && |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| std::vector<int> TrkGood; | |
| std::vector<int> VtxGood; | |
| std::vector<int> trkGood; | |
| std::vector<int> vtxGood; |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| const int nTaus = static_cast<int>(allTaus.size()); | |
| const int nTaus = allTaus.size(); |
Is static_cast actually needed, or useful here?
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is now VERY different the the previous version.
Now only tracks contributing to the vertex fidning in patatrack (i.e not-small-pt HighPurity quadruplets( are considered.
Before all tracks (including loose triplets) were used.
So I do not see how this code can reproduce previous results even on CPU
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@VinInn sorry, can you, please, elaborate on what is the difference? In the master TrackGood are tracks with quality >= loose && nHits > 0. In the current version, TrkGood have exactly the same definition as far as I see...
cmssw/RecoTauTag/HLTProducers/src/L2TauTagNNProducer.cc
Lines 598 to 603 in d8d790c
| auto q = quality[trk_idx]; | |
| if (q < pixelTrack::Quality::loose) | |
| continue; | |
| if (nHits < 0) | |
| continue; | |
| TrkGood.push_back(trk_idx); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry I missread the position of the closing bracket of line 588
if (vtx_ass_to_track >= 0 && vtx_ass_to_track <= nv) {
so ok.
Still I would invite you to reconsider the use of all tracks (including loose triplets far away from any vertex) in your CNN (and eventually clamp the pt of the track there as well).
Please also note that after the introduction of #36215 the correspondence of nLayers with nHits has changed as tracks now can have 2 hits in each layer. So you may wish to use nLayers now in the CNN.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@VinInn thank you for the suggestion! For the next iteration of the training, we'll consider applying a cleaning of the input tracks. Since the current training was done using all tracks that pass quality >= loose && nHits > 0, we prefer to use the same selection for the inference. If I interpret correctly the last @valeriadamante results, the presence of such low-quality tracks does not represent an issue as far as reproducibility is concerned:i.e. small differences in inputs result in small differences in the NN score. So the original problem was due to the missing break for nHits == 0.
About nHits vs. nLayers: apart from preselection (which shouldn't be sensitive to this since we compare to 0), we use nHits in the definition of track ndof: patatrackNdof = 2 * nHits - 5. Could you, please, confirm that we need to switch to nLayers in this formula to get the correct ndof of the track?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The correct one should be
patatrackNdof = 2 * std::min(6,nHits) - 5 as we fit 6 hits max.
BUT I should check that this is consistently done in patatrack itself...
As long as you did not give nHIts as direct input to the CNN no other change is needed (the CNN should not be very sensitive to how many hits we fit)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pedantic, but i think the convention is to use lower-case initials.