Skip to content
165 changes: 70 additions & 95 deletions RecoTauTag/HLTProducers/src/L2TauTagNNProducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,10 @@ class L2TauNNProducer : public edm::stream::EDProducer<edm::GlobalCache<L2TauNNP
const ZVertexSoA& patavtx_soa,
const reco::BeamSpot& beamspot,
const MagneticField* magfi);
std::vector<int> selectGoodVertices(const ZVertexSoA& patavtx_soa,
const pixelTrack::TrackSoA& patatracks_tsoa,
const std::vector<int>& TrackGood);
void selectGoodTracksAndVertices(const ZVertexSoA& patavtx_soa,
const pixelTrack::TrackSoA& patatracks_tsoa,
std::vector<int>& trkGood,
std::vector<int>& vtxGood);
std::pair<float, float> impactParameter(int it,
const pixelTrack::TrackSoA& patatracks_tsoa,
float patatrackPhi,
Expand All @@ -210,11 +211,11 @@ class L2TauNNProducer : public edm::stream::EDProducer<edm::GlobalCache<L2TauNNP
const edm::EDGetTokenT<PixelTrackHeterogeneous> pataTracksToken_;
const edm::EDGetTokenT<reco::BeamSpot> beamSpotToken_;
const unsigned int maxVtx_;
const double fractionSumPt2_;
const double minSumPt2_;
const double trackPtMin_;
const double trackPtMax_;
const double trackChi2Max_;
const float fractionSumPt2_;
const float minSumPt2_;
const float trackPtMin_;
const float trackPtMax_;
const float trackChi2Max_;
std::string inputTensorName_;
std::string outputTensorName_;
const L2TauNNProducerCacheData* L2cacheData_;
Expand Down Expand Up @@ -340,21 +341,21 @@ void L2TauNNProducer::checknan(tensorflow::Tensor& tensor, int debugLevel) {
edm::LogWarning("InputVar") << "var is nan \nvar name= "
<< L2TauTagNNv1::varNameMap.at(static_cast<L2TauTagNNv1::NNInputs>(var_idx))
<< "\t var_idx = " << var_idx << "\t eta_idx = " << eta_idx
<< "\t phi_idx = " << phi_idx << "\t tau_idx = " << tau_idx << std::endl;
<< "\t phi_idx = " << phi_idx << "\t tau_idx = " << tau_idx;
if (debugLevel > 2) {
edm::LogWarning("InputVar") << "other vars in same cell \n";
if (var_idx + 1 < tensor_shape.at(3))
edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(var_idx + 1))
<< "\t = " << getCell(static_cast<NNInputs>(var_idx + 1)) << std::endl;
<< "\t = " << getCell(static_cast<NNInputs>(var_idx + 1));
if (var_idx + 2 < tensor_shape.at(3))
edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(var_idx + 2))
<< "\t = " << getCell(static_cast<NNInputs>(var_idx + 2)) << std::endl;
<< "\t = " << getCell(static_cast<NNInputs>(var_idx + 2));
if (var_idx + 3 < tensor_shape.at(3))
edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(var_idx + 3))
<< "\t = " << getCell(static_cast<NNInputs>(var_idx + 3)) << std::endl;
<< "\t = " << getCell(static_cast<NNInputs>(var_idx + 3));
if (var_idx + 4 < tensor_shape.at(3))
edm::LogWarning("InputVar") << L2TauTagNNv1::varNameMap.at(static_cast<NNInputs>(var_idx + 4))
<< "\t = " << getCell(static_cast<NNInputs>(var_idx + 4)) << std::endl;
<< "\t = " << getCell(static_cast<NNInputs>(var_idx + 4));
}
}
}
Expand Down Expand Up @@ -399,7 +400,8 @@ void L2TauNNProducer::standardizeTensor(tensorflow::Tensor& tensor) {

void L2TauNNProducer::fillL1TauVars(tensorflow::Tensor& cellGridMatrix, const std::vector<l1t::TauRef>& allTaus) {
using NNInputs = L2TauTagNNv1::NNInputs;
const int nTaus = static_cast<int>(allTaus.size());
//const int nTaus = static_cast<int>(allTaus.size());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
//const int nTaus = static_cast<int>(allTaus.size());

const int nTaus = allTaus.size();
for (int tau_idx = 0; tau_idx < nTaus; tau_idx++) {
for (int eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) {
for (int phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) {
Expand Down Expand Up @@ -432,7 +434,8 @@ void L2TauNNProducer::fillCaloRecHits(tensorflow::Tensor& cellGridMatrix,
const std::vector<l1t::TauRef>& allTaus,
const caloRecHitCollections& caloRecHits) {
using NNInputs = L2TauTagNNv1::NNInputs;
const int nTaus = static_cast<int>(allTaus.size());
//const int nTaus = static_cast<int>(allTaus.size());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
//const int nTaus = static_cast<int>(allTaus.size());

const int nTaus = allTaus.size();
float deta, dphi;
int eta_idx = 0;
int phi_idx = 0;
Expand Down Expand Up @@ -566,69 +569,51 @@ void L2TauNNProducer::fillCaloRecHits(tensorflow::Tensor& cellGridMatrix,
}
}

std::vector<int> L2TauNNProducer::selectGoodVertices(const ZVertexSoA& patavtx_soa,
const pixelTrack::TrackSoA& patatracks_tsoa,
const std::vector<int>& TrackGood) {
auto maxTracks = patatracks_tsoa.stride();
void L2TauNNProducer::selectGoodTracksAndVertices(const ZVertexSoA& patavtx_soa,
const pixelTrack::TrackSoA& patatracks_tsoa,
std::vector<int>& trkGood,
std::vector<int>& vtxGood) {
const auto maxTracks = patatracks_tsoa.stride();
const int nv = patavtx_soa.nvFinal;
std::vector<int> VtxGood;
if (nv == 0)
return VtxGood;
VtxGood.reserve(nv);

std::vector<double> maxChi2_;
std::vector<double> pTSquaredSum(nv);

for (int j = nv - 1; j >= 0; --j) {
std::vector<int> trk_ass_to_vtx;
auto vtx_idx = patavtx_soa.sortInd[j];
assert(vtx_idx < nv);
for (int trk_idx = 0; trk_idx < maxTracks; trk_idx++) {
int vtx_ass_to_track = patavtx_soa.idv[trk_idx];
if (vtx_ass_to_track == int16_t(vtx_idx))
trk_ass_to_vtx.push_back(trk_idx);
}
auto nt = trk_ass_to_vtx.size();
if (nt == 0) {
continue;
}
if (nt < 2) {
trk_ass_to_vtx.clear();
continue;
trkGood.clear();
trkGood.reserve(maxTracks);
vtxGood.clear();
vtxGood.reserve(nv);
auto const* quality = patatracks_tsoa.qualityData();

// No need to sort either as the algorithms is just using the max (not even the location, just the max value of pt2sum).
std::vector<float> pTSquaredSum(nv, 0);
std::vector<int> nTrkAssociated(nv, 0);

for (int32_t trk_idx = 0; trk_idx < maxTracks; ++trk_idx) {
auto nHits = patatracks_tsoa.nHits(trk_idx);
if (nHits == 0) {
break;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have expected continue rather than break (assuming this is the n-hits of one given track). No?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this WAS the bug.
it is a guard
see for instance in
https://cmssdt.cern.ch/dxr/CMSSW/source/RecoPixelVertexing/PixelVertexFinding/plugins/gpuVertexFinder.cc#30

it will not be required anymore once #37281 is merged

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, thanks for clarifying.

}
for (const auto& trk_idx : trk_ass_to_vtx) {
int vtx_ass_to_track = patavtx_soa.idv[trk_idx];
if (vtx_ass_to_track != vtx_idx)
continue;
double patatrackPt = patatracks_tsoa.pt[trk_idx];
if (patatrackPt < trackPtMin_)
continue;
if (patatracks_tsoa.chi2(trk_idx) > trackChi2Max_)
continue;
if (patatrackPt > trackPtMax_) {
patatrackPt = trackPtMax_;
int vtx_ass_to_track = patavtx_soa.idv[trk_idx];
if (vtx_ass_to_track >= 0 && vtx_ass_to_track < nv) {
auto patatrackPt = patatracks_tsoa.pt[trk_idx];
++nTrkAssociated[vtx_ass_to_track];
if (patatrackPt >= trackPtMin_ && patatracks_tsoa.chi2(trk_idx) <= trackChi2Max_) {
patatrackPt = std::min(patatrackPt, trackPtMax_);
pTSquaredSum[vtx_ass_to_track] += patatrackPt * patatrackPt;
}
pTSquaredSum.at(vtx_idx) += patatrackPt * patatrackPt;
}
}
std::vector<size_t> sortIdxs(nv);
std::iota(sortIdxs.begin(), sortIdxs.end(), 0);
std::sort(sortIdxs.begin(), sortIdxs.end(), [&](size_t const i1, size_t const i2) {
return pTSquaredSum[i1] > pTSquaredSum[i2];
});
auto const minFOM_fromFrac = pTSquaredSum[sortIdxs.front()] * fractionSumPt2_;

for (int j = nv - 1; j >= 0; --j) {
auto idx = patavtx_soa.sortInd[j];

if (VtxGood.size() >= maxVtx_) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The upper limit from maxVtx_ has been removed. Either re-instate it, or remove the corresponding configuration parameter (and class data member).

break;
if (nHits > 0 and quality[trk_idx] >= pixelTrack::Quality::loose) {
trkGood.push_back(trk_idx);
}
if (pTSquaredSum[idx] >= minFOM_fromFrac && pTSquaredSum[idx] > minSumPt2_) {
VtxGood.push_back(idx);
}
if (nv > 0) {
const auto minFOM_fromFrac = (*std::max_element(pTSquaredSum.begin(), pTSquaredSum.end())) * fractionSumPt2_;
for (int j = nv - 1; j >= 0; --j) {
auto vtx_idx = patavtx_soa.sortInd[j];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm generally confused about the role of vtxGood, but obviously I'm not the expert here.

Is the order of the elements in vtxGood important?

  • If not, why do you need to use the sorting from the input vertices (soa.sortInd[j])?
  • If yes, since the sorting from the input vertices is used, shouldn't one use the f.o.m. that was used for that sorting? Is that the purpose of pTSquaredSum (to match exactly the f.o.m. used for the original sorting)? If so, wouldn't that value already be available in soa.ptv2[sortInd[j]]?

Is vtxGood meant to mimic the "trimmed" pixel vertices? If yes, why not simply use those as (additional) input to this producer, without having to recompute all this and having to keep this whole source code aligned with the pixel reco?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the order of the elements in vtxGood important?

  • If not, why do you need to use the sorting from the input vertices (soa.sortInd[j])?

No, the order is not important. patavtx_soa.sotrInd[vtx_idx] is needed to make track-vertex association, because patavtx_soa.idv[trk_idx] refers to sortInd instead of vtx_idx.

Is vtxGood meant to mimic the "trimmed" pixel vertices? If yes, why not simply use those as (additional) input to this producer, without having to recompute all this and having to keep this whole source code aligned with the pixel reco?

In L2TauTag producer we prefer to avoid the use of legacy formats to eliminate unnecessary overhead from conversion and creation of the new collection. While indeed in the current version, the vertex selection mimics "trimmed" vertex producer logic, we plan to change the selection for future training (e.g. use soa.ptv2, review cuts applied to trkGood and vtxGood).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the order is not important. [..]

In this particular loop, I don't see .idv, so it's not clear to me how [1] would differ from [2] if the order of vtxGood is not important.

[1]

    for (int j = nv - 1; j >= 0; --j) {
      auto vtx_idx = patavtx_soa.sortInd[j];
      assert(vtx_idx < nv);
      if (nTrkAssociated[vtx_idx] >= 2 && pTSquaredSum[vtx_idx] >= minFOM_fromFrac &&
          pTSquaredSum[vtx_idx] > minSumPt2_) {
        vtxGood.push_back(vtx_idx);
      }
    }

[2]

    for (int vtx_idx = nv - 1; vtx_idx >= 0; --vtx_idx) {
      if (nTrkAssociated[vtx_idx] >= 2 && pTSquaredSum[vtx_idx] >= minFOM_fromFrac &&
          pTSquaredSum[vtx_idx] > minSumPt2_) {
        vtxGood.push_back(vtx_idx);
      }
    }

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I've misunderstood your original comment. Yes, in this particular loop [1] and [2] will result in equivalent NN inputs. So let's use [2] since it is simpler.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm.. actually, as @valeriadamante pointed me out, after adding back maxVtx_ [1] and [2] will no longer be equivalent because if the number of potentially good vertices is greater than maxVtx_, we would want to keep the most energetic ones. However, the order in which they will be stored in vtxGood is not important.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I see. In principle, one could also fill vtxGood then use if (vtxGood.size > maxVtx_) vtxGood.resize(maxVtx_) afterwards (without having the if (vtxGood.size > maxVtx_) check inside the for loop). I don't know what's better and faster, it might depend on the typical value of vtxGood.size and maxVtx_. You are free to use the implementation you prefer, of course.

assert(vtx_idx < nv);
if (nTrkAssociated[vtx_idx] >= 2 && pTSquaredSum[vtx_idx] >= minFOM_fromFrac &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can one actually have nTrkAssociated < 2 for a vertex? If not, could/should nTrkAssociated be removed altogether? If not, should the threshold be configurable and not hard-coded to 2?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@valeriadamante , I see you updated the PR, but I'd like to have a clarification on this question before restarting the tests.

Copy link
Contributor

@kandrosov kandrosov Mar 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps @VinInn can comment on this. For L2TauTag we copied this condition from here:

if (nt < 2) {
itrk.clear();
continue;
} // remove outliers

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, understood. If further changes are eventually made to this PR (or in a future one), you could consider adding comments on where these different cuts are taken from, and what they need to be kept aligned with.

pTSquaredSum[vtx_idx] > minSumPt2_) {
vtxGood.push_back(vtx_idx);
}
}
}
return VtxGood;
}

std::pair<float, float> L2TauNNProducer::impactParameter(int it,
Expand Down Expand Up @@ -678,30 +663,19 @@ void L2TauNNProducer::fillPatatracks(tensorflow::Tensor& cellGridMatrix,
auto getCell = [&](NNInputs input) -> float& {
return getCellImpl(cellGridMatrix, tau_idx, phi_idx, eta_idx, input);
};
const int nTaus = static_cast<int>(allTaus.size());
for (tau_idx = 0; tau_idx < nTaus; tau_idx++) {
const float tauEta = allTaus[tau_idx]->eta();
const float tauPhi = allTaus[tau_idx]->phi();

auto maxTracks = patatracks_tsoa.stride();
auto const* quality = patatracks_tsoa.qualityData();
std::vector<int> trkGood;
std::vector<int> vtxGood;

std::vector<int> TrackGood;
for (int32_t it = 0; it < maxTracks; ++it) {
auto nHits = patatracks_tsoa.nHits(it);
if (nHits == 0)
break;
auto q = quality[it];
if (q < pixelTrack::Quality::loose)
continue;
if (nHits < 0)
continue;
TrackGood.push_back(it);
}
selectGoodTracksAndVertices(patavtx_soa, patatracks_tsoa, trkGood, vtxGood);

std::vector<int> VtxGood = selectGoodVertices(patavtx_soa, patatracks_tsoa, TrackGood);
//const int nTaus = static_cast<int>(allTaus.size());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
//const int nTaus = static_cast<int>(allTaus.size());

const int nTaus = allTaus.size();
for (tau_idx = 0; tau_idx < nTaus; tau_idx++) {
const float tauEta = allTaus[tau_idx]->eta();
const float tauPhi = allTaus[tau_idx]->phi();

for (const auto it : TrackGood) {
for (const auto it : trkGood) {
const float patatrackPt = patatracks_tsoa.pt[it];
if (patatrackPt <= 0)
continue;
Expand All @@ -712,7 +686,7 @@ void L2TauNNProducer::fillPatatracks(tensorflow::Tensor& cellGridMatrix,
const auto nHits = patatracks_tsoa.nHits(it);
if (nHits <= 0)
continue;
const int patatrackNdof = 2 * nHits - 5;
const int patatrackNdof = 2 * std::min(6, nHits) - 5;

const int vtx_idx_assTrk = patavtx_soa.idv[it];
if (reco::deltaR2(patatrackEta, patatrackPhi, tauEta, tauPhi) < dR2_max) {
Expand All @@ -728,7 +702,7 @@ void L2TauNNProducer::fillPatatracks(tensorflow::Tensor& cellGridMatrix,
std::pair<float, float> impactParameters = impactParameter(it, patatracks_tsoa, patatrackPhi, beamspot, magfi);
getCell(NNInputs::PatatrackDxy) += impactParameters.first * patatrackPt;
getCell(NNInputs::PatatrackDz) += impactParameters.second * patatrackPt;
if ((std::find(VtxGood.begin(), VtxGood.end(), vtx_idx_assTrk) != VtxGood.end())) {
if ((std::find(vtxGood.begin(), vtxGood.end(), vtx_idx_assTrk) != vtxGood.end())) {
getCell(NNInputs::PatatrackPtSumWithVertex) += patatrackPt;
getCell(NNInputs::PatatrackSizeWithVertex) += 1.;
}
Expand All @@ -738,7 +712,7 @@ void L2TauNNProducer::fillPatatracks(tensorflow::Tensor& cellGridMatrix,
// normalize to sum and define stdDev
for (eta_idx = 0; eta_idx < L2TauTagNNv1::nCellEta; eta_idx++) {
for (phi_idx = 0; phi_idx < L2TauTagNNv1::nCellPhi; phi_idx++) {
getCell(NNInputs::nVertices) = VtxGood.size();
getCell(NNInputs::nVertices) = vtxGood.size();
if (getCell(NNInputs::PatatrackPtSum) > 0.) {
getCell(NNInputs::PatatrackDeltaEta) /= getCell(NNInputs::PatatrackPtSum);
getCell(NNInputs::PatatrackDeltaPhi) /= getCell(NNInputs::PatatrackPtSum);
Expand Down Expand Up @@ -804,7 +778,8 @@ void L2TauNNProducer::produce(edm::Event& event, const edm::EventSetup& eventset
caloRecHits.ee = &*eeCal;
caloRecHits.geometry = &*geometry;

const int nTaus = static_cast<int>(allTaus.size());
//const int nTaus = static_cast<int>(allTaus.size());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
//const int nTaus = static_cast<int>(allTaus.size());

const int nTaus = allTaus.size();
tensorflow::Tensor cellGridMatrix(tensorflow::DT_FLOAT,
{nTaus, L2TauTagNNv1::nCellEta, L2TauTagNNv1::nCellPhi, L2TauTagNNv1::nVars});
const int n_inputs = nTaus * L2TauTagNNv1::nCellEta * L2TauTagNNv1::nCellPhi * L2TauTagNNv1::nVars;
Expand Down