106 changes: 27 additions & 79 deletions CommonTools/PileupAlgos/plugins/ABCNetMakeInputs.cc
@@ -13,63 +13,28 @@
using namespace abcnet;
using PointCloud = cms::nanoflann::PointCloud<float, 2>; //default value for the DIM parameter is 3, but want to gather using 2 dims only (eta, phi). See template definition L11 of PhysicsTools/NearestNeighbors/interface/NearestNeighbors.h

template <typename A, typename B> void zip(const std::vector<A> &a, const std::vector<B> &b, std::vector<std::pair<A,B>> &zipped) {
for(size_t i=0; i<a.size(); ++i) {
zipped.push_back(std::make_pair(a[i], b[i]));
}
}

template <typename A, typename B> void unzip( const std::vector<std::pair<A, B>> &zipped, std::vector<A> &a, std::vector<B> &b) {
for(size_t i=0; i<a.size(); i++) {
a[i] = zipped[i].first;
b[i] = zipped[i].second;
}
}

template <typename A> std::vector<size_t> get_indices(std::vector<A> & v) {
//initialize original index locations
std::vector<size_t> idx(v.size());
iota(idx.begin(), idx.end(), 0);
//first, get indices mapping sorted to unsorted elements
stable_sort(idx.begin(), idx.end(), [&v](size_t i1, size_t i2) {return v[i1] > v[i2];});
//then, invert the mapping to get unsorted ---> sorted mapping
std::vector<size_t> idx_inverse(idx.size());
for (size_t i = 0; i < idx.size(); ++i) idx_inverse[idx[i]] = i;

return idx_inverse;
}

std::tuple< std::unordered_map<std::string, std::vector<float>>, std::vector<float> > ABCNetMakeInputs::makeFeatureMap (const reco::CandidateView * pfCol, std::vector<size_t> & indices, bool debug) {
std::tuple< std::unordered_map<std::string, std::vector<float>>, std::vector<float> > ABCNetMakeInputs::makeFeatureMap (const reco::CandidateView * pfCol, const std::vector<size_t> & indices, bool debug) {

//store PF candidates and their pts into vectors
std::vector<pat::PackedCandidate> PFCands;
std::vector<float> Pts;
std::vector<const pat::PackedCandidate*> PFCands;

for (auto const & aPF : *pfCol ) {
const pat::PackedCandidate *lPack = dynamic_cast<const pat::PackedCandidate*>(&aPF);
PFCands.push_back(*lPack);
Pts.push_back(lPack->pt());
for (const auto &i : indices) {
const pat::PackedCandidate *lPack = dynamic_cast<const pat::PackedCandidate*>(&pfCol->at(i));
if(lPack == nullptr) {
// throw error
throw edm::Exception(edm::errors::LogicError,"ABCNetProducer: cannot get weights since inputs are not PackedCandidates");
}
PFCands.push_back(lPack);
} // end loop over PF candidates

//get indices mapping unsorted and sorted PF candidates
indices = get_indices(Pts);

//sort PF candidates based on their Pt
//zip the vectors
std::vector<std::pair<pat::PackedCandidate,float>> zipped_vec;
zip(PFCands, Pts, zipped_vec);
// Sort the vector of pairs
std::stable_sort(std::begin(zipped_vec), std::end(zipped_vec), [&](const auto& a, const auto& b) { return a.second > b.second; });
// Write the sorted pairs back to the original vectors
unzip(zipped_vec, PFCands, Pts);

//fill feature map
std::unordered_map<std::string, std::vector<float>> fts;
PointCloud::Points points;

for (auto const & aPF : PFCands) {

points.push_back({{float(aPF.eta()), float(aPF.phi())}});
PointCloud::Points points(4000, {{0.f, 0.f}});

for (unsigned i = 0; i < PFCands.size() && i < 4000; ++i) {
const auto &aPF = *PFCands[i];

points[i] = {{float(aPF.eta()), float(aPF.phi())}};

fts["PFCandEta"].push_back(aPF.eta()); //f0
fts["PFCandPhi"].push_back(aPF.phi()); //f1
@@ -85,51 +50,34 @@ std::tuple< std::unordered_map<std::string, std::vector<float>>, std::vector<float> > ABCNetMakeInputs::makeFeatureMap
fts["PFCandNumLayersHit"].push_back(aPF.trackerLayersWithMeasurement()); //f15
fts["PFCandFromPV"].push_back(aPF.fromPV()); //f16
fts["PFCandTrackHighPurity"].push_back(aPF.trackHighPurity()); //f17
if (aPF.bestTrack()) {
const auto *trk = aPF.bestTrack();
if (trk) {
if ( isinf(aPF.dz()/aPF.dzError()) ) fts["PFCandDZSig"].push_back(999.0); else fts["PFCandDZSig"].push_back(aPF.dz()/aPF.dzError()); //f10
if ( isinf(aPF.dxy()/aPF.dxyError()) ) fts["PFCandDXYSig"].push_back(999.0); else fts["PFCandDXYSig"].push_back(aPF.dxy()/aPF.dxyError()); //f11
const auto *trk = aPF.bestTrack();
fts["PFCandNormChi2"].push_back(trk->normalizedChi2()); //f13
fts["PFCandQuality"].push_back(trk->qualityMask()); //f18
}
else {
} else {
fts["PFCandDZSig"].push_back(0.0);
fts["PFCandDXYSig"].push_back(0.0);
fts["PFCandNormChi2"].push_back(999.0);
fts["PFCandQuality"].push_back(0.0);
}
if (aPF.pdgId() == 130 || aPF.pdgId() == 1) fts["PFCandHCalFrac"].push_back(aPF.hcalFraction()); else if (aPF.isIsolatedChargedHadron()) fts["PFCandHCalFrac"].push_back(aPF.rawHcalFraction()); else fts["PFCandHCalFrac"].push_back(0.0); //f12

}
if (debug) {
std::cout << "*** NOW CHECKING THE SORTING OF PF CANDIDATES ***" << std::endl;
if ( !std::is_sorted(fts["PFCandLogPt"].begin(), fts["PFCandLogPt"].end(), std::greater<float>() ) ) std::cout << "*** WARNING *** PFCandidates are not sorted in Pt!!!" << std::endl;
else std::cout << "No issues with the sorting of PF candidates detected" << std::endl;
}

for (int i = 0; i < (4000-static_cast<int>(fts["PFCandEta"].size())); i++) points.push_back({{float(0.0), float(0.0)}});

// method to gather kNN indices
auto knn = [&](size_t num_neighbors, size_t max_support_size, size_t max_query_size = 0) {
if (max_query_size == 0)
max_query_size = max_support_size;

PointCloud::Points supports(points.begin(), points.begin() + std::min(max_support_size, points.size()));
PointCloud::Points queries(points.begin(), points.begin() + std::min(max_query_size, points.size()));
auto result = PointCloud::knn<float>(supports, queries, num_neighbors); // queries.size() * num_neighbors
std::vector<float> output(max_query_size * num_neighbors, max_support_size - 1);
std::copy(result.begin(), result.begin() + std::min(result.size(), output.size()), output.begin());

//std::cout << "[knn] k=" << num_neighbors << ", num_points=" << points.size()
//<< ", max_support_size=" << max_support_size << ", max_query_size=" << max_query_size
//<< ", knn_result_size=" << result.size() << ", final_output_size=" << output.size() << std::endl;
return output;
};

std::vector<float> KNNs = knn(21, points.size(), points.size()); //gather 21 k-nearest neighbors. This vector has size (n_particles * 21)
//std::cout << "size of KNNs is " << KNNs.size() << std::endl;
//for each particle, the first KNN is the particle itself. The training was done excluding this first KNN. Exclude it during evaluation as well
for (int i = 4000-1; i >= 0; i--) KNNs.erase(KNNs.begin()+i*21); //remove elements starting from the end
return {fts,KNNs};
auto knn_indices = PointCloud::knn<float>(points, points, 21); // queries.size() * num_neighbors
std::vector<float> kNNs;
kNNs.reserve(4000 * 20);
for (unsigned i = 0; i < 4000; ++i){
for (unsigned j = 1; j < 21; ++j){
kNNs.push_back(knn_indices.at(i * 21 + j));
}
}

return {fts, kNNs};
};
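Note: the new kNN block above assumes the flat, row-major layout returned by the PointCloud::knn wrapper (21 indices per query on a point cloud padded to 4000 entries, the first index being the query point itself, as the removed comment explains). A minimal standalone sketch of that self-neighbor-stripping step, with the layout, sizes, and function name as illustrative assumptions rather than taken from the wrapper itself:

#include <cstddef>
#include <vector>

// Drop the first neighbor (the query point itself) from a flat, row-major
// kNN index array: the input holds nQueries * k entries, the output
// nQueries * (k - 1). Indices are kept as float, matching the code above.
std::vector<float> dropSelfNeighbor(const std::vector<float>& flatKnn,
                                    std::size_t nQueries, std::size_t k) {
  std::vector<float> out;
  out.reserve(nQueries * (k - 1));
  for (std::size_t i = 0; i < nQueries; ++i) {
    for (std::size_t j = 1; j < k; ++j) {  // j == 0 is the query itself
      out.push_back(flatKnn[i * k + j]);
    }
  }
  return out;
}

// With nQueries = 4000 and k = 21 this reproduces the 4000 * 20 entries
// reserved for kNNs above.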
2 changes: 1 addition & 1 deletion CommonTools/PileupAlgos/plugins/ABCNetMakeInputs.h
@@ -16,7 +16,7 @@ namespace abcnet {
ABCNetMakeInputs() {}
~ABCNetMakeInputs() {}

static std::tuple< std::unordered_map<std::string, std::vector<float>>, std::vector<float> > makeFeatureMap( const reco::CandidateView * PFCol, std::vector<size_t> & indices, bool debug = false);
static std::tuple< std::unordered_map<std::string, std::vector<float>>, std::vector<float> > makeFeatureMap( const reco::CandidateView * PFCol, const std::vector<size_t> & indices, bool debug = false);

};
} // namespace abcnet
29 changes: 9 additions & 20 deletions CommonTools/PileupAlgos/plugins/ABCNetProducer.cc
@@ -182,7 +182,11 @@ void ABCNetProducer::produce(edm::Event& iEvent, const edm::EventSetup& iSetup)
const reco::CandidateView *pfCol = PFCandidates.product();

//make feature map and KNNs, preprocess features
std::vector<size_t> indices;
std::vector<size_t> indices(pfCol->size()); // indices sorted by pT: e.g., 0th element is the idx w/ highest pT
std::iota(indices.begin(), indices.end(), 0);
std::sort(indices.begin(), indices.end(), [&](const size_t i, const size_t j) {
return pfCol->at(i).pt() > pfCol->at(j).pt();
});
auto [features, KNNs] = ABCNetMakeInputs::makeFeatureMap(pfCol, indices, debug_);
ABCNetProducer::preprocess(features, debug_);

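For reference, the index-sorting pattern introduced in this hunk is a plain argsort by descending pT; a minimal standalone sketch with a bare std::vector<float> standing in for the candidate pTs (function name and test values are illustrative, not from the PR):

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

// Argsort in descending order: the returned vector holds original positions,
// so result[0] is the position of the largest pT.
std::vector<std::size_t> sortedIndicesByPt(const std::vector<float>& pts) {
  std::vector<std::size_t> indices(pts.size());
  std::iota(indices.begin(), indices.end(), 0);
  std::sort(indices.begin(), indices.end(),
            [&pts](std::size_t i, std::size_t j) { return pts[i] > pts[j]; });
  return indices;
}

// Example: pts = {10.f, 50.f, 30.f} gives indices = {1, 2, 0}.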
@@ -225,25 +229,10 @@ void ABCNetProducer::produce(edm::Event& iEvent, const edm::EventSetup& iSetup)
}

//initialize container for ABCNet weights and fill it
std::vector<float> weights;

int PFCounter = 0;
for(auto const& aPF : *pfCol) {
const pat::PackedCandidate *lPack = dynamic_cast<const pat::PackedCandidate*>(&aPF);
float abcweight = -1.;
if(lPack == nullptr) {
// throw error
throw edm::Exception(edm::errors::LogicError,"ABCNetProducer: cannot get weights since inputs are not PackedCandidates");
}
else{
if (indices.at(PFCounter) >= (unsigned)n_pf_cands_) { //if the particle wasn't considered in evaluation, assign 0 weight
abcweight = 0.0;
}
else abcweight = outputs.at(0).tensor<float,3>()(0, indices.at(PFCounter), n_feats_);
//abcweight = (abcweight > 0.01) ? abcweight : 0.0; //set a lower threshold to ABCNet weights? to be tested
}
weights.push_back(abcweight);
PFCounter++;
std::vector<float> weights(PFCandidates->size(), 0);

for (size_t i = 0; i < indices.size() && i < size_t(n_pf_cands_); ++i) {
weights[indices.at(i)] = outputs.at(0).tensor<float, 3>()(0, i, n_feats_);
}

edm::ValueMap<float> ABCNetOut;
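The loop above scatters the network outputs, which are ordered by descending pT, back to the original candidate positions, leaving candidates beyond the first n_pf_cands_ at weight 0. A minimal standalone sketch of that scatter step (names and the score container are illustrative; the real code reads the TensorFlow output tensor directly):

#include <algorithm>
#include <cstddef>
#include <vector>

// scores[i] is the output for the i-th highest-pT candidate; sortedIdx[i] is
// that candidate's position in the original collection. Candidates that were
// not evaluated (i >= nEval) keep a weight of 0.
std::vector<float> scatterWeights(const std::vector<float>& scores,
                                  const std::vector<std::size_t>& sortedIdx,
                                  std::size_t nOriginal, std::size_t nEval) {
  std::vector<float> weights(nOriginal, 0.f);
  const std::size_t n = std::min({scores.size(), sortedIdx.size(), nEval});
  for (std::size_t i = 0; i < n; ++i) {
    weights[sortedIdx[i]] = scores[i];
  }
  return weights;
}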