106 changes: 27 additions & 79 deletions CommonTools/PileupAlgos/plugins/ABCNetMakeInputs.cc
@@ -13,63 +13,28 @@
using namespace abcnet;
using PointCloud = cms::nanoflann::PointCloud<float, 2>; //default value for the DIM parameter is 3, but want to gather using 2 dims only (eta, phi). See template definition L11 of PhysicsTools/NearestNeighbors/interface/NearestNeighbors.h

template <typename A, typename B> void zip(const std::vector<A> &a, const std::vector<B> &b, std::vector<std::pair<A,B>> &zipped) {
for(size_t i=0; i<a.size(); ++i) {
zipped.push_back(std::make_pair(a[i], b[i]));
}
}

template <typename A, typename B> void unzip( const std::vector<std::pair<A, B>> &zipped, std::vector<A> &a, std::vector<B> &b) {
for(size_t i=0; i<a.size(); i++) {
a[i] = zipped[i].first;
b[i] = zipped[i].second;
}
}

template <typename A> std::vector<size_t> get_indices(std::vector<A> & v) {
//initialize original index locations
std::vector<size_t> idx(v.size());
iota(idx.begin(), idx.end(), 0);
//first, get indices mapping sorted to unsorted elements
stable_sort(idx.begin(), idx.end(), [&v](size_t i1, size_t i2) {return v[i1] > v[i2];});
//then, invert the mapping to get unsorted ---> sorted mapping
std::vector<size_t> idx_inverse(idx.size());
for (size_t i = 0; i < idx.size(); ++i) idx_inverse[idx[i]] = i;

return idx_inverse;
}

std::tuple< std::unordered_map<std::string, std::vector<float>>, std::vector<float> > ABCNetMakeInputs::makeFeatureMap (const reco::CandidateView * pfCol, std::vector<size_t> & indices, bool debug) {
std::tuple< std::unordered_map<std::string, std::vector<float>>, std::vector<float> > ABCNetMakeInputs::makeFeatureMap (const reco::CandidateView * pfCol, const std::vector<size_t> & indices, bool debug) {

//store PF candidates and their pts into vectors
std::vector<pat::PackedCandidate> PFCands;
std::vector<float> Pts;
std::vector<const pat::PackedCandidate*> PFCands;

for (auto const & aPF : *pfCol ) {
const pat::PackedCandidate *lPack = dynamic_cast<const pat::PackedCandidate*>(&aPF);
PFCands.push_back(*lPack);
Pts.push_back(lPack->pt());
for (const auto &i : indices) {
const pat::PackedCandidate *lPack = dynamic_cast<const pat::PackedCandidate*>(&pfCol->at(i));
if(lPack == nullptr) {
// throw error
throw edm::Exception(edm::errors::LogicError,"ABCNetProducer: cannot get weights since inputs are not PackedCandidates");
}
PFCands.push_back(lPack);
} // end loop over PF candidates

//get indices mapping unsorted and sorted PF candidates
indices = get_indices(Pts);

//sort PF candidates based on their Pt
//zip the vectors
std::vector<std::pair<pat::PackedCandidate,float>> zipped_vec;
zip(PFCands, Pts, zipped_vec);
// Sort the vector of pairs
std::stable_sort(std::begin(zipped_vec), std::end(zipped_vec), [&](const auto& a, const auto& b) { return a.second > b.second; });
// Write the sorted pairs back to the original vectors
unzip(zipped_vec, PFCands, Pts);

//fill feature map
std::unordered_map<std::string, std::vector<float>> fts;
PointCloud::Points points;

for (auto const & aPF : PFCands) {

points.push_back({{float(aPF.eta()), float(aPF.phi())}});
PointCloud::Points points(4000, {{0.f, 0.f}});

for (unsigned i = 0; i < PFCands.size() && i < 4000; ++i) {
const auto &aPF = *PFCands[i];

points[i] = {{float(aPF.eta()), float(aPF.phi())}};

fts["PFCandEta"].push_back(aPF.eta()); //f0
fts["PFCandPhi"].push_back(aPF.phi()); //f1
@@ -85,51 +50,34 @@ std::tuple< std::unordered_map<std::string, std::vector<float>>, std::vector<float> > ABCNetMakeInputs::makeFeatureMap
fts["PFCandNumLayersHit"].push_back(aPF.trackerLayersWithMeasurement()); //f15
fts["PFCandFromPV"].push_back(aPF.fromPV()); //f16
fts["PFCandTrackHighPurity"].push_back(aPF.trackHighPurity()); //f17
if (aPF.bestTrack()) {
const auto *trk = aPF.bestTrack();
if (trk) {
if ( isinf(aPF.dz()/aPF.dzError()) ) fts["PFCandDZSig"].push_back(999.0); else fts["PFCandDZSig"].push_back(aPF.dz()/aPF.dzError()); //f10
if ( isinf(aPF.dxy()/aPF.dxyError()) ) fts["PFCandDXYSig"].push_back(999.0); else fts["PFCandDXYSig"].push_back(aPF.dxy()/aPF.dxyError()); //f11
const auto *trk = aPF.bestTrack();
fts["PFCandNormChi2"].push_back(trk->normalizedChi2()); //f13
fts["PFCandQuality"].push_back(trk->qualityMask()); //f18
}
else {
} else {
fts["PFCandDZSig"].push_back(0.0);
fts["PFCandDXYSig"].push_back(0.0);
fts["PFCandNormChi2"].push_back(999.0);
fts["PFCandQuality"].push_back(0.0);
}
if (aPF.pdgId() == 130 || aPF.pdgId() == 1) fts["PFCandHCalFrac"].push_back(aPF.hcalFraction()); else if (aPF.isIsolatedChargedHadron()) fts["PFCandHCalFrac"].push_back(aPF.rawHcalFraction()); else fts["PFCandHCalFrac"].push_back(0.0); //f12

}
if (debug) {
std::cout << "*** NOW CHECKING THE SORTING OF PF CANDIDATES ***" << std::endl;
if ( !std::is_sorted(fts["PFCandLogPt"].begin(), fts["PFCandLogPt"].end(), std::greater<float>() ) ) std::cout << "*** WARNING *** PFCandidates are not sorted in Pt!!!" << std::endl;
else std::cout << "No issues with the sorting of PF candidates detected" << std::endl;
}

for (int i = 0; i < (4000-static_cast<int>(fts["PFCandEta"].size())); i++) points.push_back({{float(0.0), float(0.0)}});

// method to gather kNN indices
auto knn = [&](size_t num_neighbors, size_t max_support_size, size_t max_query_size = 0) {
if (max_query_size == 0)
max_query_size = max_support_size;

PointCloud::Points supports(points.begin(), points.begin() + std::min(max_support_size, points.size()));
PointCloud::Points queries(points.begin(), points.begin() + std::min(max_query_size, points.size()));
auto result = PointCloud::knn<float>(supports, queries, num_neighbors); // queries.size() * num_neighbors
std::vector<float> output(max_query_size * num_neighbors, max_support_size - 1);
std::copy(result.begin(), result.begin() + std::min(result.size(), output.size()), output.begin());

//std::cout << "[knn] k=" << num_neighbors << ", num_points=" << points.size()
//<< ", max_support_size=" << max_support_size << ", max_query_size=" << max_query_size
//<< ", knn_result_size=" << result.size() << ", final_output_size=" << output.size() << std::endl;
return output;
};

std::vector<float> KNNs = knn(21, points.size(), points.size()); //gather 21 k-nearest neighbors. This vector has size (n_particles * 21)
//std::cout << "size of KNNs is " << KNNs.size() << std::endl;
//for each particle, the first KNN is the particle itself. The training was done excluding this first KNN. Exclude it during evaluation as well
for (int i = 4000-1; i >= 0; i--) KNNs.erase(KNNs.begin()+i*21); //remove elements starting from the end
return {fts,KNNs};
auto knn_indices = PointCloud::knn<float>(points, points, 21); // queries.size() * num_neighbors
std::vector<float> kNNs;
kNNs.reserve(4000 * 20);
for (unsigned i = 0; i < 4000; ++i){
for (unsigned j = 1; j < 21; ++j){
kNNs.push_back(knn_indices.at(i * 21 + j));
}
}

return {fts, kNNs};
};
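Note: the new kNN block above assumes the flat, row-major layout returned by the PointCloud::knn wrapper (21 indices per query on a point cloud padded to 4000 entries, the first index being the query point itself, as the removed comment explains). A minimal standalone sketch of that self-neighbor-stripping step, with the layout, sizes, and function name as illustrative assumptions rather than taken from the wrapper itself:

#include <cstddef>
#include <vector>

// Drop the first neighbor (the query point itself) from a flat, row-major
// kNN index array: the input holds nQueries * k entries, the output
// nQueries * (k - 1). Indices are kept as float, matching the code above.
std::vector<float> dropSelfNeighbor(const std::vector<float>& flatKnn,
                                    std::size_t nQueries, std::size_t k) {
  std::vector<float> out;
  out.reserve(nQueries * (k - 1));
  for (std::size_t i = 0; i < nQueries; ++i) {
    for (std::size_t j = 1; j < k; ++j) {  // j == 0 is the query itself
      out.push_back(flatKnn[i * k + j]);
    }
  }
  return out;
}

// With nQueries = 4000 and k = 21 this reproduces the 4000 * 20 entries
// reserved for kNNs above.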
2 changes: 1 addition & 1 deletion CommonTools/PileupAlgos/plugins/ABCNetMakeInputs.h
@@ -16,7 +16,7 @@ namespace abcnet {
ABCNetMakeInputs() {}
~ABCNetMakeInputs() {}

static std::tuple< std::unordered_map<std::string, std::vector<float>>, std::vector<float> > makeFeatureMap( const reco::CandidateView * PFCol, std::vector<size_t> & indices, bool debug = false);
static std::tuple< std::unordered_map<std::string, std::vector<float>>, std::vector<float> > makeFeatureMap( const reco::CandidateView * PFCol, const std::vector<size_t> & indices, bool debug = false);

};
} // namespace abcnet
29 changes: 9 additions & 20 deletions CommonTools/PileupAlgos/plugins/ABCNetProducer.cc
@@ -182,7 +182,11 @@ void ABCNetProducer::produce(edm::Event& iEvent, const edm::EventSetup& iSetup)
const reco::CandidateView *pfCol = PFCandidates.product();

//make feature map and KNNs, preprocess features
std::vector<size_t> indices;
std::vector<size_t> indices(pfCol->size()); // indices sorted by pT: e.g., 0th element is the idx w/ highest pT
std::iota(indices.begin(), indices.end(), 0);
std::sort(indices.begin(), indices.end(), [&](const size_t i, const size_t j) {
return pfCol->at(i).pt() > pfCol->at(j).pt();
});
auto [features, KNNs] = ABCNetMakeInputs::makeFeatureMap(pfCol, indices, debug_);
ABCNetProducer::preprocess(features, debug_);

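For reference, the index-sorting pattern introduced in this hunk is a plain argsort by descending pT; a minimal standalone sketch with a bare std::vector<float> standing in for the candidate pTs (function name and test values are illustrative, not from the PR):

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <vector>

// Argsort in descending order: the returned vector holds original positions,
// so result[0] is the position of the largest pT.
std::vector<std::size_t> sortedIndicesByPt(const std::vector<float>& pts) {
  std::vector<std::size_t> indices(pts.size());
  std::iota(indices.begin(), indices.end(), 0);
  std::sort(indices.begin(), indices.end(),
            [&pts](std::size_t i, std::size_t j) { return pts[i] > pts[j]; });
  return indices;
}

// Example: pts = {10.f, 50.f, 30.f} gives indices = {1, 2, 0}.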
@@ -225,25 +229,10 @@ void ABCNetProducer::produce(edm::Event& iEvent, const edm::EventSetup& iSetup)
}

//initialize container for ABCNet weights and fill it
std::vector<float> weights;

int PFCounter = 0;
for(auto const& aPF : *pfCol) {
const pat::PackedCandidate *lPack = dynamic_cast<const pat::PackedCandidate*>(&aPF);
float abcweight = -1.;
if(lPack == nullptr) {
// throw error
throw edm::Exception(edm::errors::LogicError,"ABCNetProducer: cannot get weights since inputs are not PackedCandidates");
}
else{
if (indices.at(PFCounter) >= (unsigned)n_pf_cands_) { //if the particle wasn't considered in evaluation, assign 0 weight
abcweight = 0.0;
}
else abcweight = outputs.at(0).tensor<float,3>()(0, indices.at(PFCounter), n_feats_);
//abcweight = (abcweight > 0.01) ? abcweight : 0.0; //set a lower threshold to ABCNet weights? to be tested
}
weights.push_back(abcweight);
PFCounter++;
std::vector<float> weights(PFCandidates->size(), 0);

for (size_t i = 0; i < indices.size() && i < size_t(n_pf_cands_); ++i) {
weights[indices.at(i)] = outputs.at(0).tensor<float, 3>()(0, i, n_feats_);
}

edm::ValueMap<float> ABCNetOut;
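The loop above scatters the network outputs, which are ordered by descending pT, back to the original candidate positions, leaving candidates beyond the first n_pf_cands_ at weight 0. A minimal standalone sketch of that scatter step (names and the score container are illustrative; the real code reads the TensorFlow output tensor directly):

#include <algorithm>
#include <cstddef>
#include <vector>

// scores[i] is the output for the i-th highest-pT candidate; sortedIdx[i] is
// that candidate's position in the original collection. Candidates that were
// not evaluated (i >= nEval) keep a weight of 0.
std::vector<float> scatterWeights(const std::vector<float>& scores,
                                  const std::vector<std::size_t>& sortedIdx,
                                  std::size_t nOriginal, std::size_t nEval) {
  std::vector<float> weights(nOriginal, 0.f);
  const std::size_t n = std::min({scores.size(), sortedIdx.size(), nEval});
  for (std::size_t i = 0; i < n; ++i) {
    weights[sortedIdx[i]] = scores[i];
  }
  return weights;
}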