2 changes: 1 addition & 1 deletion .gitattributes
@@ -1 +1 @@
*.onnx filter=lfs diff=lfs merge=lfs -text
*.onnx !text !filter !merge !diff
20 changes: 20 additions & 0 deletions Analysis/data/feature_order.json
@@ -0,0 +1,20 @@
{
"feature_order": [
"pt",
"eta",
"mass",
"seedingJet_pt",
"seedingJet_eta",
"seedingJet_mass",
"decayMode_0",
"decayMode_1",
"decayMode_10",
"decayMode_11",
"btagPNetB",
"btagPNetCvB",
"btagPNetCvL",
"btagPNetCvNotB",
"btagPNetQvG"
],
"model_type": "moe_gate"
}
Binary file added Analysis/data/model.onnx
Binary file not shown.
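The feature_order.json added above freezes the exact column order the ONNX model expects, so any caller can rebuild the input vector without guessing. Below is a minimal standalone sketch (not part of this PR) of how the two new files fit together from Python; the tensor names "raw_input" and "w_ff" mirror the C++ runner added in Analysis/include/FFNetONNX.h further down and are assumptions about how the model was exported.

```python
import json

import numpy as np
import onnxruntime as ort  # assumes onnxruntime is available in the environment

# Load the frozen feature order so the input vector matches the training layout.
with open("Analysis/data/feature_order.json") as f:
    feature_order = json.load(f)["feature_order"]

# Hypothetical per-tau values; features not listed (including the other decayMode_*
# slots) default to 0, which reproduces the one-hot decay-mode encoding.
tau = {"pt": 45.0, "eta": 1.2, "mass": 0.9, "decayMode_1": 1.0, "btagPNetB": 0.02}
raw_input = np.array([[tau.get(name, 0.0) for name in feature_order]], dtype=np.float32)

session = ort.InferenceSession("Analysis/data/model.onnx")
w_ff = session.run(["w_ff"], {"raw_input": raw_input})[0]
print(float(np.ravel(w_ff)[0]))
```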
93 changes: 92 additions & 1 deletion Analysis/hh_bbtautau.py
@@ -1,5 +1,5 @@
import ROOT

import json
if __name__ == "__main__":
sys.path.append(os.environ["ANALYSIS_PATH"])

@@ -485,6 +485,93 @@ def defineLeptonPreselection(self): # needs channel def
"eleEta2016 && tau1_iso_medium && muon1_tightId && muon2_tightId && firstele_mvaIso",
)

def define_w_ff_columns(self):
if self.run_ffs and "tauTau" in self.config['channelSelection']:
print("Defining w_ff columns using ONNX runner ...")
# Load the exact feature order expected by ONNX
analysis_path = os.environ["ANALYSIS_PATH"]
feat_order_path = os.path.join(analysis_path, "Analysis/data/feature_order.json")

with open(feat_order_path, 'r') as f:
fo = json.load(f)
feature_order = fo['feature_order']
model_type = fo.get('model_type', 'single')

# Columns we know how to map directly from per-tau branches
direct_cont = {
'pt', 'eta', 'mass',
'seedingJet_pt', 'seedingJet_eta', 'seedingJet_mass',
'btagPNetB', 'btagPNetCvB', 'btagPNetCvL', 'btagPNetCvNotB', 'btagPNetQvG'
}

# Build a C++ expression that creates a float vector in the ONNX feature_order
def make_tau_vec_expr(tau_prefix: str) -> str:
items = []
for feat in feature_order:
if feat in direct_cont:
items.append(f"{tau_prefix}_{feat}")
elif feat.startswith("decayMode_"):
dm_val = feat.split("_", 1)[1]
items.append(f"({tau_prefix}_decayMode == {dm_val} ? 1.f : 0.f)")
else:
items.append("0.f")
return "std::vector<float>{" + ", ".join(items) + "}"

# Per-tau raw feature vectors
tau1_feature_vector = make_tau_vec_expr("tau1")
tau2_feature_vector = make_tau_vec_expr("tau2")

tau1_expression = f"ff_interface::get_ff_runner().compute_w_ff({tau1_feature_vector})"
tau2_expression = f"ff_interface::get_ff_runner().compute_w_ff({tau2_feature_vector})"

self.df = self.df.Define("tau1_w_ff", tau1_expression)
self.df = self.df.Define("tau2_w_ff", tau2_expression)

# Iso / anti-iso flags
deepTau_medium_wp = Utilities.WorkingPointsTauVSjet.Medium.value
deepTau_vvloose_wp = Utilities.WorkingPointsTauVSjet.VVLoose.value
deepTau_vsjet_version = f"idDeepTau{self.deepTauYear()}v{self.deepTauVersion}VSjet"

self.df = self.df.Define("tau1_is_iso_ff", f"tau1_{deepTau_vsjet_version} >= {deepTau_medium_wp}")
self.df = self.df.Define("tau1_is_antiiso_ff", f"(tau1_{deepTau_vsjet_version} >= {deepTau_vvloose_wp} && tau1_{deepTau_vsjet_version} < {deepTau_medium_wp})")
self.df = self.df.Define("tau2_is_iso_ff", f"tau2_{deepTau_vsjet_version} >= {deepTau_medium_wp}")
self.df = self.df.Define("tau2_is_antiiso_ff", f"(tau2_{deepTau_vsjet_version} >= {deepTau_vvloose_wp} && tau2_{deepTau_vsjet_version} < {deepTau_medium_wp})")

# Combine per-event
self.df = self.df.Define("is_tau1_leading", "tau1_pt > tau2_pt")
self.df = self.df.Define("ff_lead", "is_tau1_leading ? tau1_w_ff : tau2_w_ff")
self.df = self.df.Define("ff_sublead", "is_tau1_leading ? tau2_w_ff : tau1_w_ff")
self.df = self.df.Define("iso_lead", "is_tau1_leading ? tau1_is_iso_ff : tau2_is_iso_ff")
self.df = self.df.Define("antiiso_lead", "is_tau1_leading ? tau1_is_antiiso_ff : tau2_is_antiiso_ff")
self.df = self.df.Define("iso_sublead", "is_tau1_leading ? tau2_is_iso_ff : tau1_is_iso_ff")
self.df = self.df.Define("antiiso_sublead", "is_tau1_leading ? tau2_is_antiiso_ff : tau1_is_antiiso_ff")

self.df = self.df.Define("ff_comb_weight", """
if (!tauTau) return 1.0f;
float weight = 0.f;
if (!OS) return 0.f;

// Case 1: Leading tau is anti-iso, subleading is iso
if (antiiso_lead && iso_sublead) {
weight += ff_lead;
}
// Case 2: Leading tau is iso, subleading is anti-iso
if (iso_lead && antiiso_sublead) {
weight += ff_sublead;
}
// Case 3: Both are anti-iso
if (antiiso_lead && antiiso_sublead) {
weight -= (ff_lead * ff_sublead);
}
return weight;
""")
else:
print("FF runner not active. Defining default w_ff columns.")
self.df = self.df.Define("tau1_w_ff", "1.0f")
self.df = self.df.Define("tau2_w_ff", "1.0f")
if "ff_comb_weight" not in self.df.GetColumnNames():
self.df = self.df.Define("ff_comb_weight", "1.0f")

def defineQCDRegions(self):
self.DefineAndAppend("OS", "tau1_charge*tau2_charge < 0")
self.DefineAndAppend("SS", "!OS")
@@ -548,6 +635,8 @@ def __init__(
whichType=3,
wantScales=True,
colToSave=[],
run_ffs=False

):
super(DataFrameBuilderForHistograms, self).__init__(df)
self.deepTauVersion = config["deepTauVersion"]
@@ -564,6 +653,7 @@ def __init__(
self.wantTriggerSFErrors = wantTriggerSFErrors
self.wantScales = isCentral and wantScales
self.colToSave = colToSave
self.run_ffs = run_ffs


def PrepareDfForDNN(dfForHistograms):
@@ -589,6 +679,7 @@ def PrepareDfForHistograms(dfForHistograms):
dfForHistograms.defineCRs()
dfForHistograms.defineCategories()
dfForHistograms.defineQCDRegions()
dfForHistograms.define_w_ff_columns()
return dfForHistograms


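The ff_comb_weight Define above is the standard two-leg fake-factor combination: one fake factor is applied per anti-isolated tau leg, and the doubly anti-isolated term is subtracted to avoid double counting. A standalone Python sketch of the same arithmetic (illustrative names, not analysis branches; the early return of 1.0 for non-tauTau events is omitted):

```python
def ff_comb_weight(ff_lead, ff_sublead,
                   iso_lead, antiiso_lead, iso_sublead, antiiso_sublead,
                   is_os=True):
    """Two-leg fake-factor combination mirroring the C++ Define in define_w_ff_columns."""
    if not is_os:
        return 0.0
    weight = 0.0
    if antiiso_lead and iso_sublead:       # leading leg anti-iso, subleading iso
        weight += ff_lead
    if iso_lead and antiiso_sublead:       # leading leg iso, subleading anti-iso
        weight += ff_sublead
    if antiiso_lead and antiiso_sublead:   # both legs anti-iso: subtract the overlap
        weight -= ff_lead * ff_sublead
    return weight

# Event where only the subleading tau is anti-isolated -> weighted by its fake factor:
print(ff_comb_weight(0.12, 0.08, iso_lead=True, antiiso_lead=False,
                     iso_sublead=False, antiiso_sublead=True))  # prints 0.08
```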
13 changes: 13 additions & 0 deletions Analysis/histTupleDef.py
@@ -59,6 +59,7 @@ def GetDfw(
if global_params["process_name"] == "DY":
datasetType = 2
kwargset["whichType"] = datasetType
kwargset["run_ffs"] = global_params.get("run_ffs", False)
dfw = analysis.DataFrameBuilderForHistograms(df, global_params, period, **kwargset)

if df_caches:
@@ -105,3 +106,15 @@
):
weight_name = unc_cfg_dict[uncName]["expression"].format(scale=uncScale)
dfw.df = dfw.df.Define(final_weight_name, weight_name)

# fake-factor “shape” weight, only for central
if isCentral:
cols = set(map(str, dfw.df.GetColumnNames()))
if "ff_comb_weight" in cols:
if process_group == "data":
# data: the ML weight is just the FF comb weight
dfw.df = dfw.df.Define("weight_MLshape_Central", "ff_comb_weight")
else:
# MC: multiply the already defined final_weight by the FF comb weight
dfw.df = dfw.df.Define("weight_MLshape_Central", "final_weight * ff_comb_weight")
dfw.colToSave.append("weight_MLshape_Central")
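The branching above reduces to one rule; a plain-Python sketch (names illustrative) of what weight_MLshape_Central evaluates to for each process type:

```python
def ml_shape_weight(ff_comb_weight, final_weight, is_data):
    # Data is weighted by the fake-factor combination alone; MC keeps its central
    # weight and is multiplied by the same combination.
    return ff_comb_weight if is_data else final_weight * ff_comb_weight
```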
86 changes: 86 additions & 0 deletions Analysis/include/FFNetONNX.h
@@ -0,0 +1,86 @@
#pragma once
#include "/cvmfs/sft.cern.ch/lcg/views/LCG_107/x86_64-el9-gcc11-opt/include/onnxruntime/onnxruntime_cxx_api.h"

#include <memory>
#include <vector>
#include <string>
#include <stdexcept>
#include <fstream>
#include <iostream>

namespace ff_interface {

class FFNetONNXRunner {
public:
explicit FFNetONNXRunner(const std::string& model_path)
: env_(ORT_LOGGING_LEVEL_WARNING, "FFNetInference"),
session_opts_(),
feature_count_(0) {

std::ifstream f(model_path.c_str());
if (!f.good()) throw std::runtime_error("Could not find ONNX model file: " + model_path);

session_opts_.SetIntraOpNumThreads(1);
session_ = std::make_unique<Ort::Session>(env_, model_path.c_str(), session_opts_);

// Input shape to learn expected feature length (batch, F)
Ort::AllocatorWithDefaultOptions alloc;
auto type_info = session_->GetInputTypeInfo(0);
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
auto shape = tensor_info.GetShape();
if (shape.size() != 2) {
throw std::runtime_error("Model input is not rank-2 [batch, features].");
}
if (shape[1] <= 0) {
throw std::runtime_error("Model feature dimension is dynamic/unknown. Export should fix it.");
}
feature_count_ = static_cast<size_t>(shape[1]);
std::cout << "[FFNetONNXRunner] Loaded " << model_path
<< " expecting feature_count=" << feature_count_ << std::endl;
}

// pass the raw vector in the exact feature_order
float compute_w_ff(const std::vector<float>& raw_input) const {
if (raw_input.size() != feature_count_) {
throw std::runtime_error(
"[FFNetONNXRunner] raw_input.size()=" + std::to_string(raw_input.size()) +
" does not match model feature_count=" + std::to_string(feature_count_) +
". Build the vector in the saved feature_order.json."
);
}
Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
std::vector<int64_t> input_shape{1, static_cast<int64_t>(raw_input.size())};
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
mem_info,
const_cast<float*>(raw_input.data()),
raw_input.size(),
input_shape.data(), input_shape.size()
);

const char* input_names[] = {"raw_input"};
const char* output_names[] = {"w_ff"};
auto outputs = session_->Run(Ort::RunOptions{nullptr}, input_names, &input_tensor, 1, output_names, 1);
return outputs[0].GetTensorData<float>()[0];
}

size_t feature_count() const { return feature_count_; }

private:
Ort::Env env_;
Ort::SessionOptions session_opts_;
std::unique_ptr<Ort::Session> session_;
size_t feature_count_;
};

inline std::unique_ptr<FFNetONNXRunner> g_ff_runner_instance;
inline void initialize_ff_runner(const std::string& model_path) {
g_ff_runner_instance = std::make_unique<FFNetONNXRunner>(model_path);
}
inline FFNetONNXRunner& get_ff_runner() {
if (!g_ff_runner_instance) throw std::runtime_error("FF runner not initialized.");
return *g_ff_runner_instance;
}
inline void finalize_ff_runner() {
g_ff_runner_instance.reset();
}
} // namespace ff_interface
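This diff does not show where FFNetONNX.h is declared to ROOT or where initialize_ff_runner is called, so the following is only a hedged sketch of how the runner could be made available to the string Defines in define_w_ff_columns; the paths and the gSystem.Load call are assumptions about the runtime environment.

```python
import os
import ROOT

analysis_path = os.environ["ANALYSIS_PATH"]
header_path = os.path.join(analysis_path, "Analysis/include/FFNetONNX.h")
model_path = os.path.join(analysis_path, "Analysis/data/model.onnx")

# May be needed so the JIT-compiled header can resolve ONNX Runtime symbols;
# the library name/location depends on the LCG view.
ROOT.gSystem.Load("libonnxruntime")

# JIT-compile the runner and create the global instance used by compute_w_ff calls.
ROOT.gInterpreter.Declare(f'#include "{header_path}"')
ROOT.gInterpreter.ProcessLine(f'ff_interface::initialize_ff_runner("{model_path}");')
```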
41 changes: 13 additions & 28 deletions Analysis/make_stackplots.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,18 @@
import os

era = "ERA_string"
ver = "your_version"
indir = f"/eos/user/u/username/HH_bbtautau_Run3/histograms/{ver}/{era}/merged/"
plotdir = f"/eos/user/u/username/HH_bbtautau_Run3/histograms/{ver}/{era}/plots/"

varnames = [
"tau1_pt",
"tau2_pt",
"b1_pt",
"b2_pt",
"tautau_m_vis",
"bb_m_vis",
"MT2",
] # "bbtautau_mass"

channellist = ["eE", "eMu", "muMu", "eTau", "muTau", "tauTau"]
era = "Run3_2022"
ver = "v2510_2"
indir = f"/eos/user/p/prsolank/HH_bbtautau_resonant_Run3/merged_hists/{ver}/{era}/"
plotdir = f"/eos/user/p/prsolank/HH_bbtautau_resonant_Run3/merged_hists/{ver}/{era}/plots/"

#varnames = ["tau1_pt", "tau2_pt", "tau1_eta", "tau2_eta", "tautau_m_vis"] #"bbtautau_mass"
varnames = ["tautau_m_vis"] #"bbtautau_mass"

channellist = ["tauTau"]

cat = "inclusive"

using_uncertainties = (
True # When we turn on Up/Down, the file storage changes due to renameHists.py
)
using_uncertainties = False #When we turn on Up/Down, the file storage changes due to renameHists.py

for var in varnames:
for channel in channellist:
@@ -31,14 +22,8 @@
outname = os.path.join(plotdir, f"HHbbtautau_{channel}_{var}_StackPlot.pdf")

if not using_uncertainties:
os.system(
f"python3 ../FLAF/Analysis/HistPlotter.py --inFile {filename} --bckgConfig ../config/background_samples.yaml --globalConfig ../config/global.yaml --outFile {outname} --var {var} --category {cat} --channel {channel} --uncSource Central --wantData --year {era} --wantQCD False --rebin False --analysis HH_bbtautau --qcdregion OS_Iso --sigConfig ../config/{era}/samples.yaml --wantSignals"
)
os.system(f"python3 ../FLAF/Analysis/HistPlotter.py --inFile {filename} --bckgConfig ../config/background_samples.yaml --globalConfig ../config/global.yaml --outFile {outname} --var {var} --category {cat} --channel {channel} --uncSource Central --wantData --year {era} --wantQCD True --rebin False --analysis HH_bbtautau --qcdregion OS_Iso --sigConfig ../config/{era}/samples.yaml --wantSignals")

else:
filename = os.path.join(
indir, var, "tmp", f"all_histograms_{var}_hadded.root"
)
os.system(
f"python3 ../FLAF/Analysis/HistPlotter.py --inFile {filename} --bckgConfig ../config/background_samples.yaml --globalConfig ../config/global.yaml --outFile {outname} --var {var} --category {cat} --channel {channel} --uncSource Central --wantData --year {era} --wantQCD False --rebin False --analysis HH_bbtautau --qcdregion OS_Iso --sigConfig ../config/{era}/samples.yaml --wantSignals"
)
filename = os.path.join(indir, var, 'tmp', f"all_histograms_{var}_hadded.root")
os.system(f"python3 ../FLAF/Analysis/HistPlotter.py --inFile {filename} --bckgConfig ../config/background_samples.yaml --globalConfig ../config/global.yaml --outFile {outname} --var {var} --category {cat} --channel {channel} --uncSource Central --wantData --year {era} --wantQCD True --rebin False --analysis HH_bbtautau --qcdregion SS_AntiIso")
Binary file added Events.root
Binary file not shown.
1 change: 1 addition & 0 deletions config/Run3_2022EE/samples.yaml
@@ -20,6 +20,7 @@ GLOBAL:
# - keep ^MuonEG_.*
# - keep ^Jet.*
# - keep ^MET_.*
- keep ^QCD_*
- keep ^GluGlutoHHto2B2Tau_.*
- keep ^VBFHHto2B2Tau_.*
use_stitching: false
12 changes: 7 additions & 5 deletions config/global.yaml
@@ -51,12 +51,12 @@ vars_to_plot:
- tau1_pt

channelSelection:
- eTau
- muTau
#- eTau
#- muTau
- tauTau
- eE
- eMu
- muMu
#- eE
#- eMu
#- muMu

met_type: "PuppiMET"
deepTauVersion: 2p5
@@ -530,3 +530,5 @@ unc_to_not_consider_boosted:
- bTagShapeSF_hfstats2
- bTagShapeSF_cferr1
- bTagShapeSF_cferr2

ff_feature_order_json: Analysis/data/feature_order.json
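The new ff_feature_order_json key makes the feature-order file configurable from global.yaml (define_w_ff_columns above currently builds the same default path directly). A small sketch, assuming global.yaml is loaded into a plain dict as elsewhere in the framework:

```python
import os
import yaml

with open("config/global.yaml") as f:
    global_params = yaml.safe_load(f)

feat_order_path = os.path.join(os.environ["ANALYSIS_PATH"],
                               global_params["ff_feature_order_json"])

# GetDfw reads run_ffs with a False default; the flag itself is not set in this diff.
run_ffs = global_params.get("run_ffs", False)
```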
22 changes: 11 additions & 11 deletions config/plot/inputs.yaml
@@ -23,11 +23,11 @@
- VV
title: "VV"
color: kOrange-4
- name: W
types:
- W
title: "W #rightarrow l#nu + jets"
color: kOrange+1
#- name: W
# types:
# - W
# title: "W #rightarrow l#nu + jets"
# color: kOrange+1
- name: single_H
title: "H"
types:
@@ -39,12 +39,12 @@
# title: "Other"
# types: []
# color: kPink+2
- name: GluGlutoHHto2B2Tau_kl_1p00_kt_1p00_c2_0p00
title: "GGtoHHto2B2Tau"
type: signal
types:
- HHnonRes
color: kCyan
#- name: GluGlutoHHto2B2Tau_0p00_1p00_0p00
# title: "GGtoHHto2B2Tau"
# type: signal
# types:
# - HHnonRes
# color: kCyan
# - name: GluGluToBulkGraviton_1250
# title: "GGBG-1250"
# type: signal