2 changes: 1 addition & 1 deletion .gitattributes
@@ -1 +1 @@
*.onnx filter=lfs diff=lfs merge=lfs -text
*.onnx !text !filter !merge !diff
20 changes: 20 additions & 0 deletions Analysis/data/feature_order.json
@@ -0,0 +1,20 @@
{
"feature_order": [
"pt",
"eta",
"mass",
"seedingJet_pt",
"seedingJet_eta",
"seedingJet_mass",
"decayMode_0",
"decayMode_1",
"decayMode_10",
"decayMode_11",
"btagPNetB",
"btagPNetCvB",
"btagPNetCvL",
"btagPNetCvNotB",
"btagPNetQvG"
],
"model_type": "moe_gate"
}
Binary file added Analysis/data/model.onnx
Binary file not shown.
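The feature_order.json added above freezes the exact column order the ONNX model expects, so any caller can rebuild the input vector without guessing. Below is a minimal standalone sketch (not part of this PR) of how the two new files fit together from Python; the tensor names "raw_input" and "w_ff" mirror the C++ runner added in Analysis/include/FFNetONNX.h further down and are assumptions about how the model was exported.

```python
import json

import numpy as np
import onnxruntime as ort  # assumes onnxruntime is available in the environment

# Load the frozen feature order so the input vector matches the training layout.
with open("Analysis/data/feature_order.json") as f:
    feature_order = json.load(f)["feature_order"]

# Hypothetical per-tau values; features not listed (including the other decayMode_*
# slots) default to 0, which reproduces the one-hot decay-mode encoding.
tau = {"pt": 45.0, "eta": 1.2, "mass": 0.9, "decayMode_1": 1.0, "btagPNetB": 0.02}
raw_input = np.array([[tau.get(name, 0.0) for name in feature_order]], dtype=np.float32)

session = ort.InferenceSession("Analysis/data/model.onnx")
w_ff = session.run(["w_ff"], {"raw_input": raw_input})[0]
print(float(np.ravel(w_ff)[0]))
```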
93 changes: 92 additions & 1 deletion Analysis/hh_bbtautau.py
@@ -1,5 +1,5 @@
import ROOT

import json
if __name__ == "__main__":
sys.path.append(os.environ["ANALYSIS_PATH"])

@@ -485,6 +485,93 @@ def defineLeptonPreselection(self): # needs channel def
"eleEta2016 && tau1_iso_medium && muon1_tightId && muon2_tightId && firstele_mvaIso",
)

def define_w_ff_columns(self):
if self.run_ffs and "tauTau" in self.config['channelSelection']:
print("Defining w_ff columns using ONNX runner ...")
# Load the exact feature order expected by ONNX
analysis_path = os.environ["ANALYSIS_PATH"]
feat_order_path = os.path.join(analysis_path, "Analysis/data/feature_order.json")

with open(feat_order_path, 'r') as f:
fo = json.load(f)
feature_order = fo['feature_order']
model_type = fo.get('model_type', 'single')

# Columns we know how to map directly from per-tau branches
direct_cont = {
'pt', 'eta', 'mass',
'seedingJet_pt', 'seedingJet_eta', 'seedingJet_mass',
'btagPNetB', 'btagPNetCvB', 'btagPNetCvL', 'btagPNetCvNotB', 'btagPNetQvG'
}

# Build a C++ expression that creates a float vector in the ONNX feature_order
def make_tau_vec_expr(tau_prefix: str) -> str:
items = []
for feat in feature_order:
if feat in direct_cont:
items.append(f"{tau_prefix}_{feat}")
elif feat.startswith("decayMode_"):
dm_val = feat.split("_", 1)[1]
items.append(f"({tau_prefix}_decayMode == {dm_val} ? 1.f : 0.f)")
else:
items.append("0.f")
return "std::vector<float>{" + ", ".join(items) + "}"

# Per-tau raw feature vectors
tau1_feature_vector = make_tau_vec_expr("tau1")
tau2_feature_vector = make_tau_vec_expr("tau2")

tau1_expression = f"ff_interface::get_ff_runner().compute_w_ff({tau1_feature_vector})"
tau2_expression = f"ff_interface::get_ff_runner().compute_w_ff({tau2_feature_vector})"

self.df = self.df.Define("tau1_w_ff", tau1_expression)
self.df = self.df.Define("tau2_w_ff", tau2_expression)

# Iso / anti-iso flags
deepTau_medium_wp = Utilities.WorkingPointsTauVSjet.Medium.value
deepTau_vvloose_wp = Utilities.WorkingPointsTauVSjet.VVLoose.value
deepTau_vsjet_version = f"idDeepTau{self.deepTauYear()}v{self.deepTauVersion}VSjet"

self.df = self.df.Define("tau1_is_iso_ff", f"tau1_{deepTau_vsjet_version} >= {deepTau_medium_wp}")
self.df = self.df.Define("tau1_is_antiiso_ff", f"(tau1_{deepTau_vsjet_version} >= {deepTau_vvloose_wp} && tau1_{deepTau_vsjet_version} < {deepTau_medium_wp})")
self.df = self.df.Define("tau2_is_iso_ff", f"tau2_{deepTau_vsjet_version} >= {deepTau_medium_wp}")
self.df = self.df.Define("tau2_is_antiiso_ff", f"(tau2_{deepTau_vsjet_version} >= {deepTau_vvloose_wp} && tau2_{deepTau_vsjet_version} < {deepTau_medium_wp})")

# Combine per-event
self.df = self.df.Define("is_tau1_leading", "tau1_pt > tau2_pt")
self.df = self.df.Define("ff_lead", "is_tau1_leading ? tau1_w_ff : tau2_w_ff")
self.df = self.df.Define("ff_sublead", "is_tau1_leading ? tau2_w_ff : tau1_w_ff")
self.df = self.df.Define("iso_lead", "is_tau1_leading ? tau1_is_iso_ff : tau2_is_iso_ff")
self.df = self.df.Define("antiiso_lead", "is_tau1_leading ? tau1_is_antiiso_ff : tau2_is_antiiso_ff")
self.df = self.df.Define("iso_sublead", "is_tau1_leading ? tau2_is_iso_ff : tau1_is_iso_ff")
self.df = self.df.Define("antiiso_sublead", "is_tau1_leading ? tau2_is_antiiso_ff : tau1_is_antiiso_ff")

self.df = self.df.Define("ff_comb_weight", """
if (!tauTau) return 1.0f;
float weight = 0.f;
if (!OS) return 0.f;

// Case 1: Leading tau is anti-iso, subleading is iso
if (antiiso_lead && iso_sublead) {
weight += ff_lead;
}
// Case 2: Leading tau is iso, subleading is anti-iso
if (iso_lead && antiiso_sublead) {
weight += ff_sublead;
}
// Case 3: Both are anti-iso
if (antiiso_lead && antiiso_sublead) {
weight -= (ff_lead * ff_sublead);
}
return weight;
""")
else:
print("FF runner not active. Defining default w_ff columns.")
self.df = self.df.Define("tau1_w_ff", "1.0f")
self.df = self.df.Define("tau2_w_ff", "1.0f")
if "ff_comb_weight" not in self.df.GetColumnNames():
self.df = self.df.Define("ff_comb_weight", "1.0f")

def defineQCDRegions(self):
self.DefineAndAppend("OS", "tau1_charge*tau2_charge < 0")
self.DefineAndAppend("SS", "!OS")
@@ -548,6 +635,8 @@ def __init__(
whichType=3,
wantScales=True,
colToSave=[],
run_ffs=False

):
super(DataFrameBuilderForHistograms, self).__init__(df)
self.deepTauVersion = config["deepTauVersion"]
@@ -564,6 +653,7 @@ def __init__(
self.wantTriggerSFErrors = wantTriggerSFErrors
self.wantScales = isCentral and wantScales
self.colToSave = colToSave
self.run_ffs = run_ffs


def PrepareDfForDNN(dfForHistograms):
@@ -589,6 +679,7 @@ def PrepareDfForHistograms(dfForHistograms):
dfForHistograms.defineCRs()
dfForHistograms.defineCategories()
dfForHistograms.defineQCDRegions()
dfForHistograms.define_w_ff_columns()
return dfForHistograms


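The ff_comb_weight Define above is the standard two-leg fake-factor combination: one fake factor is applied per anti-isolated tau leg, and the doubly anti-isolated term is subtracted to avoid double counting. A standalone Python sketch of the same arithmetic (illustrative names, not analysis branches; the early return of 1.0 for non-tauTau events is omitted):

```python
def ff_comb_weight(ff_lead, ff_sublead,
                   iso_lead, antiiso_lead, iso_sublead, antiiso_sublead,
                   is_os=True):
    """Two-leg fake-factor combination mirroring the C++ Define in define_w_ff_columns."""
    if not is_os:
        return 0.0
    weight = 0.0
    if antiiso_lead and iso_sublead:       # leading leg anti-iso, subleading iso
        weight += ff_lead
    if iso_lead and antiiso_sublead:       # leading leg iso, subleading anti-iso
        weight += ff_sublead
    if antiiso_lead and antiiso_sublead:   # both legs anti-iso: subtract the overlap
        weight -= ff_lead * ff_sublead
    return weight

# Event where only the subleading tau is anti-isolated -> weighted by its fake factor:
print(ff_comb_weight(0.12, 0.08, iso_lead=True, antiiso_lead=False,
                     iso_sublead=False, antiiso_sublead=True))  # prints 0.08
```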
13 changes: 13 additions & 0 deletions Analysis/histTupleDef.py
@@ -59,6 +59,7 @@ def GetDfw(
if global_params["process_name"] == "DY":
datasetType = 2
kwargset["whichType"] = datasetType
kwargset["run_ffs"] = global_params.get("run_ffs", False)
dfw = analysis.DataFrameBuilderForHistograms(df, global_params, period, **kwargset)

if df_caches:
@@ -105,3 +106,15 @@
):
weight_name = unc_cfg_dict[uncName]["expression"].format(scale=uncScale)
dfw.df = dfw.df.Define(final_weight_name, weight_name)

# fake-factor “shape” weight, only for central
if isCentral:
cols = set(map(str, dfw.df.GetColumnNames()))
if "ff_comb_weight" in cols:
if process_group == "data":
# data: the ML weight is just the FF comb weight
dfw.df = dfw.df.Define("weight_MLshape_Central", "ff_comb_weight")
else:
# MC: multiply the already defined final_weight by the FF comb weight
dfw.df = dfw.df.Define("weight_MLshape_Central", "final_weight * ff_comb_weight")
dfw.colToSave.append("weight_MLshape_Central")
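The branching above reduces to one rule; a plain-Python sketch (names illustrative) of what weight_MLshape_Central evaluates to for each process type:

```python
def ml_shape_weight(ff_comb_weight, final_weight, is_data):
    # Data is weighted by the fake-factor combination alone; MC keeps its central
    # weight and is multiplied by the same combination.
    return ff_comb_weight if is_data else final_weight * ff_comb_weight
```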
86 changes: 86 additions & 0 deletions Analysis/include/FFNetONNX.h
@@ -0,0 +1,86 @@
#pragma once
#include "/cvmfs/sft.cern.ch/lcg/views/LCG_107/x86_64-el9-gcc11-opt/include/onnxruntime/onnxruntime_cxx_api.h"

#include <memory>
#include <vector>
#include <string>
#include <stdexcept>
#include <fstream>
#include <iostream>

namespace ff_interface {

class FFNetONNXRunner {
public:
explicit FFNetONNXRunner(const std::string& model_path)
: env_(ORT_LOGGING_LEVEL_WARNING, "FFNetInference"),
session_opts_(),
feature_count_(0) {

std::ifstream f(model_path.c_str());
if (!f.good()) throw std::runtime_error("Could not find ONNX model file: " + model_path);

session_opts_.SetIntraOpNumThreads(1);
session_ = std::make_unique<Ort::Session>(env_, model_path.c_str(), session_opts_);

// Input shape to learn expected feature length (batch, F)
Ort::AllocatorWithDefaultOptions alloc;
auto type_info = session_->GetInputTypeInfo(0);
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
auto shape = tensor_info.GetShape();
if (shape.size() != 2) {
throw std::runtime_error("Model input is not rank-2 [batch, features].");
}
if (shape[1] <= 0) {
throw std::runtime_error("Model feature dimension is dynamic/unknown. Export should fix it.");
}
feature_count_ = static_cast<size_t>(shape[1]);
std::cout << "[FFNetONNXRunner] Loaded " << model_path
<< " expecting feature_count=" << feature_count_ << std::endl;
}

// pass the raw vector in the exact feature_order
float compute_w_ff(const std::vector<float>& raw_input) const {
if (raw_input.size() != feature_count_) {
throw std::runtime_error(
"[FFNetONNXRunner] raw_input.size()=" + std::to_string(raw_input.size()) +
" does not match model feature_count=" + std::to_string(feature_count_) +
". Build the vector in the saved feature_order.json."
);
}
Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
std::vector<int64_t> input_shape{1, static_cast<int64_t>(raw_input.size())};
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
mem_info,
const_cast<float*>(raw_input.data()),
raw_input.size(),
input_shape.data(), input_shape.size()
);

const char* input_names[] = {"raw_input"};
const char* output_names[] = {"w_ff"};
auto outputs = session_->Run(Ort::RunOptions{nullptr}, input_names, &input_tensor, 1, output_names, 1);
return outputs[0].GetTensorData<float>()[0];
}

size_t feature_count() const { return feature_count_; }

private:
Ort::Env env_;
Ort::SessionOptions session_opts_;
std::unique_ptr<Ort::Session> session_;
size_t feature_count_;
};

inline std::unique_ptr<FFNetONNXRunner> g_ff_runner_instance;
inline void initialize_ff_runner(const std::string& model_path) {
g_ff_runner_instance = std::make_unique<FFNetONNXRunner>(model_path);
}
inline FFNetONNXRunner& get_ff_runner() {
if (!g_ff_runner_instance) throw std::runtime_error("FF runner not initialized.");
return *g_ff_runner_instance;
}
inline void finalize_ff_runner() {
g_ff_runner_instance.reset();
}
} // namespace ff_interface
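This diff does not show where FFNetONNX.h is declared to ROOT or where initialize_ff_runner is called, so the following is only a hedged sketch of how the runner could be made available to the string Defines in define_w_ff_columns; the paths and the gSystem.Load call are assumptions about the runtime environment.

```python
import os
import ROOT

analysis_path = os.environ["ANALYSIS_PATH"]
header_path = os.path.join(analysis_path, "Analysis/include/FFNetONNX.h")
model_path = os.path.join(analysis_path, "Analysis/data/model.onnx")

# May be needed so the JIT-compiled header can resolve ONNX Runtime symbols;
# the library name/location depends on the LCG view.
ROOT.gSystem.Load("libonnxruntime")

# JIT-compile the runner and create the global instance used by compute_w_ff calls.
ROOT.gInterpreter.Declare(f'#include "{header_path}"')
ROOT.gInterpreter.ProcessLine(f'ff_interface::initialize_ff_runner("{model_path}");')
```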
41 changes: 13 additions & 28 deletions Analysis/make_stackplots.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,18 @@
import os

era = "ERA_string"
ver = "your_version"
indir = f"/eos/user/u/username/HH_bbtautau_Run3/histograms/{ver}/{era}/merged/"
plotdir = f"/eos/user/u/username/HH_bbtautau_Run3/histograms/{ver}/{era}/plots/"

varnames = [
"tau1_pt",
"tau2_pt",
"b1_pt",
"b2_pt",
"tautau_m_vis",
"bb_m_vis",
"MT2",
] # "bbtautau_mass"

channellist = ["eE", "eMu", "muMu", "eTau", "muTau", "tauTau"]
era = "Run3_2022"
ver = "v2510_2"
indir = f"/eos/user/p/prsolank/HH_bbtautau_resonant_Run3/merged_hists/{ver}/{era}/"
plotdir = f"/eos/user/p/prsolank/HH_bbtautau_resonant_Run3/merged_hists/{ver}/{era}/plots/"

#varnames = ["tau1_pt", "tau2_pt", "tau1_eta", "tau2_eta", "tautau_m_vis"] #"bbtautau_mass"
varnames = ["tautau_m_vis"] #"bbtautau_mass"

channellist = ["tauTau"]

cat = "inclusive"

using_uncertainties = (
True # When we turn on Up/Down, the file storage changes due to renameHists.py
)
using_uncertainties = False #When we turn on Up/Down, the file storage changes due to renameHists.py

for var in varnames:
for channel in channellist:
@@ -31,14 +22,8 @@
outname = os.path.join(plotdir, f"HHbbtautau_{channel}_{var}_StackPlot.pdf")

if not using_uncertainties:
os.system(
f"python3 ../FLAF/Analysis/HistPlotter.py --inFile {filename} --bckgConfig ../config/background_samples.yaml --globalConfig ../config/global.yaml --outFile {outname} --var {var} --category {cat} --channel {channel} --uncSource Central --wantData --year {era} --wantQCD False --rebin False --analysis HH_bbtautau --qcdregion OS_Iso --sigConfig ../config/{era}/samples.yaml --wantSignals"
)
os.system(f"python3 ../FLAF/Analysis/HistPlotter.py --inFile {filename} --bckgConfig ../config/background_samples.yaml --globalConfig ../config/global.yaml --outFile {outname} --var {var} --category {cat} --channel {channel} --uncSource Central --wantData --year {era} --wantQCD True --rebin False --analysis HH_bbtautau --qcdregion OS_Iso --sigConfig ../config/{era}/samples.yaml --wantSignals")

else:
filename = os.path.join(
indir, var, "tmp", f"all_histograms_{var}_hadded.root"
)
os.system(
f"python3 ../FLAF/Analysis/HistPlotter.py --inFile {filename} --bckgConfig ../config/background_samples.yaml --globalConfig ../config/global.yaml --outFile {outname} --var {var} --category {cat} --channel {channel} --uncSource Central --wantData --year {era} --wantQCD False --rebin False --analysis HH_bbtautau --qcdregion OS_Iso --sigConfig ../config/{era}/samples.yaml --wantSignals"
)
filename = os.path.join(indir, var, 'tmp', f"all_histograms_{var}_hadded.root")
os.system(f"python3 ../FLAF/Analysis/HistPlotter.py --inFile {filename} --bckgConfig ../config/background_samples.yaml --globalConfig ../config/global.yaml --outFile {outname} --var {var} --category {cat} --channel {channel} --uncSource Central --wantData --year {era} --wantQCD True --rebin False --analysis HH_bbtautau --qcdregion SS_AntiIso")
Binary file added Events.root
Binary file not shown.
1 change: 1 addition & 0 deletions config/Run3_2022EE/samples.yaml
@@ -20,6 +20,7 @@ GLOBAL:
# - keep ^MuonEG_.*
# - keep ^Jet.*
# - keep ^MET_.*
- keep ^QCD_*
- keep ^GluGlutoHHto2B2Tau_.*
- keep ^VBFHHto2B2Tau_.*
use_stitching: false
12 changes: 7 additions & 5 deletions config/global.yaml
@@ -51,12 +51,12 @@ vars_to_plot:
- tau1_pt

channelSelection:
- eTau
- muTau
#- eTau
#- muTau
- tauTau
- eE
- eMu
- muMu
#- eE
#- eMu
#- muMu

met_type: "PuppiMET"
deepTauVersion: 2p5
@@ -530,3 +530,5 @@ unc_to_not_consider_boosted:
- bTagShapeSF_hfstats2
- bTagShapeSF_cferr1
- bTagShapeSF_cferr2

ff_feature_order_json: Analysis/data/feature_order.json
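The new ff_feature_order_json key makes the feature-order file configurable from global.yaml (define_w_ff_columns above currently builds the same default path directly). A small sketch, assuming global.yaml is loaded into a plain dict as elsewhere in the framework:

```python
import os
import yaml

with open("config/global.yaml") as f:
    global_params = yaml.safe_load(f)

feat_order_path = os.path.join(os.environ["ANALYSIS_PATH"],
                               global_params["ff_feature_order_json"])

# GetDfw reads run_ffs with a False default; the flag itself is not set in this diff.
run_ffs = global_params.get("run_ffs", False)
```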
22 changes: 11 additions & 11 deletions config/plot/inputs.yaml
@@ -23,11 +23,11 @@
- VV
title: "VV"
color: kOrange-4
- name: W
types:
- W
title: "W #rightarrow l#nu + jets"
color: kOrange+1
#- name: W
# types:
# - W
# title: "W #rightarrow l#nu + jets"
# color: kOrange+1
- name: single_H
title: "H"
types:
@@ -39,12 +39,12 @@
# title: "Other"
# types: []
# color: kPink+2
- name: GluGlutoHHto2B2Tau_kl_1p00_kt_1p00_c2_0p00
title: "GGtoHHto2B2Tau"
type: signal
types:
- HHnonRes
color: kCyan
#- name: GluGlutoHHto2B2Tau_0p00_1p00_0p00
# title: "GGtoHHto2B2Tau"
# type: signal
# types:
# - HHnonRes
# color: kCyan
# - name: GluGluToBulkGraviton_1250
# title: "GGBG-1250"
# type: signal