diff --git a/.gitattributes b/.gitattributes
index 0bb75f7..ab51a50 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1 @@
-*.onnx filter=lfs diff=lfs merge=lfs -text
+*.onnx !text !filter !merge !diff
diff --git a/Analysis/data/feature_order.json b/Analysis/data/feature_order.json
new file mode 100644
index 0000000..4b547d3
--- /dev/null
+++ b/Analysis/data/feature_order.json
@@ -0,0 +1,20 @@
+{
+    "feature_order": [
+        "pt",
+        "eta",
+        "mass",
+        "seedingJet_pt",
+        "seedingJet_eta",
+        "seedingJet_mass",
+        "decayMode_0",
+        "decayMode_1",
+        "decayMode_10",
+        "decayMode_11",
+        "btagPNetB",
+        "btagPNetCvB",
+        "btagPNetCvL",
+        "btagPNetCvNotB",
+        "btagPNetQvG"
+    ],
+    "model_type": "moe_gate"
+}
\ No newline at end of file
diff --git a/Analysis/data/model.onnx b/Analysis/data/model.onnx
new file mode 100644
index 0000000..354d21e
Binary files /dev/null and b/Analysis/data/model.onnx differ
diff --git a/Analysis/hh_bbtautau.py b/Analysis/hh_bbtautau.py
index 25ade90..96a9830 100644
--- a/Analysis/hh_bbtautau.py
+++ b/Analysis/hh_bbtautau.py
@@ -1,5 +1,5 @@
 import ROOT
-
+import json
 
 if __name__ == "__main__":
     sys.path.append(os.environ["ANALYSIS_PATH"])
@@ -485,6 +485,93 @@ def defineLeptonPreselection(self): # needs channel def
             "eleEta2016 && tau1_iso_medium && muon1_tightId && muon2_tightId && firstele_mvaIso",
         )
 
+    def define_w_ff_columns(self):
+        if self.run_ffs and "tauTau" in self.config['channelSelection']:
+            print("Defining w_ff columns using ONNX runner ...")
+            # Load the exact feature order expected by the ONNX model
+            analysis_path = os.environ["ANALYSIS_PATH"]
+            feat_order_path = os.path.join(analysis_path, "Analysis/data/feature_order.json")
+
+            with open(feat_order_path, 'r') as f:
+                fo = json.load(f)
+            feature_order = fo['feature_order']
+            model_type = fo.get('model_type', 'single')
+
+            # Columns we know how to map directly from per-tau branches
+            direct_cont = {
+                'pt', 'eta', 'mass',
+                'seedingJet_pt', 'seedingJet_eta', 'seedingJet_mass',
+                'btagPNetB', 'btagPNetCvB', 'btagPNetCvL', 'btagPNetCvNotB', 'btagPNetQvG'
+            }
+
+            # Build a C++ expression that creates a float vector in the ONNX feature_order
+            def make_tau_vec_expr(tau_prefix: str) -> str:
+                items = []
+                for feat in feature_order:
+                    if feat in direct_cont:
+                        items.append(f"{tau_prefix}_{feat}")
+                    elif feat.startswith("decayMode_"):
+                        dm_val = feat.split("_", 1)[1]
+                        items.append(f"({tau_prefix}_decayMode == {dm_val} ? 1.f : 0.f)")
+                    else:
+                        items.append("0.f")
+                return "std::vector<float>{" + ", ".join(items) + "}"
+
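+            # For illustration, with the feature_order above the generated expression looks like
+            #   std::vector<float>{tau1_pt, tau1_eta, tau1_mass, ..., (tau1_decayMode == 0 ? 1.f : 0.f), ...}
+            # i.e. the C++ side receives the features in exactly the order stored in feature_order.json.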
1.f : 0.f)") + else: + items.append("0.f") + return "std::vector{" + ", ".join(items) + "}" + + # Per-tau raw feature vectors + tau1_feature_vector = make_tau_vec_expr("tau1") + tau2_feature_vector = make_tau_vec_expr("tau2") + + tau1_expression = f"ff_interface::get_ff_runner().compute_w_ff({tau1_feature_vector})" + tau2_expression = f"ff_interface::get_ff_runner().compute_w_ff({tau2_feature_vector})" + + self.df = self.df.Define("tau1_w_ff", tau1_expression) + self.df = self.df.Define("tau2_w_ff", tau2_expression) + + # Iso / anti-iso flags + deepTau_medium_wp = Utilities.WorkingPointsTauVSjet.Medium.value + deepTau_vvloose_wp = Utilities.WorkingPointsTauVSjet.VVLoose.value + deepTau_vsjet_version = f"idDeepTau{self.deepTauYear()}v{self.deepTauVersion}VSjet" + + self.df = self.df.Define("tau1_is_iso_ff", f"tau1_{deepTau_vsjet_version} >= {deepTau_medium_wp}") + self.df = self.df.Define("tau1_is_antiiso_ff", f"(tau1_{deepTau_vsjet_version} >= {deepTau_vvloose_wp} && tau1_{deepTau_vsjet_version} < {deepTau_medium_wp})") + self.df = self.df.Define("tau2_is_iso_ff", f"tau2_{deepTau_vsjet_version} >= {deepTau_medium_wp}") + self.df = self.df.Define("tau2_is_antiiso_ff", f"(tau2_{deepTau_vsjet_version} >= {deepTau_vvloose_wp} && tau2_{deepTau_vsjet_version} < {deepTau_medium_wp})") + + # Combine per-event + self.df = self.df.Define("is_tau1_leading", "tau1_pt > tau2_pt") + self.df = self.df.Define("ff_lead", "is_tau1_leading ? tau1_w_ff : tau2_w_ff") + self.df = self.df.Define("ff_sublead", "is_tau1_leading ? tau2_w_ff : tau1_w_ff") + self.df = self.df.Define("iso_lead", "is_tau1_leading ? tau1_is_iso_ff : tau2_is_iso_ff") + self.df = self.df.Define("antiiso_lead", "is_tau1_leading ? tau1_is_antiiso_ff : tau2_is_antiiso_ff") + self.df = self.df.Define("iso_sublead", "is_tau1_leading ? tau2_is_iso_ff : tau1_is_iso_ff") + self.df = self.df.Define("antiiso_sublead", "is_tau1_leading ? tau2_is_antiiso_ff : tau1_is_antiiso_ff") + + self.df = self.df.Define("ff_comb_weight", """ + if (!tauTau) return 1.0f; + float weight = 0.f; + if (!OS) return 0.f; + + // Case 1: Leading tau is anti-iso, subleading is iso + if (antiiso_lead && iso_sublead) { + weight += ff_lead; + } + // Case 2: Leading tau is iso, subleading is anti-iso + if (iso_lead && antiiso_sublead) { + weight += ff_sublead; + } + // Case 3: Both are anti-iso + if (antiiso_lead && antiiso_sublead) { + weight -= (ff_lead * ff_sublead); + } + return weight; + """) + else: + print("FF runner not active. 
Defining default w_ff columns.") + self.df = self.df.Define("tau1_w_ff", "1.0f") + self.df = self.df.Define("tau2_w_ff", "1.0f") + if "ff_comb_weight" not in self.df.GetColumnNames(): + self.df = self.df.Define("ff_comb_weight", "1.0f") + def defineQCDRegions(self): self.DefineAndAppend("OS", "tau1_charge*tau2_charge < 0") self.DefineAndAppend("SS", "!OS") @@ -548,6 +635,8 @@ def __init__( whichType=3, wantScales=True, colToSave=[], + run_ffs=False + ): super(DataFrameBuilderForHistograms, self).__init__(df) self.deepTauVersion = config["deepTauVersion"] @@ -564,6 +653,7 @@ def __init__( self.wantTriggerSFErrors = wantTriggerSFErrors self.wantScales = isCentral and wantScales self.colToSave = colToSave + self.run_ffs = run_ffs def PrepareDfForDNN(dfForHistograms): @@ -589,6 +679,7 @@ def PrepareDfForHistograms(dfForHistograms): dfForHistograms.defineCRs() dfForHistograms.defineCategories() dfForHistograms.defineQCDRegions() + dfForHistograms.define_w_ff_columns() return dfForHistograms diff --git a/Analysis/histTupleDef.py b/Analysis/histTupleDef.py index 2ae066d..dc07ea7 100644 --- a/Analysis/histTupleDef.py +++ b/Analysis/histTupleDef.py @@ -59,6 +59,7 @@ def GetDfw( if global_params["process_name"] == "DY": datasetType = 2 kwargset["whichType"] = datasetType + kwargset["run_ffs"] = global_params.get("run_ffs", False) dfw = analysis.DataFrameBuilderForHistograms(df, global_params, period, **kwargset) if df_caches: @@ -105,3 +106,15 @@ def DefineWeightForHistograms( ): weight_name = unc_cfg_dict[uncName]["expression"].format(scale=uncScale) dfw.df = dfw.df.Define(final_weight_name, weight_name) + + # fake-factor “shape” weight, only for central + if isCentral: + cols = set(map(str, dfw.df.GetColumnNames())) + if "ff_comb_weight" in cols: + if process_group == "data": + # data: the ML weight is just the FF comb weight + dfw.df = dfw.df.Define("weight_MLshape_Central", "ff_comb_weight") + else: + # MC: multiply the already defined final_weight by the FF comb weight + dfw.df = dfw.df.Define("weight_MLshape_Central", "final_weight * ff_comb_weight") + dfw.colToSave.append("weight_MLshape_Central") \ No newline at end of file diff --git a/Analysis/include/FFNetONNX.h b/Analysis/include/FFNetONNX.h new file mode 100644 index 0000000..c83a0f8 --- /dev/null +++ b/Analysis/include/FFNetONNX.h @@ -0,0 +1,86 @@ +#pragma once +#include "/cvmfs/sft.cern.ch/lcg/views/LCG_107/x86_64-el9-gcc11-opt/include/onnxruntime/onnxruntime_cxx_api.h" + +#include +#include +#include +#include +#include +#include + +namespace ff_interface { + +class FFNetONNXRunner { +public: + explicit FFNetONNXRunner(const std::string& model_path) + : env_(ORT_LOGGING_LEVEL_WARNING, "FFNetInference"), + session_opts_(), + feature_count_(0) { + + std::ifstream f(model_path.c_str()); + if (!f.good()) throw std::runtime_error("Could not find ONNX model file: " + model_path); + + session_opts_.SetIntraOpNumThreads(1); + session_ = std::make_unique(env_, model_path.c_str(), session_opts_); + + // Input shape to learn expected feature length (batch, F) + Ort::AllocatorWithDefaultOptions alloc; + auto type_info = session_->GetInputTypeInfo(0); + auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); + auto shape = tensor_info.GetShape(); + if (shape.size() != 2) { + throw std::runtime_error("Model input is not rank-2 [batch, features]."); + } + if (shape[1] <= 0) { + throw std::runtime_error("Model feature dimension is dynamic/unknown. 
Export should fix it."); + } + feature_count_ = static_cast(shape[1]); + std::cout << "[FFNetONNXRunner] Loaded " << model_path + << " expecting feature_count=" << feature_count_ << std::endl; + } + + // pass the raw vector in the exact feature_order + float compute_w_ff(const std::vector& raw_input) const { + if (raw_input.size() != feature_count_) { + throw std::runtime_error( + "[FFNetONNXRunner] raw_input.size()=" + std::to_string(raw_input.size()) + + " does not match model feature_count=" + std::to_string(feature_count_) + + ". Build the vector in the saved feature_order.json." + ); + } + Ort::MemoryInfo mem_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + std::vector input_shape{1, static_cast(raw_input.size())}; + Ort::Value input_tensor = Ort::Value::CreateTensor( + mem_info, + const_cast(raw_input.data()), + raw_input.size(), + input_shape.data(), input_shape.size() + ); + + const char* input_names[] = {"raw_input"}; + const char* output_names[] = {"w_ff"}; + auto outputs = session_->Run(Ort::RunOptions{nullptr}, input_names, &input_tensor, 1, output_names, 1); + return outputs[0].GetTensorData()[0]; + } + + size_t feature_count() const { return feature_count_; } + +private: + Ort::Env env_; + Ort::SessionOptions session_opts_; + std::unique_ptr session_; + size_t feature_count_; +}; + +inline std::unique_ptr g_ff_runner_instance; +inline void initialize_ff_runner(const std::string& model_path) { + g_ff_runner_instance = std::make_unique(model_path); +} +inline FFNetONNXRunner& get_ff_runner() { + if (!g_ff_runner_instance) throw std::runtime_error("FF runner not initialized."); + return *g_ff_runner_instance; +} +inline void finalize_ff_runner() { + g_ff_runner_instance.reset(); + } +} // namespace ff_interface \ No newline at end of file diff --git a/Analysis/make_stackplots.py b/Analysis/make_stackplots.py index e55a250..4f9990b 100644 --- a/Analysis/make_stackplots.py +++ b/Analysis/make_stackplots.py @@ -1,27 +1,18 @@ import os -era = "ERA_string" -ver = "your_version" -indir = f"/eos/user/u/username/HH_bbtautau_Run3/histograms/{ver}/{era}/merged/" -plotdir = f"/eos/user/u/username/HH_bbtautau_Run3/histograms/{ver}/{era}/plots/" - -varnames = [ - "tau1_pt", - "tau2_pt", - "b1_pt", - "b2_pt", - "tautau_m_vis", - "bb_m_vis", - "MT2", -] # "bbtautau_mass" - -channellist = ["eE", "eMu", "muMu", "eTau", "muTau", "tauTau"] +era = "Run3_2022" +ver = "v2510_2" +indir = f"/eos/user/p/prsolank/HH_bbtautau_resonant_Run3/merged_hists/{ver}/{era}/" +plotdir = f"/eos/user/p/prsolank/HH_bbtautau_resonant_Run3/merged_hists/{ver}/{era}/plots/" + +#varnames = ["tau1_pt", "tau2_pt", "tau1_eta", "tau2_eta", "tautau_m_vis"] #"bbtautau_mass" +varnames = ["tautau_m_vis"] #"bbtautau_mass" + +channellist = ["tauTau"] cat = "inclusive" -using_uncertainties = ( - True # When we turn on Up/Down, the file storage changes due to renameHists.py -) +using_uncertainties = False #When we turn on Up/Down, the file storage changes due to renameHists.py for var in varnames: for channel in channellist: @@ -31,14 +22,8 @@ outname = os.path.join(plotdir, f"HHbbtautau_{channel}_{var}_StackPlot.pdf") if not using_uncertainties: - os.system( - f"python3 ../FLAF/Analysis/HistPlotter.py --inFile {filename} --bckgConfig ../config/background_samples.yaml --globalConfig ../config/global.yaml --outFile {outname} --var {var} --category {cat} --channel {channel} --uncSource Central --wantData --year {era} --wantQCD False --rebin False --analysis HH_bbtautau --qcdregion OS_Iso --sigConfig 
+ff_feature_order_json: Analysis/data/feature_order.json
\ No newline at end of file
diff --git a/config/plot/inputs.yaml b/config/plot/inputs.yaml
index 784802b..e26d47b 100644
--- a/config/plot/inputs.yaml
+++ b/config/plot/inputs.yaml
@@ -23,11 +23,11 @@
   - VV
   title: "VV"
   color: kOrange-4
-- name: W
-  types:
-    - W
-  title: "W #rightarrow l#nu + jets"
-  color: kOrange+1
+#- name: W
+#  types:
+#    - W
+#  title: "W #rightarrow l#nu + jets"
+#  color: kOrange+1
 - name: single_H
   title: "H"
   types:
@@ -39,12 +39,12 @@
 #  title: "Other"
 #  types: []
 #  color: kPink+2
-- name: GluGlutoHHto2B2Tau_kl_1p00_kt_1p00_c2_0p00
-  title: "GGtoHHto2B2Tau"
-  type: signal
-  types:
-    - HHnonRes
-  color: kCyan
+#- name: GluGlutoHHto2B2Tau_0p00_1p00_0p00
+#  title: "GGtoHHto2B2Tau"
+#  type: signal
+#  types:
+#    - HHnonRes
+#  color: kCyan
 # - name: GluGluToBulkGraviton_1250
 #   title: "GGBG-1250"
 #   type: signal