From 98298d031c3bb71a5bf1949eca5bb93e692f5436 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Thu, 29 Jan 2026 18:59:46 +0100 Subject: [PATCH 01/11] start new branch --- Corrections | 2 +- DeepHME | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Corrections b/Corrections index c8a2b00..a50b274 160000 --- a/Corrections +++ b/Corrections @@ -1 +1 @@ -Subproject commit c8a2b00c265e5310d174d8249d0f03059b87cc5c +Subproject commit a50b27449e6ec4056e148d05f9bf97a19edec65c diff --git a/DeepHME b/DeepHME index 13f95ac..8371521 160000 --- a/DeepHME +++ b/DeepHME @@ -1 +1 @@ -Subproject commit 13f95accfbeca5c177d3a74bb121c33df316c961 +Subproject commit 8371521797f68ee4a547741729ec5bffc9cb3633 From 10bbf9936af8ee605589e157f75a1a7334c2a934 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Thu, 29 Jan 2026 19:11:23 +0100 Subject: [PATCH 02/11] update DeepHME --- DeepHME | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DeepHME b/DeepHME index 8371521..13f95ac 160000 --- a/DeepHME +++ b/DeepHME @@ -1 +1 @@ -Subproject commit 8371521797f68ee4a547741729ec5bffc9cb3633 +Subproject commit 13f95accfbeca5c177d3a74bb121c33df316c961 From 9d88184af247fe563176455e9cef3d0aa3697c82 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Fri, 30 Jan 2026 01:04:43 +0100 Subject: [PATCH 03/11] add everything in HH_bbWW --- Analysis/hh_bbww.py | 7 +++++-- Analysis/histTupleDef.py | 3 ++- config/global.yaml | 27 +++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/Analysis/hh_bbww.py b/Analysis/hh_bbww.py index 616fd18..540cdb4 100644 --- a/Analysis/hh_bbww.py +++ b/Analysis/hh_bbww.py @@ -77,14 +77,17 @@ def GetBTagWeight(global_cfg_dict, cat, applyBtag=False): btagshape_weight = "weight_bTagShape_Central" return f"{btag_weight}*{btagshape_weight}" - -def GetWeight(channel, cat, boosted_categories): # do you need all these args? +def GetWeight( + channel, cat, boosted_categories, apply_btag_shape_weights=False +): # do you need all these args? # weights_to_apply = ["weight_MC_Lumi_pu", "ExtraDYWeight"] weights_to_apply = ["weight_MC_Lumi_pu"] total_weight = "*".join(weights_to_apply) for lep_index in [1, 2]: total_weight = f"{total_weight} * {GetLepWeight(lep_index)}" total_weight = f"{total_weight} * {GetTriggerWeight()}" + if apply_btag_shape_weights: + total_weight = f"{total_weight} * weight_bTagShape_Central" return total_weight diff --git a/Analysis/histTupleDef.py b/Analysis/histTupleDef.py index 02aff39..05fa3fa 100644 --- a/Analysis/histTupleDef.py +++ b/Analysis/histTupleDef.py @@ -90,9 +90,10 @@ def DefineWeightForHistograms( categories = global_params["categories"] boosted_categories = global_params.get("boosted_categories", []) process_group = global_params["process_group"] + apply_btag_shape_weights = global_params.get("correct_btagShape_weights", False) total_weight_expression = ( # channel, cat, boosted_categories --> these are not needed in the GetWeight function therefore I just put some placeholders - analysis.GetWeight("", "", boosted_categories) + analysis.GetWeight("", "", boosted_categories, apply_btag_shape_weights=apply_btag_shape_weights) if process_group != "data" else "1" ) # are we sure? diff --git a/config/global.yaml b/config/global.yaml index d05cd7e..93ba5d6 100644 --- a/config/global.yaml +++ b/config/global.yaml @@ -50,6 +50,8 @@ muIsoWP: "Loose" treeName: "Events" +correct_btagShape_weights: True + nPbPerFile: 2_000 # 2fb-1 per split data file nEventsPerFile: 100_000 # 100k events per MC file @@ -249,6 +251,31 @@ payload_producers: awkward_based: True uproot_stepsize: '50MB' dependencies: + BtagShape: + producers_module_name: BtagShapeProducer + producer_name: BtagShapeProducer + save_as: json + columns : [] + lepton_categories: + - e + - mu + - eE + - eMu + - muMu + jet_multiplicities: + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + n_cpus: 4 + max_runtime: 8.0 + cmssw_env: False + awkward_based: True + uproot_stepsize: '50MB' + dependencies: region: All region_default: SR From 8a443ba871a42f8a0ee07063cfdb9f45ded9d405 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Fri, 30 Jan 2026 21:25:57 +0100 Subject: [PATCH 04/11] improve lepton category descriptions in the config and add flag controlling application of btag shape weights in DefineWeightForHistograms --- Analysis/histTupleDef.py | 3 ++- config/global.yaml | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Analysis/histTupleDef.py b/Analysis/histTupleDef.py index 05fa3fa..3c42814 100644 --- a/Analysis/histTupleDef.py +++ b/Analysis/histTupleDef.py @@ -55,6 +55,7 @@ def DefineWeightForHistograms( global_params, final_weight_name, df_is_central, + btag_shape_was_corrected=False, ): global central_df_weights_computed is_central = uncName == central @@ -90,7 +91,7 @@ def DefineWeightForHistograms( categories = global_params["categories"] boosted_categories = global_params.get("boosted_categories", []) process_group = global_params["process_group"] - apply_btag_shape_weights = global_params.get("correct_btagShape_weights", False) + apply_btag_shape_weights = global_params.get("correct_btagShape_weights", False) if btag_shape_was_corrected else False total_weight_expression = ( # channel, cat, boosted_categories --> these are not needed in the GetWeight function therefore I just put some placeholders analysis.GetWeight("", "", boosted_categories, apply_btag_shape_weights=apply_btag_shape_weights) diff --git a/config/global.yaml b/config/global.yaml index 2824a8f..4b4708f 100644 --- a/config/global.yaml +++ b/config/global.yaml @@ -264,11 +264,11 @@ payload_producers: save_as: json columns : [] lepton_categories: - - e - - mu - - eE - - eMu - - muMu + e: 1 + mu: 2 + eE: 11 + eMu: 12 + muMu: 22 jet_multiplicities: - 2 - 3 From 44016e5ebca319d0eacc61aafe10d345a35257ce Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Fri, 30 Jan 2026 21:26:20 +0100 Subject: [PATCH 05/11] add FLAF --- FLAF | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FLAF b/FLAF index bbe942f..ccb9f6a 160000 --- a/FLAF +++ b/FLAF @@ -1 +1 @@ -Subproject commit bbe942f4ef22d5ea37dfe7ea469b03c1bf304991 +Subproject commit ccb9f6af20d1ee8b42cb9eaac3db78ed25922d31 From 85eec714b129e7211873717abc98cd9b16460006 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Sat, 31 Jan 2026 00:20:05 +0100 Subject: [PATCH 06/11] implement correction of btag shape weights --- Analysis/histTupleDef.py | 89 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 3 deletions(-) diff --git a/Analysis/histTupleDef.py b/Analysis/histTupleDef.py index 3c42814..6cb95c4 100644 --- a/Analysis/histTupleDef.py +++ b/Analysis/histTupleDef.py @@ -42,7 +42,79 @@ def GetDfw(df, setup, dataset_name): central_df_weights_computed = False +btag_shape_weight_corrected = False +cat_to_channelId = {"e": 1, "mu": 2, "eE": 11, "eMu": 12, "muMu": 22} + + +class BtagShapeWeightCorrector: + def __init__(self, btag_integral_ratios): + self.exisiting_srcScale_combs = [key for key in btag_integral_ratios.keys()] + # if the btag_integral_ratios dictionary is not empty, do stuff + if self.exisiting_srcScale_combs: + ROOT.gInterpreter.Declare("#include ") + + for key in btag_integral_ratios.keys(): + # key in btag_integral_ratios has form f"{source}_{scale}", so function expects that + # and creates a map and function to rescale btag weights for each f"{source}_{scale}" value + self._declare_cpp_map_and_resc_func(btag_integral_ratios, key) + + def _declare_cpp_map_and_resc_func(self, btag_integral_ratios, unc_src_scale): + correction_factors = btag_integral_ratios[unc_src_scale] + + # init c++ map + cpp_map_entries = [] + for cat, multipl_dict in correction_factors.items(): + channelId = cat_to_channelId[cat] + for key, ratio in multipl_dict.items(): + # key has structure f"ratio_ncetnralJet_{number}"" + num_jet = int(key.split("_")[-1]) + cpp_map_entries.append(f"{{{{{channelId}, {num_jet}}}, {ratio}}}") + cpp_init = ", ".join(cpp_map_entries) + + ROOT.gInterpreter.Declare( + f""" + static const std::map, float> ratios_{unc_src_scale} = {{ + {cpp_init} + }}; + + float integral_correction_ratio_{unc_src_scale}(int ncentralJet, int channelId) {{ + std::pair key{{channelId, ncentralJet}}; + try + {{ + float ratio = ratios_{unc_src_scale}.at(key); + return ratio; + }} + catch (...) + {{ + return 1.0f; + }} + }}""" + ) + + def UpdateBtagWeight(self, dfw, unc_src="Central", unc_scale=None): + # return original dfw if empty dict was passed to constructor + if not self.exisiting_srcScale_combs: + return dfw + + if unc_scale is None: + unc_src_scale = unc_src + else: + unc_src_scale = f"{unc_src}_{unc_scale}" + + if unc_src_scale not in self.exisiting_srcScale_combs: + raise RuntimeError( + f"`BtagShapeWeightCorrection.json` does not contain key `{unc_src_scale}`." + ) + + dfw.df = dfw.df.Redefine( + "weight_bTagShape_Central", + f"""if (ncentralJet >= 2 && ncentralJet <= 8) + return integral_correction_ratio_{unc_src_scale}(ncentralJet, channelId)*weight_bTagShape_Central; + return weight_bTagShape_Central;""", + ) + + return dfw def DefineWeightForHistograms( *, @@ -55,7 +127,7 @@ def DefineWeightForHistograms( global_params, final_weight_name, df_is_central, - btag_shape_was_corrected=False, + btag_integral_ratios, ): global central_df_weights_computed is_central = uncName == central @@ -88,13 +160,24 @@ def DefineWeightForHistograms( if df_is_central: central_df_weights_computed = True + # btag shape weight column appears here + correct_btagShape_weights = global_params.get("correct_btagShape_weights", False) + global btag_shape_weight_corrected + if correct_btagShape_weights and not btag_shape_weight_corrected and btag_integral_ratios: + isMC = not isData + if is_central and isMC: + weight_corrector = BtagShapeWeightCorrector(btag_integral_ratios) + print(f"Calling weight_corrector.UpdateBtagWeight for unc_source={uncName} unc_scale={uncScale}") + weight_corrector.UpdateBtagWeight(dfw, unc_src=uncName) + btag_shape_weight_corrected = True + categories = global_params["categories"] boosted_categories = global_params.get("boosted_categories", []) process_group = global_params["process_group"] - apply_btag_shape_weights = global_params.get("correct_btagShape_weights", False) if btag_shape_was_corrected else False total_weight_expression = ( # channel, cat, boosted_categories --> these are not needed in the GetWeight function therefore I just put some placeholders - analysis.GetWeight("", "", boosted_categories, apply_btag_shape_weights=apply_btag_shape_weights) + # if btag shape weight was corrected => must be applied, else no + analysis.GetWeight("", "", boosted_categories, apply_btag_shape_weights=btag_shape_weight_corrected) if process_group != "data" else "1" ) # are we sure? From 206ec47d44d21879b1778604213de07a68197906 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Sun, 1 Feb 2026 15:48:23 +0100 Subject: [PATCH 07/11] update FLAF --- FLAF | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FLAF b/FLAF index ccb9f6a..aa703be 160000 --- a/FLAF +++ b/FLAF @@ -1 +1 @@ -Subproject commit ccb9f6af20d1ee8b42cb9eaac3db78ed25922d31 +Subproject commit aa703be4f7f9d9075c5d4847a973086b8a8df658 From d3912250f31f834cafcaf4e9555109add2de264c Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Tue, 10 Feb 2026 14:46:10 +0100 Subject: [PATCH 08/11] modify hh_bbww and histTupleDef --- Analysis/hh_bbww.py | 5 +-- Analysis/histTupleDef.py | 88 +--------------------------------------- 2 files changed, 3 insertions(+), 90 deletions(-) diff --git a/Analysis/hh_bbww.py b/Analysis/hh_bbww.py index 9a7a1cd..be14beb 100644 --- a/Analysis/hh_bbww.py +++ b/Analysis/hh_bbww.py @@ -78,7 +78,7 @@ def GetBTagWeight(global_cfg_dict, cat, applyBtag=False): return f"{btag_weight}*{btagshape_weight}" def GetWeight( - channel, cat, boosted_categories, apply_btag_shape_weights=False + channel, cat, boosted_categories ): # do you need all these args? # weights_to_apply = ["weight_base", "ExtraDYWeight"] weights_to_apply = ["weight_base"] @@ -86,8 +86,7 @@ def GetWeight( for lep_index in [1, 2]: total_weight = f"{total_weight} * {GetLepWeight(lep_index)}" total_weight = f"{total_weight} * {GetTriggerWeight()}" - if apply_btag_shape_weights: - total_weight = f"{total_weight} * weight_bTagShape_Central" + total_weight = f"{total_weight} * weight_bTagShape_Central" return total_weight diff --git a/Analysis/histTupleDef.py b/Analysis/histTupleDef.py index 6cb95c4..7b3e576 100644 --- a/Analysis/histTupleDef.py +++ b/Analysis/histTupleDef.py @@ -42,79 +42,6 @@ def GetDfw(df, setup, dataset_name): central_df_weights_computed = False -btag_shape_weight_corrected = False - -cat_to_channelId = {"e": 1, "mu": 2, "eE": 11, "eMu": 12, "muMu": 22} - - -class BtagShapeWeightCorrector: - def __init__(self, btag_integral_ratios): - self.exisiting_srcScale_combs = [key for key in btag_integral_ratios.keys()] - # if the btag_integral_ratios dictionary is not empty, do stuff - if self.exisiting_srcScale_combs: - ROOT.gInterpreter.Declare("#include ") - - for key in btag_integral_ratios.keys(): - # key in btag_integral_ratios has form f"{source}_{scale}", so function expects that - # and creates a map and function to rescale btag weights for each f"{source}_{scale}" value - self._declare_cpp_map_and_resc_func(btag_integral_ratios, key) - - def _declare_cpp_map_and_resc_func(self, btag_integral_ratios, unc_src_scale): - correction_factors = btag_integral_ratios[unc_src_scale] - - # init c++ map - cpp_map_entries = [] - for cat, multipl_dict in correction_factors.items(): - channelId = cat_to_channelId[cat] - for key, ratio in multipl_dict.items(): - # key has structure f"ratio_ncetnralJet_{number}"" - num_jet = int(key.split("_")[-1]) - cpp_map_entries.append(f"{{{{{channelId}, {num_jet}}}, {ratio}}}") - cpp_init = ", ".join(cpp_map_entries) - - ROOT.gInterpreter.Declare( - f""" - static const std::map, float> ratios_{unc_src_scale} = {{ - {cpp_init} - }}; - - float integral_correction_ratio_{unc_src_scale}(int ncentralJet, int channelId) {{ - std::pair key{{channelId, ncentralJet}}; - try - {{ - float ratio = ratios_{unc_src_scale}.at(key); - return ratio; - }} - catch (...) - {{ - return 1.0f; - }} - }}""" - ) - - def UpdateBtagWeight(self, dfw, unc_src="Central", unc_scale=None): - # return original dfw if empty dict was passed to constructor - if not self.exisiting_srcScale_combs: - return dfw - - if unc_scale is None: - unc_src_scale = unc_src - else: - unc_src_scale = f"{unc_src}_{unc_scale}" - - if unc_src_scale not in self.exisiting_srcScale_combs: - raise RuntimeError( - f"`BtagShapeWeightCorrection.json` does not contain key `{unc_src_scale}`." - ) - - dfw.df = dfw.df.Redefine( - "weight_bTagShape_Central", - f"""if (ncentralJet >= 2 && ncentralJet <= 8) - return integral_correction_ratio_{unc_src_scale}(ncentralJet, channelId)*weight_bTagShape_Central; - return weight_bTagShape_Central;""", - ) - - return dfw def DefineWeightForHistograms( *, @@ -127,7 +54,6 @@ def DefineWeightForHistograms( global_params, final_weight_name, df_is_central, - btag_integral_ratios, ): global central_df_weights_computed is_central = uncName == central @@ -160,24 +86,12 @@ def DefineWeightForHistograms( if df_is_central: central_df_weights_computed = True - # btag shape weight column appears here - correct_btagShape_weights = global_params.get("correct_btagShape_weights", False) - global btag_shape_weight_corrected - if correct_btagShape_weights and not btag_shape_weight_corrected and btag_integral_ratios: - isMC = not isData - if is_central and isMC: - weight_corrector = BtagShapeWeightCorrector(btag_integral_ratios) - print(f"Calling weight_corrector.UpdateBtagWeight for unc_source={uncName} unc_scale={uncScale}") - weight_corrector.UpdateBtagWeight(dfw, unc_src=uncName) - btag_shape_weight_corrected = True - categories = global_params["categories"] boosted_categories = global_params.get("boosted_categories", []) process_group = global_params["process_group"] total_weight_expression = ( # channel, cat, boosted_categories --> these are not needed in the GetWeight function therefore I just put some placeholders - # if btag shape weight was corrected => must be applied, else no - analysis.GetWeight("", "", boosted_categories, apply_btag_shape_weights=btag_shape_weight_corrected) + analysis.GetWeight("", "", boosted_categories) if process_group != "data" else "1" ) # are we sure? From a4b54c8ca6bf580ca596c596f2e6dafaf1922041 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Tue, 10 Feb 2026 15:16:12 +0100 Subject: [PATCH 09/11] update FLAF and Corrections --- Corrections | 2 +- FLAF | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Corrections b/Corrections index 77e8939..1e290fa 160000 --- a/Corrections +++ b/Corrections @@ -1 +1 @@ -Subproject commit 77e893974a7d181cd0dbb3720dcf19b5dd770a97 +Subproject commit 1e290fa90d23471393b08aa0a34e3de46e403c27 diff --git a/FLAF b/FLAF index aa703be..ba00e25 160000 --- a/FLAF +++ b/FLAF @@ -1 +1 @@ -Subproject commit aa703be4f7f9d9075c5d4847a973086b8a8df658 +Subproject commit ba00e2540d562b174b1aba954c2161fc2d917ad1 From ca7905c0bb3cc8d658a078c4166d743f05a0e52a Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Tue, 10 Feb 2026 20:02:23 +0100 Subject: [PATCH 10/11] update config --- config/global.yaml | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/config/global.yaml b/config/global.yaml index 4b4708f..a5a1b47 100644 --- a/config/global.yaml +++ b/config/global.yaml @@ -2,10 +2,10 @@ anaTupleDef: AnaProd/anaTupleDef.py histTupleDef: Analysis/histTupleDef.py analysis_import: Analysis.hh_bbww analysis_cache_import: Analysis.tasks:AnalysisCacheTask:AnalysisCacheMergeTask -phys_model: Run3_Model +phys_model: test corrections: mu: - stage: HistTuple + stages: [ HistTuple, AnalysisCache ] columns: pfRelIso04_all: Muon_pfRelIso04_all tightId: Muon_tightId @@ -14,16 +14,16 @@ corrections: mediumId: Muon_mediumId looseId: Muon_looseId trigger: - stage: HistTuple + stages: [ HistTuple, AnalysisCache ] mode: SF - ele: { stage: HistTuple } + ele: { stages: [ HistTuple, AnalysisCache ] } fatjet: - stage: HistTuple # AnaTuple + stages: [ HistTuple, AnalysisCache ] # AnaTuple ana: bbWW tagger: particleNetWithMass_HbbvsQCD fatJetName: fatbjet # SelectedFatJet muScaRe: { stage: AnaTuple, mu_pt_for_ScaReApplication: "nano" } - eleES: { stage: AnaTuple } + eleES: { stages: [ AnaTuple, AnalysisCache ] } lumi: { stage: AnaTuple } xs: { stage: AnaTuple } gen: { stage: AnaTuple } @@ -38,9 +38,11 @@ corrections: stage: AnaTuple apply_jet_horns_fix: true btag: - stages: [ AnaTuple, HistTuple ] + normFilePattern: data/AnalysisCacheAggregationTask/{version}/{period}/{dataset_name}/BtagShape/aggregatedCache.json + stages: [ AnaTuple, HistTuple, AnalysisCache ] modes: AnaTuple: none # just load to define WP ID branches + AnalysisCache: shape HistTuple: shape tagger: particleNet jetCollection: centralJet @@ -57,8 +59,6 @@ muIsoWP: "Loose" treeName: "Events" -correct_btagShape_weights: True - nPbPerFile: 2_000 # 2fb-1 per split data file nEventsPerFile: 100_000 # 100k events per MC file @@ -262,6 +262,8 @@ payload_producers: producers_module_name: BtagShapeProducer producer_name: BtagShapeProducer save_as: json + needs_aggregation: True + ignore_data: True columns : [] lepton_categories: e: 1 From 21327175209d9ebfd42947714b23001b04081391 Mon Sep 17 00:00:00 2001 From: Artem Bolshov Date: Tue, 10 Feb 2026 20:02:44 +0100 Subject: [PATCH 11/11] update FLAF --- FLAF | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FLAF b/FLAF index ba00e25..9daffb1 160000 --- a/FLAF +++ b/FLAF @@ -1 +1 @@ -Subproject commit ba00e2540d562b174b1aba954c2161fc2d917ad1 +Subproject commit 9daffb1fdc34e887ca4f16a9a7e74c99d9b15594