diff --git a/.gitignore b/.gitignore
index 15df29e..798d014 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@ configs.*/
*.pdf
*.png
*.root
+*.feather
# C extensions
*.so
diff --git a/FF_calculation/FF_QCD.py b/FF_calculation/FF_QCD.py
index 7add338..774d729 100644
--- a/FF_calculation/FF_QCD.py
+++ b/FF_calculation/FF_QCD.py
@@ -18,9 +18,7 @@
@logging_helper.LogDecorator().grouped_logs(extractor=lambda args: f"{args[6]}")
-def calculation_QCD_FFs(
- args: Tuple[Any, ...],
-) -> Dict[str, Union[str, Dict[str, str]]]:
+def calculation_QCD_FFs(args: Tuple[Any, ...]) -> Dict[str, Union[str, Dict[str, str]]]:
"""
This function calculates fake factors for the QCD process for a specific category (split).
The function expects as 'args' a Tuple containing all the necessary information for the
@@ -65,10 +63,7 @@ def calculation_QCD_FFs(
rdf = ROOT.RDataFrame(config["tree"], sample_path)
- # event filter for QCD signal-like region
- log.info(
- f"Filtering events for the signal-like region. Target process: {process}"
- )
+ log.info(f"Filtering events for the signal-like region. Target process: {process}")
region_conf = copy.deepcopy(process_conf["SRlike_cuts"])
rdf_SRlike = ff_func.apply_region_filters(
rdf=rdf,
@@ -79,10 +74,7 @@ def calculation_QCD_FFs(
logger=logger,
)
- # event filter for QCD application-like region
- log.info(
- f"Filtering events for the application-like region. Target process: {process}"
- )
+ log.info(f"Filtering events for the application-like region. Target process: {process}")
region_conf = copy.deepcopy(process_conf["ARlike_cuts"])
rdf_ARlike = ff_func.apply_region_filters(
rdf=rdf,
@@ -93,24 +85,20 @@ def calculation_QCD_FFs(
logger=logger,
)
- # get binning of the dependent variable
xbinning = array.array("d", splitting.var_bins)
nbinsx = len(splitting.var_bins) - 1
- # making the histograms
- h = RuntimeVariables.RDataFrameWrapper(rdf_SRlike).Histo1D(
+ SRlike_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_SRlike).Histo1D(
(process_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
process_conf["var_dependence"],
"weight",
- )
- SRlike_hists[sample] = h.GetValue()
+ ).GetValue()
- h = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
+ ARlike_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
(process_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
process_conf["var_dependence"],
"weight",
- )
- ARlike_hists[sample] = h.GetValue()
+ ).GetValue()
# calculate QCD enriched data by subtraction all the background samples
SRlike_hists["data_subtracted"] = SRlike_hists["data"].Clone()
@@ -164,6 +152,7 @@ def calculation_QCD_FFs(
logger=logger,
fit_option=splitting.fit_option,
limit_kwargs=splitting.limit_kwargs(hist=FF_hist),
+ stat_sigma=config["stat_sigma"] if "stat_sigma" in config else 1.0,
)
elif isinstance(splitting.fit_option, str):
nominal_draw_obj, results = ff_func.smooth_function(
@@ -175,7 +164,7 @@ def calculation_QCD_FFs(
"MCShiftUp": FF_hist_up.Clone(),
"MCShiftDown": FF_hist_down.Clone(),
},
- for_FF=True,
+ stat_sigma=config["stat_sigma"] if "stat_sigma" in config else 1.0,
)
unc_draw_obj = results["default"]
used_fit = splitting.fit_option
@@ -198,7 +187,7 @@ def calculation_QCD_FFs(
save_data=True,
)
- # producing some control plots
+ # producing control plots
for _hist, _region in [
(SRlike_hists, "SR_like"),
(ARlike_hists, "AR_like"),
@@ -212,9 +201,7 @@ def calculation_QCD_FFs(
process=process,
region=_region,
data="data",
- samples=ff_func.controlplot_samples(
- config["use_embedding"], add_qcd=False
- ),
+ samples=ff_func.controlplot_samples(sample_paths, add_qcd=False),
category=splitting.split or {"incl": ""},
output_path=output_path,
logger=logger,
@@ -232,9 +219,7 @@ def calculation_QCD_FFs(
@logging_helper.LogDecorator().grouped_logs(extractor=lambda args: f"{args[7]}")
-def non_closure_correction(
- args: Tuple[Any, ...],
-) -> Dict[str, np.ndarray]:
+def non_closure_correction(args: Tuple[Any, ...]) -> Dict[str, np.ndarray]:
"""
This function calculates non-closure corrections for fake factors for QCD.
@@ -272,9 +257,9 @@ def non_closure_correction(
log = logging.getLogger(logger)
- # init histogram dict for FF measurement
SRlike_hists = dict()
ARlike_hists = dict()
+ ARlike_hists_ff = dict()
for sample_path in sample_paths:
# getting the name of the process from the sample path
@@ -291,10 +276,7 @@ def non_closure_correction(
rdf = ROOT.RDataFrame(config["tree"], sample_path)
- # event filter for QCD signal-like region
- log.info(
- f"Filtering events for the signal-like region. Target process: {process}"
- )
+ log.info(f"Filtering events for the signal-like region. Target process: {process}")
region_conf = copy.deepcopy(config["target_processes"][process]["SRlike_cuts"])
rdf_SRlike = ff_func.apply_region_filters(
rdf=rdf,
@@ -305,10 +287,7 @@ def non_closure_correction(
logger=logger,
)
- # event filter for QCD application-like region
- log.info(
- f"Filtering events for the application-like region. Target process: {process}"
- )
+ log.info(f"Filtering events for the application-like region. Target process: {process}")
region_conf = copy.deepcopy(config["target_processes"][process]["ARlike_cuts"])
rdf_ARlike = ff_func.apply_region_filters(
rdf=rdf,
@@ -319,66 +298,49 @@ def non_closure_correction(
logger=logger,
)
- # evaluate the measured fake factors for the specific processes
- if sample == "data":
- rdf_ARlike = evaluator.evaluate_fake_factor(rdf=rdf_ARlike)
+ rdf_ARlike = evaluator.evaluate_fake_factor(rdf=rdf_ARlike)
- # additionally evaluate the previous corrections
- corr_str = ""
- for corr_evaluator in corr_evaluators:
- rdf_ARlike = corr_evaluator.evaluate_correction(rdf=rdf_ARlike)
- corr_str += f" * {corr_evaluator.corr_str}"
+ corr_str = ""
+ for corr_evaluator in corr_evaluators:
+ rdf_ARlike = corr_evaluator.evaluate_correction(rdf=rdf_ARlike)
+ corr_str += f" * {corr_evaluator.corr_str}"
- rdf_ARlike = rdf_ARlike.Define(
- "weight_ff",
- f"weight * {process}_fake_factor{corr_str}",
- )
+ rdf_ARlike = rdf_ARlike.Define("weight_ff", f"weight * {process}_fake_factor{corr_str}")
- # get binning of the dependent variable
- xbinning, nbinsx = (
- array.array("d", splitting.var_bins),
- len(splitting.var_bins) - 1,
- )
+ xbinning, nbinsx = array.array("d", splitting.var_bins), len(splitting.var_bins) - 1
- # making the histograms
- h = RuntimeVariables.RDataFrameWrapper(rdf_SRlike).Histo1D(
+ SRlike_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_SRlike).Histo1D(
(correction_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
correction_conf["var_dependence"],
"weight",
- )
- SRlike_hists[sample] = h.GetValue()
+ ).GetValue()
- h = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
- ("#phi(#slash{E}_{T})", f"{sample}", 1, -3.5, 3.5), "metphi", "weight"
- )
- ARlike_hists[sample] = h.GetValue()
-
- if sample == "data":
- h = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
- (
- correction_conf["var_dependence"],
- f"{sample}_ff",
- nbinsx,
- xbinning,
- ),
- correction_conf["var_dependence"],
- "weight_ff",
- )
- ARlike_hists["data_ff"] = h.GetValue()
+ ARlike_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}_ar", nbinsx, xbinning),
+ correction_conf["var_dependence"],
+ "weight"
+ ).GetValue()
- SRlike_hists["data_subtracted"] = SRlike_hists["data"].Clone()
- ARlike_hists["data_subtracted"] = ARlike_hists["data"].Clone()
+ ARlike_hists_ff[sample] = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}_ff", nbinsx, xbinning),
+ correction_conf["var_dependence"],
+ "weight_ff",
+ ).GetValue()
_pairs = [("data_subtracted", "data"), ("data", "data")]
+ SRlike_hists["data_subtracted"] = SRlike_hists["data"].Clone()
SRlike_hists_sub_up = {k1: SRlike_hists[k2].Clone() for k1, k2 in _pairs}
SRlike_hists_sub_down = deepcopy(SRlike_hists_sub_up)
- ARlike_hists_sub_up = {
- k1: ARlike_hists[k2].Clone() for k1, k2 in _pairs + [("data_ff", "data_ff")]
- }
+ ARlike_hists["data_subtracted"] = ARlike_hists["data"].Clone()
+ ARlike_hists_sub_up = {k1: ARlike_hists[k2].Clone() for k1, k2 in _pairs}
ARlike_hists_sub_down = deepcopy(ARlike_hists_sub_up)
+ data_ff_subtracted = ARlike_hists_ff["data"].Clone()
+ data_ff_subtracted_up = ARlike_hists_ff["data"].Clone()
+ data_ff_subtracted_down = ARlike_hists_ff["data"].Clone()
+
for hist in SRlike_hists:
if hist not in ["data", "data_subtracted", "QCD"]:
SRlike_hists["data_subtracted"].Add(SRlike_hists[hist].Clone(), -1)
@@ -389,18 +351,24 @@ def non_closure_correction(
SRlike_hists[hist].Clone().AddError(-1), -1
)
for hist in ARlike_hists:
- if hist not in ["data", "data_subtracted", "data_ff", "QCD"]:
+ if hist not in ["data", "data_subtracted", "QCD"]:
ARlike_hists["data_subtracted"].Add(ARlike_hists[hist].Clone(), -1)
- ARlike_hists_sub_up["data_subtracted"].Add(
- ARlike_hists[hist].Clone().AddError(1), -1
- )
- ARlike_hists_sub_down["data_subtracted"].Add(
- ARlike_hists[hist].Clone().AddError(-1), -1
- )
+ ARlike_hists_sub_up["data_subtracted"].Add(ARlike_hists[hist].Clone().AddError(1), -1)
+ ARlike_hists_sub_down["data_subtracted"].Add(ARlike_hists[hist].Clone().AddError(-1), -1)
+ for hist in ARlike_hists_ff:
+ if hist not in ["data", "QCD"]:
+ data_ff_subtracted.Add(ARlike_hists_ff[hist].Clone(), -1)
+ data_ff_subtracted_up.Add(ARlike_hists_ff[hist].Clone().AddError(1), -1)
+ data_ff_subtracted_down.Add(ARlike_hists_ff[hist].Clone().AddError(-1), -1)
+
+ ARlike_hists["data_ff"] = data_ff_subtracted
+ ARlike_hists_sub_up["data_ff"] = data_ff_subtracted_up
+ ARlike_hists_sub_down["data_ff"] = data_ff_subtracted_down
correction_hist, process_fraction = ff_func.calculate_non_closure_correction(
SRlike=SRlike_hists,
ARlike=ARlike_hists,
+ skip_frac=True,
)
nominal_draw_obj, results = ff_func.smooth_function(
@@ -409,13 +377,10 @@ def non_closure_correction(
correction_option=splitting.correction_option,
bandwidth=splitting.bandwidth,
mc_shifted_hist={
- "MCShiftUp": ff_func.calculate_non_closure_correction(
- SRlike_hists_sub_up, ARlike_hists_sub_up
- )[0].Clone(),
- "MCShiftDown": ff_func.calculate_non_closure_correction(
- SRlike_hists_sub_down, ARlike_hists_sub_down
- )[0].Clone(),
+ "MCShiftUp": ff_func.calculate_non_closure_correction(SRlike_hists_sub_up, ARlike_hists_sub_up, skip_frac=True)[0].Clone(),
+ "MCShiftDown": ff_func.calculate_non_closure_correction(SRlike_hists_sub_down, ARlike_hists_sub_down, skip_frac=True)[0].Clone(),
},
+ stat_sigma=config["stat_sigma"] if "stat_sigma" in config else 1.0,
)
add_str = "_for_DRtoSR" if for_DRtoSR else ""
@@ -457,7 +422,7 @@ def non_closure_correction(
save_data=save_data,
)
- # producing some control plots
+ # producing control plots
for yscale, save_data in zip(["linear", "log"], [True, False]):
plotting.plot_data_mc_ratio(
variable=correction_conf["var_dependence"],
@@ -467,7 +432,7 @@ def non_closure_correction(
process=process,
region=f"non_closure_{closure_variable}{add_str}_SRlike_hist",
data="data",
- samples=ff_func.controlplot_samples(config["use_embedding"], add_qcd=False),
+ samples=ff_func.controlplot_samples(sample_paths, add_qcd=False),
category=splitting.split or {"incl": ""},
output_path=output_path,
logger=logger,
@@ -484,9 +449,7 @@ def non_closure_correction(
@logging_helper.LogDecorator().grouped_logs(extractor=lambda args: f"{args[6]}")
-def DR_SR_correction(
- args: Tuple[Any, ...],
-) -> Dict[str, np.ndarray]:
+def DR_SR_correction(args: Tuple[Any, ...]) -> Dict[str, np.ndarray]:
"""
This function calculates DR to SR correction for fake factors for QCD.
@@ -520,9 +483,9 @@ def DR_SR_correction(
log = logging.getLogger(logger)
- # init histogram dict for FF measurement
SRlike_hists = dict()
ARlike_hists = dict()
+ ARlike_hists_ff = dict()
for sample_path in sample_paths:
# getting the name of the process from the sample path
@@ -537,10 +500,7 @@ def DR_SR_correction(
rdf = ROOT.RDataFrame(config["tree"], sample_path)
- # event filter for QCD signal-like region
- log.info(
- f"Filtering events for the signal-like region. Target process: {process}"
- )
+ log.info(f"Filtering events for the signal-like region. Target process: {process}")
region_conf = copy.deepcopy(config["target_processes"][process]["SRlike_cuts"])
rdf_SRlike = ff_func.apply_region_filters(
rdf=rdf,
@@ -551,10 +511,7 @@ def DR_SR_correction(
logger=logger,
)
- # event filter for QCD application-like region
- log.info(
- f"Filtering events for the application-like region. Target process: {process}"
- )
+ log.info(f"Filtering events for the application-like region. Target process: {process}")
region_conf = copy.deepcopy(config["target_processes"][process]["ARlike_cuts"])
rdf_ARlike = ff_func.apply_region_filters(
rdf=rdf,
@@ -565,66 +522,49 @@ def DR_SR_correction(
logger=logger,
)
- # evaluate the measured fake factors for the specific processes
- if sample == "data":
- rdf_ARlike = evaluator.evaluate_fake_factor(rdf=rdf_ARlike)
+ rdf_ARlike = evaluator.evaluate_fake_factor(rdf=rdf_ARlike)
- # additionally evaluate the previous corrections
- corr_str = ""
- for corr_evaluator in corr_evaluators:
- rdf_ARlike = corr_evaluator.evaluate_correction(rdf=rdf_ARlike)
- corr_str += f" * {corr_evaluator.corr_str}"
+ corr_str = ""
+ for corr_evaluator in corr_evaluators:
+ rdf_ARlike = corr_evaluator.evaluate_correction(rdf=rdf_ARlike)
+ corr_str += f" * {corr_evaluator.corr_str}"
- rdf_ARlike = rdf_ARlike.Define(
- "weight_ff",
- f"weight * {process}_fake_factor{corr_str}",
- )
+ rdf_ARlike = rdf_ARlike.Define("weight_ff", f"weight * {process}_fake_factor{corr_str}")
- # get binning of the dependent variable
- xbinning, nbinsx = (
- array.array("d", splitting.var_bins),
- len(splitting.var_bins) - 1,
- )
+ xbinning, nbinsx = array.array("d", splitting.var_bins), len(splitting.var_bins) - 1
- # making the histograms
- h = RuntimeVariables.RDataFrameWrapper(rdf_SRlike).Histo1D(
+ SRlike_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_SRlike).Histo1D(
(correction_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
correction_conf["var_dependence"],
"weight",
- )
- SRlike_hists[sample] = h.GetValue()
+ ).GetValue()
- h = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
- ("#phi(#slash{E}_{T})", f"{sample}", 1, -3.5, 3.5), "metphi", "weight"
- )
- ARlike_hists[sample] = h.GetValue()
-
- if sample == "data":
- h = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
- (
- correction_conf["var_dependence"],
- f"{sample}_ff",
- nbinsx,
- xbinning,
- ),
- correction_conf["var_dependence"],
- "weight_ff",
- )
- ARlike_hists["data_ff"] = h.GetValue()
+ ARlike_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}_ar", nbinsx, xbinning),
+ correction_conf["var_dependence"],
+ "weight"
+ ).GetValue()
- SRlike_hists["data_subtracted"] = SRlike_hists["data"].Clone()
- ARlike_hists["data_subtracted"] = ARlike_hists["data"].Clone()
+ ARlike_hists_ff[sample] = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}_ff", nbinsx, xbinning),
+ correction_conf["var_dependence"],
+ "weight_ff",
+ ).GetValue()
_pairs = [("data_subtracted", "data"), ("data", "data")]
+ SRlike_hists["data_subtracted"] = SRlike_hists["data"].Clone()
SRlike_hists_sub_up = {k1: SRlike_hists[k2].Clone() for k1, k2 in _pairs}
SRlike_hists_sub_down = deepcopy(SRlike_hists_sub_up)
- ARlike_hists_sub_up = {
- k1: ARlike_hists[k2].Clone() for k1, k2 in _pairs + [("data_ff", "data_ff")]
- }
+ ARlike_hists["data_subtracted"] = ARlike_hists["data"].Clone()
+ ARlike_hists_sub_up = {k1: ARlike_hists[k2].Clone() for k1, k2 in _pairs}
ARlike_hists_sub_down = deepcopy(ARlike_hists_sub_up)
+ data_ff_subtracted = ARlike_hists_ff["data"].Clone()
+ data_ff_subtracted_up = ARlike_hists_ff["data"].Clone()
+ data_ff_subtracted_down = ARlike_hists_ff["data"].Clone()
+
for hist in SRlike_hists:
if hist not in ["data", "data_subtracted", "QCD"]:
SRlike_hists["data_subtracted"].Add(SRlike_hists[hist].Clone(), -1)
@@ -635,18 +575,24 @@ def DR_SR_correction(
SRlike_hists[hist].Clone().AddError(-1), -1
)
for hist in ARlike_hists:
- if hist not in ["data", "data_subtracted", "data_ff", "QCD"]:
+ if hist not in ["data", "data_subtracted", "QCD"]:
ARlike_hists["data_subtracted"].Add(ARlike_hists[hist].Clone(), -1)
- ARlike_hists_sub_up["data_subtracted"].Add(
- ARlike_hists[hist].Clone().AddError(1), -1
- )
- ARlike_hists_sub_down["data_subtracted"].Add(
- ARlike_hists[hist].Clone().AddError(-1), -1
- )
+ ARlike_hists_sub_up["data_subtracted"].Add(ARlike_hists[hist].Clone().AddError(1), -1)
+ ARlike_hists_sub_down["data_subtracted"].Add(ARlike_hists[hist].Clone().AddError(-1), -1)
+ for hist in ARlike_hists_ff:
+ if hist not in ["data", "QCD"]:
+ data_ff_subtracted.Add(ARlike_hists_ff[hist].Clone(), -1)
+ data_ff_subtracted_up.Add(ARlike_hists_ff[hist].Clone().AddError(1), -1)
+ data_ff_subtracted_down.Add(ARlike_hists_ff[hist].Clone().AddError(-1), -1)
+
+ ARlike_hists["data_ff"] = data_ff_subtracted
+ ARlike_hists_sub_up["data_ff"] = data_ff_subtracted_up
+ ARlike_hists_sub_down["data_ff"] = data_ff_subtracted_down
correction_hist, process_fraction = ff_func.calculate_non_closure_correction(
SRlike=SRlike_hists,
ARlike=ARlike_hists,
+ skip_frac=True,
)
nominal_draw_obj, results = ff_func.smooth_function(
@@ -655,13 +601,10 @@ def DR_SR_correction(
correction_option=splitting.correction_option,
bandwidth=splitting.bandwidth,
mc_shifted_hist={
- "MCShiftUp": ff_func.calculate_non_closure_correction(
- SRlike_hists_sub_up, ARlike_hists_sub_up
- )[0].Clone(),
- "MCShiftDown": ff_func.calculate_non_closure_correction(
- SRlike_hists_sub_down, ARlike_hists_sub_down
- )[0].Clone(),
+ "MCShiftUp": ff_func.calculate_non_closure_correction(SRlike_hists_sub_up, ARlike_hists_sub_up, skip_frac=True)[0].Clone(),
+ "MCShiftDown": ff_func.calculate_non_closure_correction(SRlike_hists_sub_down, ARlike_hists_sub_down, skip_frac=True)[0].Clone(),
},
+ stat_sigma=config["stat_sigma"] if "stat_sigma" in config else 1.0,
)
plotting.plot_correction(
@@ -709,7 +652,7 @@ def DR_SR_correction(
process=process,
region="DR_SR" + "_SRlike_hist",
data="data",
- samples=ff_func.controlplot_samples(config["use_embedding"], add_qcd=False),
+ samples=ff_func.controlplot_samples(sample_paths, add_qcd=False),
category=splitting.split or {"incl": ""},
output_path=output_path,
logger=logger,
diff --git a/FF_calculation/FF_Wjets.py b/FF_calculation/FF_Wjets.py
index b1dca29..a29f8cc 100644
--- a/FF_calculation/FF_Wjets.py
+++ b/FF_calculation/FF_Wjets.py
@@ -18,9 +18,7 @@
@logging_helper.LogDecorator().grouped_logs(extractor=lambda args: f"{args[6]}")
-def calculation_Wjets_FFs(
- args: Tuple[Any, ...],
-) -> Dict[str, Union[Dict[str, str], Dict[str, Dict[str, str]]]]:
+def calculation_Wjets_FFs(args: Tuple[Any, ...]) -> Dict[str, Union[Dict[str, str], Dict[str, Dict[str, str]]]]:
"""
This function calculates fake factors for the Wjets process for a specific category (split).
The function expects as 'args' a Tuple containing all the necessary information for the
@@ -174,70 +172,74 @@ def calculation_Wjets_FFs(
SRlike_hists["QCD"] = ff_func.QCD_SS_estimate(hists=SRlike_hists_qcd)
ARlike_hists["QCD"] = ff_func.QCD_SS_estimate(hists=ARlike_hists_qcd)
- # calculate Wjets enriched data by subtraction all there backgrould sample
- SRlike_hists["data_subtracted"] = SRlike_hists["data"].Clone()
- ARlike_hists["data_subtracted"] = ARlike_hists["data"].Clone()
- SRlike_hists["data_subtracted_up"] = SRlike_hists["data"].Clone()
- ARlike_hists["data_subtracted_up"] = ARlike_hists["data"].Clone()
- SRlike_hists["data_subtracted_down"] = SRlike_hists["data"].Clone()
- ARlike_hists["data_subtracted_down"] = ARlike_hists["data"].Clone()
+ use_data = process_conf.get("compute_orthogonal_fake_factors_using_data", True)
+
+ if use_data:
+        # calculate Wjets enriched data by subtracting all the background samples
+ SRlike_hists["data_subtracted"] = SRlike_hists["data"].Clone()
+ ARlike_hists["data_subtracted"] = ARlike_hists["data"].Clone()
+ SRlike_hists["data_subtracted_up"] = SRlike_hists["data"].Clone()
+ ARlike_hists["data_subtracted_up"] = ARlike_hists["data"].Clone()
+ SRlike_hists["data_subtracted_down"] = SRlike_hists["data"].Clone()
+ ARlike_hists["data_subtracted_down"] = ARlike_hists["data"].Clone()
+
+ for hist in SRlike_hists:
+ if hist not in [
+ "data",
+ "data_subtracted",
+ "data_subtracted_up",
+ "data_subtracted_down",
+ "Wjets",
+ ]:
+ SRlike_hists["data_subtracted"].Add(SRlike_hists[hist].Clone(), -1)
+ SRlike_hists["data_subtracted_up"].Add(SRlike_hists[hist].Clone().AddError(1), -1)
+ SRlike_hists["data_subtracted_down"].Add(SRlike_hists[hist].Clone().AddError(-1), -1)
+ for hist in ARlike_hists:
+ if hist not in [
+ "data",
+ "data_subtracted",
+ "data_subtracted_up",
+ "data_subtracted_down",
+ "Wjets",
+ ]:
+ ARlike_hists["data_subtracted"].Add(ARlike_hists[hist].Clone(), -1)
+ ARlike_hists["data_subtracted_up"].Add(ARlike_hists[hist].Clone().AddError(1), -1)
+ ARlike_hists["data_subtracted_down"].Add(ARlike_hists[hist].Clone().AddError(-1), -1)
+
+ # Start of the FF calculation
+ FF_hist, FF_hist_up, FF_hist_down = ff_func.calculate_Wjets_FF(
+ SRlike=SRlike_hists, ARlike=ARlike_hists
+ )
+ ff_hists_to_fit = [FF_hist.Clone(), FF_hist_up, FF_hist_down]
+ else:
+ SRlike_hists["data_subtracted"] = SRlike_hists["Wjets"].Clone()
+ ARlike_hists["data_subtracted"] = ARlike_hists["Wjets"].Clone()
- for hist in SRlike_hists:
- if hist not in [
- "data",
- "data_subtracted",
- "data_subtracted_up",
- "data_subtracted_down",
- "Wjets",
- ]:
- SRlike_hists["data_subtracted"].Add(SRlike_hists[hist].Clone(), -1)
- SRlike_hists["data_subtracted_up"].Add(
- SRlike_hists[hist].Clone().AddError(1), -1
- )
- SRlike_hists["data_subtracted_down"].Add(
- SRlike_hists[hist].Clone().AddError(-1), -1
- )
- for hist in ARlike_hists:
- if hist not in [
- "data",
- "data_subtracted",
- "data_subtracted_up",
- "data_subtracted_down",
- "Wjets",
- ]:
- ARlike_hists["data_subtracted"].Add(ARlike_hists[hist].Clone(), -1)
- ARlike_hists["data_subtracted_up"].Add(
- ARlike_hists[hist].Clone().AddError(1), -1
- )
- ARlike_hists["data_subtracted_down"].Add(
- ARlike_hists[hist].Clone().AddError(-1), -1
- )
+ FF_hist = SRlike_hists["Wjets"].Clone()
+ FF_hist.Divide(ARlike_hists["Wjets"])
+ ff_hists_to_fit = [FF_hist.Clone()]
- # Start of the FF calculation
- FF_hist, FF_hist_up, FF_hist_down = ff_func.calculate_Wjets_FF(
- SRlike=SRlike_hists,
- ARlike=ARlike_hists,
- )
# performing the fit and calculating the uncertainties
if isinstance(splitting.fit_option, list):
nominal_draw_obj, unc_draw_obj, results, used_fit = ff_func.fit_function(
- ff_hists=[FF_hist.Clone(), FF_hist_up, FF_hist_down],
+ ff_hists=ff_hists_to_fit,
bin_edges=splitting.var_bins,
logger=logger,
fit_option=splitting.fit_option,
limit_kwargs=splitting.limit_kwargs(hist=FF_hist),
+ stat_sigma=config["stat_sigma"] if "stat_sigma" in config else 1.0,
)
elif isinstance(splitting.fit_option, str):
nominal_draw_obj, results = ff_func.smooth_function(
- hist=FF_hist.Clone(),
+ hist=ff_hists_to_fit[0],
bin_edges=splitting.var_bins,
correction_option=splitting.fit_option,
bandwidth=splitting.bandwidth,
mc_shifted_hist={
- "MCShiftUp": FF_hist_up.Clone(),
- "MCShiftDown": FF_hist_down.Clone(),
- },
- for_FF=True,
+ "MCShiftUp": ff_hists_to_fit[1].Clone(),
+ "MCShiftDown": ff_hists_to_fit[2].Clone(),
+ } if use_data else None,
+ stat_sigma=config["stat_sigma"] if "stat_sigma" in config else 1.0,
)
unc_draw_obj = results["default"]
used_fit = splitting.fit_option
@@ -262,18 +264,8 @@ def calculation_Wjets_FFs(
# producing some control plots
for _hist, _region, _data, _samples in [
- (
- SRlike_hists,
- "SR_like",
- "data",
- ff_func.controlplot_samples(config["use_embedding"]),
- ),
- (
- ARlike_hists,
- "AR_like",
- "data",
- ff_func.controlplot_samples(config["use_embedding"]),
- ),
+ (SRlike_hists, "SR_like", "data", ff_func.controlplot_samples(sample_paths)),
+ (ARlike_hists, "AR_like", "data", ff_func.controlplot_samples(sample_paths)),
(SRlike_hists, "SR_like", "data_subtracted", ["Wjets"]),
(ARlike_hists, "AR_like", "data_subtracted", ["Wjets"]),
]:
@@ -304,9 +296,7 @@ def calculation_Wjets_FFs(
@logging_helper.LogDecorator().grouped_logs(extractor=lambda args: f"{args[7]}")
-def non_closure_correction(
- args: Tuple[Any, ...],
-) -> Dict[str, np.ndarray]:
+def non_closure_correction(args: Tuple[Any, ...]) -> Dict[str, np.ndarray]:
"""
This function calculates the non-closure correction for the Wjet process for a specific category.
@@ -344,14 +334,14 @@ def non_closure_correction(
log = logging.getLogger(logger)
- # init histogram dict for FF measurement
SRlike_hists = dict()
ARlike_hists = dict()
-
- # init histogram dict for QCD SS/OS estimation
SRlike_hists_qcd = dict()
ARlike_hists_qcd = dict()
+ ARlike_hists_ff = dict()
+ ARlike_hists_qcd_ff = dict()
+
for sample_path in sample_paths:
# getting the name of the process from the sample path
sample = sample_path.rsplit("/")[-1].rsplit(".")[0]
@@ -367,10 +357,7 @@ def non_closure_correction(
rdf = ROOT.RDataFrame(config["tree"], sample_path)
- # event filter for Wjets signal-like region
- log.info(
- f"Filtering events for the signal-like region. Target process: {process}"
- )
+ log.info(f"Filtering events for the signal-like region. Target process: {process}")
region_conf = copy.deepcopy(config["target_processes"][process]["SRlike_cuts"])
rdf_SRlike = ff_func.apply_region_filters(
rdf=rdf,
@@ -381,8 +368,7 @@ def non_closure_correction(
logger=logger,
)
- # QCD estimation from same sign in signal-like region
- if "tau_pair_sign" in region_conf:
+ if "tau_pair_sign" in region_conf: # QCD estimation from same sign in signal-like region
region_conf["tau_pair_sign"] = "(q_1*q_2) > 0" # same sign
else:
raise ValueError(
@@ -401,10 +387,7 @@ def non_closure_correction(
logger=logger,
)
- # event filter for Wjets application-like region
- log.info(
- f"Filtering events for the application-like region. Target process: {process}"
- )
+ log.info(f"Filtering events for the application-like region. Target process: {process}")
region_conf = copy.deepcopy(config["target_processes"][process]["ARlike_cuts"])
rdf_ARlike = ff_func.apply_region_filters(
rdf=rdf,
@@ -415,24 +398,7 @@ def non_closure_correction(
logger=logger,
)
- # evaluate the measured fake factors for the specific processes
- if sample == "data":
- rdf_ARlike = evaluator.evaluate_fake_factor(rdf=rdf_ARlike)
-
- # additionally evaluate the previous corrections
- corr_str = ""
- for corr_evaluator in corr_evaluators:
- rdf_ARlike = corr_evaluator.evaluate_correction(
- rdf=rdf_ARlike,
- )
- corr_str += f" * {corr_evaluator.corr_str}"
-
- rdf_ARlike = rdf_ARlike.Define(
- "weight_ff", f"weight * {process}_fake_factor{corr_str}"
- )
-
- # QCD estimation from same sign in application-like region
- if "tau_pair_sign" in region_conf:
+ if "tau_pair_sign" in region_conf: # QCD estimation from same sign in application-like region
region_conf["tau_pair_sign"] = "(q_1*q_2) > 0" # same sign
else:
raise ValueError(
@@ -451,66 +417,69 @@ def non_closure_correction(
logger=logger,
)
- # get binning of the dependent variable
+ rdf_ARlike = evaluator.evaluate_fake_factor(rdf=rdf_ARlike)
+ rdf_ARlike_qcd = evaluator.evaluate_fake_factor(rdf=rdf_ARlike_qcd)
+
+ corr_str = ""
+ for corr_evaluator in corr_evaluators:
+ rdf_ARlike = corr_evaluator.evaluate_correction(rdf=rdf_ARlike)
+ rdf_ARlike_qcd = corr_evaluator.evaluate_correction(rdf=rdf_ARlike_qcd)
+ corr_str += f" * {corr_evaluator.corr_str}"
+
+ rdf_ARlike = rdf_ARlike.Define("weight_ff", f"weight * {process}_fake_factor{corr_str}")
+ rdf_ARlike_qcd = rdf_ARlike_qcd.Define("weight_ff", f"weight * {process}_fake_factor{corr_str}")
+
xbinning = array.array("d", splitting.var_bins)
nbinsx = len(splitting.var_bins) - 1
- # making the histograms
- h = RuntimeVariables.RDataFrameWrapper(rdf_SRlike).Histo1D(
- (correction_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
- correction_conf["var_dependence"],
- "weight",
- )
- SRlike_hists[sample] = h.GetValue()
+ SRlike_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_SRlike).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}_sr", nbinsx, xbinning),
+ correction_conf["var_dependence"], "weight"
+ ).GetValue()
- h = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
- ("#phi(#slash{E}_{T})", f"{sample}", 1, -3.5, 3.5), "metphi", "weight"
- )
- ARlike_hists[sample] = h.GetValue()
+ SRlike_hists_qcd[sample] = RuntimeVariables.RDataFrameWrapper(rdf_SRlike_qcd).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}_sr_qcd", nbinsx, xbinning),
+ correction_conf["var_dependence"], "weight"
+ ).GetValue()
- if sample == "data":
- h = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
- (
- correction_conf["var_dependence"],
- f"{sample}_ff",
- nbinsx,
- xbinning,
- ),
- correction_conf["var_dependence"],
- "weight_ff",
- )
- ARlike_hists["data_ff"] = h.GetValue()
+ ARlike_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}_ar", nbinsx, xbinning),
+ correction_conf["var_dependence"], "weight"
+ ).GetValue()
- # making the histograms for QCD estimation
- h_qcd = RuntimeVariables.RDataFrameWrapper(rdf_SRlike_qcd).Histo1D(
- (correction_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
- correction_conf["var_dependence"],
- "weight",
- )
- SRlike_hists_qcd[sample] = h_qcd.GetValue()
+ ARlike_hists_qcd[sample] = RuntimeVariables.RDataFrameWrapper(rdf_ARlike_qcd).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}_ar_qcd", nbinsx, xbinning),
+ correction_conf["var_dependence"], "weight"
+ ).GetValue()
- h_qcd = RuntimeVariables.RDataFrameWrapper(rdf_ARlike_qcd).Histo1D(
- ("#phi(#slash{E}_{T})", f"{sample}", 1, -3.5, 3.5), "metphi", "weight"
- )
- ARlike_hists_qcd[sample] = h_qcd.GetValue()
+ ARlike_hists_ff[sample] = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}_ar_ff", nbinsx, xbinning),
+ correction_conf["var_dependence"], "weight_ff"
+ ).GetValue()
+
+ ARlike_hists_qcd_ff[sample] = RuntimeVariables.RDataFrameWrapper(rdf_ARlike_qcd).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}_ar_qcd_ff", nbinsx, xbinning),
+ correction_conf["var_dependence"], "weight_ff"
+ ).GetValue()
- # calculate QCD estimation
SRlike_hists["QCD"] = ff_func.QCD_SS_estimate(hists=SRlike_hists_qcd)
ARlike_hists["QCD"] = ff_func.QCD_SS_estimate(hists=ARlike_hists_qcd)
-
- SRlike_hists["data_subtracted"] = SRlike_hists["data"].Clone()
- ARlike_hists["data_subtracted"] = ARlike_hists["data"].Clone()
+ ARlike_hists_ff["QCD"] = ff_func.QCD_SS_estimate(hists=ARlike_hists_qcd_ff)
_pairs = [("data_subtracted", "data"), ("data", "data")]
+ SRlike_hists["data_subtracted"] = SRlike_hists["data"].Clone()
SRlike_hists_sub_up = {k1: SRlike_hists[k2].Clone() for k1, k2 in _pairs}
SRlike_hists_sub_down = deepcopy(SRlike_hists_sub_up)
- ARlike_hists_sub_up = {
- k1: ARlike_hists[k2].Clone() for k1, k2 in _pairs + [("data_ff", "data_ff")]
- }
+ ARlike_hists["data_subtracted"] = ARlike_hists["data"].Clone()
+ ARlike_hists_sub_up = {k1: ARlike_hists[k2].Clone() for k1, k2 in _pairs}
ARlike_hists_sub_down = deepcopy(ARlike_hists_sub_up)
+ data_ff_subtracted = ARlike_hists_ff["data"].Clone()
+ data_ff_subtracted_up = ARlike_hists_ff["data"].Clone()
+ data_ff_subtracted_down = ARlike_hists_ff["data"].Clone()
+
for hist in SRlike_hists:
if hist not in ["data", "data_subtracted", "Wjets"]:
SRlike_hists["data_subtracted"].Add(SRlike_hists[hist].Clone(), -1)
@@ -521,18 +490,24 @@ def non_closure_correction(
SRlike_hists[hist].Clone().AddError(-1), -1
)
for hist in ARlike_hists:
- if hist not in ["data", "data_subtracted", "data_ff", "Wjets"]:
+ if hist not in ["data", "data_subtracted", "Wjets"]:
ARlike_hists["data_subtracted"].Add(ARlike_hists[hist].Clone(), -1)
- ARlike_hists_sub_up["data_subtracted"].Add(
- ARlike_hists[hist].Clone().AddError(1), -1
- )
- ARlike_hists_sub_down["data_subtracted"].Add(
- ARlike_hists[hist].Clone().AddError(-1), -1
- )
+ ARlike_hists_sub_up["data_subtracted"].Add(ARlike_hists[hist].Clone().AddError(1), -1)
+ ARlike_hists_sub_down["data_subtracted"].Add(ARlike_hists[hist].Clone().AddError(-1), -1)
+ for hist in ARlike_hists_ff:
+ if hist not in ["data", "Wjets"]:
+ data_ff_subtracted.Add(ARlike_hists_ff[hist].Clone(), -1)
+ data_ff_subtracted_up.Add(ARlike_hists_ff[hist].Clone().AddError(1), -1)
+ data_ff_subtracted_down.Add(ARlike_hists_ff[hist].Clone().AddError(-1), -1)
+
+ ARlike_hists["data_ff"] = data_ff_subtracted
+ ARlike_hists_sub_up["data_ff"] = data_ff_subtracted_up
+ ARlike_hists_sub_down["data_ff"] = data_ff_subtracted_down
correction_hist, process_fraction = ff_func.calculate_non_closure_correction(
SRlike=SRlike_hists,
ARlike=ARlike_hists,
+ skip_frac=True,
)
nominal_draw_obj, results = ff_func.smooth_function(
@@ -541,13 +516,10 @@ def non_closure_correction(
correction_option=splitting.correction_option,
bandwidth=splitting.bandwidth,
mc_shifted_hist={
- "MCShiftUp": ff_func.calculate_non_closure_correction(
- SRlike_hists_sub_up, ARlike_hists_sub_up
- )[0].Clone(),
- "MCShiftDown": ff_func.calculate_non_closure_correction(
- SRlike_hists_sub_down, ARlike_hists_sub_down
- )[0].Clone(),
+ "MCShiftUp": ff_func.calculate_non_closure_correction(SRlike_hists_sub_up, ARlike_hists_sub_up, skip_frac=True)[0].Clone(),
+ "MCShiftDown": ff_func.calculate_non_closure_correction(SRlike_hists_sub_down, ARlike_hists_sub_down, skip_frac=True)[0].Clone(),
},
+ stat_sigma=config["stat_sigma"] if "stat_sigma" in config else 1.0,
)
add_str = "_DRtoSR" if for_DRtoSR else ""
@@ -597,7 +569,7 @@ def non_closure_correction(
process=process,
region=f"non_closure_{closure_variable}{add_str}_SRlike",
data="data",
- samples=ff_func.controlplot_samples(config["use_embedding"]),
+ samples=ff_func.controlplot_samples(sample_paths),
category=splitting.split or {"incl": ""},
output_path=output_path,
logger=logger,
@@ -651,7 +623,6 @@ def DR_SR_correction(
log = logging.getLogger(logger)
- # init histogram dict for FF measurement
SRlike_hists = dict()
ARlike_hists = dict()
@@ -671,13 +642,8 @@ def DR_SR_correction(
rdf = ROOT.RDataFrame(config["tree"], sample_path)
- # event filter for Wjets signal-like region
- log.info(
- f"Filtering events for the signal-like region. Target process: {process}"
- )
- region_conf = copy.deepcopy(
- config["target_processes"][process]["SRlike_cuts"]
- )
+ log.info(f"Filtering events for the signal-like region. Target process: {process}")
+ region_conf = copy.deepcopy(config["target_processes"][process]["SRlike_cuts"])
rdf_SRlike = ff_func.apply_region_filters(
rdf=rdf,
channel=config["channel"],
@@ -687,13 +653,8 @@ def DR_SR_correction(
logger=logger,
)
- # event filter for Wjets application-like region
- log.info(
- f"Filtering events for the application-like region. Target process: {process}"
- )
- region_conf = copy.deepcopy(
- config["target_processes"][process]["ARlike_cuts"]
- )
+ log.info(f"Filtering events for the application-like region. Target process: {process}")
+ region_conf = copy.deepcopy(config["target_processes"][process]["ARlike_cuts"])
rdf_ARlike = ff_func.apply_region_filters(
rdf=rdf,
channel=config["channel"],
@@ -707,45 +668,24 @@ def DR_SR_correction(
# additionally evaluate the previous corrections
corr_str = ""
for corr_evaluator in corr_evaluators:
- rdf_ARlike = corr_evaluator.evaluate_correction(
- rdf=rdf_ARlike,
- )
+ rdf_ARlike = corr_evaluator.evaluate_correction(rdf=rdf_ARlike)
corr_str += f" * {corr_evaluator.corr_str}"
- rdf_ARlike = rdf_ARlike.Define(
- "weight_ff", f"weight * {process}_fake_factor{corr_str}"
- )
+ rdf_ARlike = rdf_ARlike.Define("weight_ff", f"weight * {process}_fake_factor{corr_str}")
- # get binning of the dependent variable
- xbinning, nbinsx = (
- array.array("d", splitting.var_bins),
- len(splitting.var_bins) - 1,
- )
+ xbinning, nbinsx = array.array("d", splitting.var_bins), len(splitting.var_bins) - 1
- # making the histograms
- h = RuntimeVariables.RDataFrameWrapper(rdf_SRlike).Histo1D(
- (
- correction_conf["var_dependence"],
- f"{sample}",
- nbinsx,
- xbinning,
- ),
+ SRlike_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_SRlike).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
correction_conf["var_dependence"],
"weight",
- )
- SRlike_hists[sample] = h.GetValue()
-
- h = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
- (
- correction_conf["var_dependence"],
- f"{sample}_ff",
- nbinsx,
- xbinning,
- ),
+ ).GetValue()
+
+ ARlike_hists["Wjets_ff"] = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}_ff", nbinsx, xbinning),
correction_conf["var_dependence"],
"weight_ff",
- )
- ARlike_hists["Wjets_ff"] = h.GetValue()
+ ).GetValue()
correction_hist = ff_func.calculate_non_closure_correction_Wjets_fromMC(
SRlike=SRlike_hists, ARlike=ARlike_hists
@@ -756,6 +696,7 @@ def DR_SR_correction(
bin_edges=splitting.var_bins,
correction_option=splitting.correction_option,
bandwidth=splitting.bandwidth,
+ stat_sigma=config["stat_sigma"] if "stat_sigma" in config else 1.0,
)
plotting.plot_correction(
diff --git a/FF_calculation/FF_ttbar.py b/FF_calculation/FF_ttbar.py
index 78e0f13..7e313a3 100644
--- a/FF_calculation/FF_ttbar.py
+++ b/FF_calculation/FF_ttbar.py
@@ -54,7 +54,6 @@ def calculation_ttbar_FFs(
log = logging.getLogger(logger)
- # init histogram dict for FF measurement from MC
SR_hists = dict()
AR_hists = dict()
@@ -70,10 +69,7 @@ def calculation_ttbar_FFs(
rdf = ROOT.RDataFrame(config["tree"], sample_path)
- # event filter for ttbar signal region
- log.info(
- f"Filtering events for the signal region. Target process: {process}"
- )
+ log.info(f"Filtering events for the signal region. Target process: {process}")
region_conf = copy.deepcopy(process_conf["SR_cuts"])
rdf_SR = ff_func.apply_region_filters(
rdf=rdf,
@@ -84,10 +80,7 @@ def calculation_ttbar_FFs(
logger=logger,
)
- # event filter for ttbar application region
- log.info(
- f"Filtering events for the application region. Target process: {process}"
- )
+ log.info(f"Filtering events for the application region. Target process: {process}")
region_conf = copy.deepcopy(process_conf["AR_cuts"])
rdf_AR = ff_func.apply_region_filters(
rdf=rdf,
@@ -98,34 +91,21 @@ def calculation_ttbar_FFs(
logger=logger,
)
- # get binning of the dependent variable
xbinning = array.array("d", splitting.var_bins)
nbinsx = len(splitting.var_bins) - 1
# making the histograms
- h = RuntimeVariables.RDataFrameWrapper(rdf_SR).Histo1D(
- (
- process_conf["var_dependence"],
- f"{sample}",
- nbinsx,
- xbinning,
- ),
+ SR_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_SR).Histo1D(
+ (process_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
process_conf["var_dependence"],
"weight",
- )
- SR_hists[sample] = h.GetValue()
-
- h = RuntimeVariables.RDataFrameWrapper(rdf_AR).Histo1D(
- (
- process_conf["var_dependence"],
- f"{sample}",
- nbinsx,
- xbinning,
- ),
+ ).GetValue()
+
+ AR_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_AR).Histo1D(
+ (process_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
process_conf["var_dependence"],
"weight",
- )
- AR_hists[sample] = h.GetValue()
+ ).GetValue()
# Start of the FF calculation
FF_hist = ff_func.calculate_ttbar_FF(
@@ -137,11 +117,12 @@ def calculation_ttbar_FFs(
# performing the fit and calculating the uncertainties
if isinstance(splitting.fit_option, list):
nominal_draw_obj, unc_draw_obj, results, used_fit = ff_func.fit_function(
- ff_hists=FF_hist.Clone(),
+ ff_hists=[FF_hist.Clone()],
bin_edges=splitting.var_bins,
logger=logger,
fit_option=splitting.fit_option,
limit_kwargs=splitting.limit_kwargs(hist=FF_hist),
+ stat_sigma=config["stat_sigma"] if "stat_sigma" in config else 1.0,
)
elif isinstance(splitting.fit_option, str):
nominal_draw_obj, results = ff_func.smooth_function(
@@ -149,7 +130,7 @@ def calculation_ttbar_FFs(
bin_edges=splitting.var_bins,
correction_option=splitting.fit_option,
bandwidth=splitting.bandwidth,
- for_FF=True,
+ stat_sigma=config["stat_sigma"] if "stat_sigma" in config else 1.0,
)
unc_draw_obj = results["default"]
used_fit = splitting.fit_option
@@ -172,20 +153,10 @@ def calculation_ttbar_FFs(
save_data=True,
)
- # doing some control plots
+ # doing control plots
for _hist, _region, _data, _samples in [
- (
- SRlike_hists,
- "SR_like",
- "data",
- ff_func.controlplot_samples(config["use_embedding"]),
- ),
- (
- ARlike_hists,
- "AR_like",
- "data",
- ff_func.controlplot_samples(config["use_embedding"]),
- ),
+ (SRlike_hists, "SR_like", "data", ff_func.controlplot_samples(sample_paths)),
+ (ARlike_hists, "AR_like", "data", ff_func.controlplot_samples(sample_paths)),
(SRlike_hists, "SR_like", "data_subtracted", ["ttbar_J"]),
(ARlike_hists, "AR_like", "data_subtracted", ["ttbar_J"]),
]:
@@ -215,11 +186,7 @@ def calculation_ttbar_FFs(
return results
-@logging_helper.LogDecorator().grouped_logs(
- extractor=lambda *args, **kwargs: (
- kwargs["logger"] if "config" in kwargs else args[4]
- )
-)
+@logging_helper.LogDecorator().grouped_logs(extractor=lambda *args, **kwargs: (kwargs["logger"] if "config" in kwargs else args[4]))
def calculation_FF_data_scaling_factor(
config: Dict[str, Union[str, Dict, List]],
process_conf: Dict[str, Union[str, Dict, List]],
@@ -244,10 +211,8 @@ def calculation_FF_data_scaling_factor(
"""
log = logging.getLogger(logger)
- # init histogram dict for FF data correction
SRlike_hists = dict()
ARlike_hists = dict()
- # init histogram dict for QCD SS/OS estimation
SRlike_hists_qcd = dict()
ARlike_hists_qcd = dict()
@@ -259,10 +224,7 @@ def calculation_FF_data_scaling_factor(
rdf = ROOT.RDataFrame(config["tree"], sample_path)
- # event filter for ttbar signal-like region
- log.info(
- f"Filtering events for the signal-like region. Target process: {process}"
- )
+ log.info(f"Filtering events for the signal-like region. Target process: {process}")
region_conf = copy.deepcopy(process_conf["SRlike_cuts"])
rdf_SRlike = ff_func.apply_region_filters(
rdf=rdf,
@@ -273,8 +235,7 @@ def calculation_FF_data_scaling_factor(
logger=logger,
)
- # QCD estimation from same sign in signal-like region
- if "tau_pair_sign" in region_conf:
+ if "tau_pair_sign" in region_conf: # QCD estimation from same sign in signal-like region
region_conf["tau_pair_sign"] = "(q_1*q_2) > 0" # same sign
else:
raise ValueError(
@@ -293,10 +254,7 @@ def calculation_FF_data_scaling_factor(
logger=logger,
)
- # event filter for ttbar application-like region
- log.info(
- f"Filtering events for the application-like region. Target process: {process}"
- )
+ log.info(f"Filtering events for the application-like region. Target process: {process}")
region_conf = copy.deepcopy(process_conf["ARlike_cuts"])
rdf_ARlike = ff_func.apply_region_filters(
rdf=rdf,
@@ -307,8 +265,7 @@ def calculation_FF_data_scaling_factor(
logger=logger,
)
- # QCD estimation from same sign in application-like region
- if "tau_pair_sign" in region_conf:
+ if "tau_pair_sign" in region_conf: # QCD estimation from same sign in application-like region
region_conf["tau_pair_sign"] = "(q_1*q_2) > 0" # same sign
else:
raise ValueError(
@@ -328,26 +285,22 @@ def calculation_FF_data_scaling_factor(
)
# make yield histograms for FF data correction
- h = RuntimeVariables.RDataFrameWrapper(rdf_SRlike).Histo1D(
+ SRlike_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_SRlike).Histo1D(
("#phi(#slash{E}_{T})", f"{sample}", 1, -3.5, 3.5), "metphi", "weight"
- )
- SRlike_hists[sample] = h.GetValue()
+ ).GetValue()
- h = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
+ ARlike_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_ARlike).Histo1D(
("#phi(#slash{E}_{T})", f"{sample}", 1, -3.5, 3.5), "metphi", "weight"
- )
- ARlike_hists[sample] = h.GetValue()
+ ).GetValue()
# make yield histograms for QCD estimation
- h_qcd = RuntimeVariables.RDataFrameWrapper(rdf_SRlike_qcd).Histo1D(
+ SRlike_hists_qcd[sample] = RuntimeVariables.RDataFrameWrapper(rdf_SRlike_qcd).Histo1D(
("#phi(#slash{E}_{T})", f"{sample}", 1, -3.5, 3.5), "metphi", "weight"
- )
- SRlike_hists_qcd[sample] = h_qcd.GetValue()
+ ).GetValue()
- h_qcd = RuntimeVariables.RDataFrameWrapper(rdf_ARlike_qcd).Histo1D(
+ ARlike_hists_qcd[sample] = RuntimeVariables.RDataFrameWrapper(rdf_ARlike_qcd).Histo1D(
("#phi(#slash{E}_{T})", f"{sample}", 1, -3.5, 3.5), "metphi", "weight"
- )
- ARlike_hists_qcd[sample] = h_qcd.GetValue()
+ ).GetValue()
# calculate QCD estimation
SRlike_hists["QCD"] = ff_func.QCD_SS_estimate(hists=SRlike_hists_qcd)
@@ -412,7 +365,6 @@ def non_closure_correction(
log = logging.getLogger(logger)
- # init histogram dict for FF measurement
SR_hists = dict()
AR_hists = dict()
@@ -432,10 +384,7 @@ def non_closure_correction(
rdf = ROOT.RDataFrame(config["tree"], sample_path)
- # event filter for ttbar signal region
- log.info(
- f"Filtering events for the signal region. Target process: {process}"
- )
+ log.info(f"Filtering events for the signal region. Target process: {process}")
region_conf = copy.deepcopy(config["target_processes"][process]["SR_cuts"])
rdf_SR = ff_func.apply_region_filters(
rdf=rdf,
@@ -446,10 +395,7 @@ def non_closure_correction(
logger=logger,
)
- # event filter for ttbar application region
- log.info(
- f"Filtering events for the application region. Target process: {process}"
- )
+ log.info(f"Filtering events for the application region. Target process: {process}")
region_conf = copy.deepcopy(config["target_processes"][process]["AR_cuts"])
rdf_AR = ff_func.apply_region_filters(
rdf=rdf,
@@ -465,43 +411,27 @@ def non_closure_correction(
# additionally evaluate the previous corrections
corr_str = ""
for corr_evaluator in corr_evaluators:
- rdf_AR = corr_evaluator.evaluate_correction(
- rdf=rdf_AR,
- )
+ rdf_AR = corr_evaluator.evaluate_correction(rdf=rdf_AR)
corr_str += f" * {corr_evaluator.corr_str}"
rdf_AR = rdf_AR.Define(
"weight_ff", f"weight * {process}_fake_factor{corr_str}"
)
- # get binning of the dependent variable
xbinning = array.array("d", splitting.var_bins)
nbinsx = len(splitting.var_bins) - 1
- # making the histograms
- h = RuntimeVariables.RDataFrameWrapper(rdf_SR).Histo1D(
- (
- correction_conf["var_dependence"],
- f"{sample}",
- nbinsx,
- xbinning,
- ),
+ SR_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_SR).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
correction_conf["var_dependence"],
"weight",
- )
- SR_hists[sample] = h.GetValue()
-
- h = RuntimeVariables.RDataFrameWrapper(rdf_AR).Histo1D(
- (
- correction_conf["var_dependence"],
- f"{sample}",
- nbinsx,
- xbinning,
- ),
+ ).GetValue()
+
+ AR_hists["ttbar_ff"] = RuntimeVariables.RDataFrameWrapper(rdf_AR).Histo1D(
+ (correction_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
correction_conf["var_dependence"],
"weight_ff",
- )
- AR_hists["ttbar_ff"] = h.GetValue()
+ ).GetValue()
correction_hist = ff_func.calculate_non_closure_correction_ttbar_fromMC(
SR=SR_hists, AR=AR_hists
@@ -512,6 +442,7 @@ def non_closure_correction(
bin_edges=splitting.var_bins,
correction_option=splitting.correction_option,
bandwidth=splitting.bandwidth,
+ stat_sigma=config["stat_sigma"] if "stat_sigma" in config else 1.0,
)
plotting.plot_correction(
diff --git a/FF_calculation/fractions.py b/FF_calculation/fractions.py
index e27ecd5..c1d010f 100644
--- a/FF_calculation/fractions.py
+++ b/FF_calculation/fractions.py
@@ -15,6 +15,48 @@
from helper.functions import RuntimeVariables
+def get_mc_shifted_fractions(hists: Dict[str, Any], processes: List[str]) -> Dict[str, Dict[str, Any]]:
+ """
+ Function to calculate the fractions of the processes for the fake factor calculation with shifted variations.
+
+ Args:
+ hists: Dictionary containing the histograms of the processes for the fraction calculation
+ processes: List of processes for which the fractions should be calculated
+
+    Returns:
+        Dictionary keyed by variation name ("nominal", plus "frac_<process>_up" and "frac_<process>_down" for each process), each mapping every process to its fraction histogram
+ """
+
+ variations = {}
+ variations["nominal"] = {}
+ for process in processes:
+ variations["nominal"][process] = ff_func.calc_fraction(hists, process, processes)
+
+ for variation_process in processes:
+ hists_up, hists_down = {}, {}
+ for process in processes:
+ hists_up[process] = hists[process].Clone()
+ hists_down[process] = hists[process].Clone()
+ if process == variation_process:
+ hists_up[process] = hists_up[process].AddError(1)
+ hists_down[process] = hists_down[process].AddError(-1)
+
+ for b in range(1, hists_down[process].GetNbinsX() + 1):
+ if hists_down[process].GetBinContent(b) < 0.0:
+ hists_down[process].SetBinContent(b, 0.0)
+ if hists_up[process].GetBinContent(b) < 0.0:
+ hists_up[process].SetBinContent(b, 0.0)
+
+ variations[f"frac_{variation_process}_up"] = {}
+ variations[f"frac_{variation_process}_down"] = {}
+
+ for process in processes:
+ variations[f"frac_{variation_process}_up"][process] = ff_func.calc_fraction(hists_up, process, processes)
+ variations[f"frac_{variation_process}_down"][process] = ff_func.calc_fraction(hists_down, process, processes)
+
+ return variations
+
+
@logging_helper.LogDecorator().grouped_logs(extractor=lambda args: f"{args[6]}")
def fraction_calculation(
args: Tuple[Any, ...],
@@ -62,10 +104,7 @@ def fraction_calculation(
rdf = ROOT.RDataFrame(config["tree"], sample_path)
- # event filter for application region
- log.info(
- "Filtering events for the fraction calculation in the application region."
- )
+ log.info("Filtering events for the fraction calculation in the application region.")
region_conf = copy.deepcopy(process_conf["AR_cuts"])
rdf_AR = ff_func.apply_region_filters(
rdf=rdf,
@@ -76,7 +115,7 @@ def fraction_calculation(
logger=logger,
)
- # event filter for signal region; this is not needed for the FF calculation, just for control plots
+ # this is not needed for the FF calculation, just for control plots
log.info("Filtering events for the fraction calculation in the signal region.")
region_conf = copy.deepcopy(process_conf["SR_cuts"])
rdf_SR = ff_func.apply_region_filters(
@@ -88,54 +127,29 @@ def fraction_calculation(
logger=logger,
)
- # get binning of the dependent variable
xbinning = array.array("d", splitting.var_bins)
nbinsx = len(splitting.var_bins) - 1
- # making the histograms
- h = RuntimeVariables.RDataFrameWrapper(rdf_AR).Histo1D(
+ AR_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_AR).Histo1D(
(process_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
process_conf["var_dependence"],
"weight",
- )
- AR_hists[sample] = h.GetValue()
+ ).GetValue()
- h = RuntimeVariables.RDataFrameWrapper(rdf_SR).Histo1D(
+ SR_hists[sample] = RuntimeVariables.RDataFrameWrapper(rdf_SR).Histo1D(
(process_conf["var_dependence"], f"{sample}", nbinsx, xbinning),
process_conf["var_dependence"],
"weight",
- )
- SR_hists[sample] = h.GetValue()
+ ).GetValue()
# calculate QCD estimation; here directly estimated as difference between mc and data without SS/OS
AR_hists["QCD"] = ff_func.QCD_SS_estimate(hists=AR_hists)
SR_hists["QCD"] = ff_func.QCD_SS_estimate(hists=SR_hists)
- frac_hists = dict()
-
- for p in config[process]["processes"]:
- frac_hists[p] = ff_func.calc_fraction(
- hists=AR_hists,
- target=p,
- processes=config[process]["processes"],
- )
- frac_hists = ff_func.add_fraction_variations(
- hists=frac_hists,
- processes=config[process]["processes"],
- )
-
- SR_frac_hists = dict()
+ processes_list = config[process]["processes"]
- for p in config[process]["processes"]:
- SR_frac_hists[p] = ff_func.calc_fraction(
- hists=SR_hists,
- target=p,
- processes=config[process]["processes"],
- )
- SR_frac_hists = ff_func.add_fraction_variations(
- hists=SR_frac_hists,
- processes=config[process]["processes"],
- )
+ frac_hists = get_mc_shifted_fractions(AR_hists, processes_list)
+ SR_frac_hists = get_mc_shifted_fractions(SR_hists, processes_list)
plotting.plot_fractions(
variable=process_conf["var_dependence"],
@@ -164,7 +178,7 @@ def fraction_calculation(
save_data=True,
)
- # producing some control plots
+ # producing control plots
for _hist, _region in [
(SR_hists, "SR"),
(AR_hists, "AR"),
@@ -178,7 +192,7 @@ def fraction_calculation(
process=process,
region=_region,
data="data",
- samples=ff_func.controlplot_samples(config["use_embedding"]),
+ samples=ff_func.controlplot_samples(sample_paths),
category=splitting.split,
output_path=output_path,
logger=logger,
diff --git a/adjust_binning.py b/adjust_binning.py
index 6f82f40..6e887e9 100644
--- a/adjust_binning.py
+++ b/adjust_binning.py
@@ -76,7 +76,7 @@ def _equipopulated_binned_variable(
return np.quantile(
a=item,
q=np.linspace(0, 1, n_bins + 1),
- weights=weights,
+ weights=abs(weights) if weights is not None else None,
method="linear" if weights is None else "inverted_cdf",
)
diff --git a/configs/general_definitions.py b/configs/general_definitions.py
index 681ccef..118ae89 100644
--- a/configs/general_definitions.py
+++ b/configs/general_definitions.py
@@ -11,7 +11,7 @@
default_fit_option = "poly_1"
default_correction_option = "smoothed"
-default_CMS_text = "Own work (Data/Simulation)"
+default_CMS_text = "Private work (Data/Simulation)"
VARIATIONS = namedtuple(
"Variations", [
@@ -106,19 +106,22 @@ def get_default_bandwidth(
)
# definitions for channels
-channel_dict = AutoGetDict({
- "et": r"$e\tau_{h}$",
- "mt": r"$\mu\tau_{h}$",
- "tt": r"$\tau_{h}\tau_{h}$",
- "mm": r"$\mu\mu$"})
+channel_dict = AutoGetDict(
+ {
+ "et": r"$e\tau_{h}$",
+ "mt": r"$\mu\tau_{h}$",
+ "tt": r"$\tau_{h}\tau_{h}$",
+ "mm": r"$\mu\mu$",
+ }
+)
# definitions for era and luminosity
era_dict = AutoGetDict(
{
- "2016preVFP": r"$19.5\,fb^{-1}$ (2016preVFP, 13 TeV)",
- "2016postVFP": r"$16.8\,fb^{-1}$ (2016postVFP, 13 TeV)",
- "2017": r"$41.5\,fb^{-1}$ (2017, 13 TeV)",
- "2018": r"$59.8\,fb^{-1}$ (2018, 13 TeV)",
+ "2016preVFP": r"$19.52\,fb^{-1}$ (2016preVFP, 13 TeV)",
+ "2016postVFP": r"$16.81\,fb^{-1}$ (2016postVFP, 13 TeV)",
+ "2017": r"$42.07\,fb^{-1}$ (2017, 13 TeV)",
+ "2018": r"$59.56\,fb^{-1}$ (2018, 13 TeV)",
"2022preEE": r"$7.98\,fb^{-1}$ (2022preEE, 13.6 TeV)",
"2022postEE": r"$26.67\,fb^{-1}$ (2022postEE, 13.6 TeV)",
"2023preBPix": r"$18.06\,fb^{-1}$ (2023preBPix, 13.6 TeV)",
@@ -257,7 +260,6 @@ def get_default_bandwidth(
"phi_2": r"$\phi^{\mu_2}$",
"mass_2": r"$\mu_2$ mass",
"deltaR_ditaupair": r"$\Delta R(\mu_1,\mu_2)$",
- # "tau_decaymode_2": r"$\tau_{h}^{DM}$",
**channel_indipendent_variable_dict,
}
),
diff --git a/configs/smhtt_ul/2018/common_settings.yaml b/configs/smhtt_ul/2018/common_settings.yaml
index e292125..0e6b9e6 100644
--- a/configs/smhtt_ul/2018/common_settings.yaml
+++ b/configs/smhtt_ul/2018/common_settings.yaml
@@ -1,9 +1,9 @@
-ntuple_path: "root://cmsdcache-kit-disk.gridka.de//store/user/amonsch/CROWN/ntuples/ff_and_cr_production_2018UL_mt__2025-11-03_wo_syst_v1/CROWNRun"
-output_path: /ceph/amonsch/FFmethod/smhtt_ul/ff_and_cr_production_2018UL_mt__2025-08-21_wo_syst_v2_2025-11-03
+ntuple_path: "root://cmsdcache-kit-disk.gridka.de//store/user/amonsch/CROWN/ntuples/ff_and_cr_2018UL_mt__2026-03-27__v2/CROWNRun"
+output_path: /ceph/amonsch/FFmethod/smhtt_ul/ff_and_cr_2018UL_mt__2026-03-27__v2
friends: ["fastmtt_v1"]
-file_path: /ceph/amonsch/FFmethod/smhtt_ul/ff_and_cr_production_2018UL_mt__2025-08-21_wo_syst_v2_2025-11-03
+file_path: /ceph/amonsch/FFmethod/smhtt_ul/ff_and_cr_2018UL_mt__2026-03-27__v2
-workdir_name: extended_corrections_set_with_applied_es_corrections
+workdir_name: version_2025-03-28_classic
era: '2018'
tree: ntuple
@@ -24,3 +24,9 @@ tau_vs_jet_wgt_wps:
use_embedding: true
use_center_of_mass_bins: true
+
+stat_sigma: 1.0
+
+skip_corrections_compatible_to_one: false
+skip_corrections_p_value: 0.05
+use_suppressed_mc_errors_for_correction_selection: true
diff --git a/configs/smhtt_ul/2018/corrections_mt.yaml b/configs/smhtt_ul/2018/corrections_mt.yaml
deleted file mode 100644
index 1594090..0000000
--- a/configs/smhtt_ul/2018/corrections_mt.yaml
+++ /dev/null
@@ -1,770 +0,0 @@
-templates:
- split_schemes:
- 3j: &3j_split
- split_categories:
- njets: ["==0", "==1", ">=2"]
- split_categories_binedges:
- njets: [-0.5, 0.5, 1.5, 22.5]
- correction_option:
- "==0": "smoothed"
- "==1": "smoothed"
- ">=2": "smoothed"
- bandwidth:
- "==0": 1.0
- "==1": 1.0
- ">=2": 1.0
- 2j: &2j_split
- split_categories:
- njets: ["<=1", ">=2"]
- split_categories_binedges:
- njets: [-0.5, 1.5, 22.5]
- correction_option:
- "<=1": "smoothed"
- ">=2": "smoothed"
- bandwidth:
- "<=1": 1
- ">=2": 1
- var_dependence_n_bins:
- QCD:
- equipopulated_binning_options: &QCD_var_dependence_n_bins
- var_dependence_n_bins:
- "==0": 11
- "==1": 9
- ">=2": 7
- Wjets:
- equipopulated_binning_options: &Wjets_var_dependence_n_bins
- var_dependence_n_bins:
- "==0": 15
- "==1": 11
- ">=2": 9
- ttbar:
- equipopulated_binning_options: &ttbar_var_dependence_n_bins
- var_dependence_n_bins:
- "<=1": 11
- ">=2": 15
- correction_variations__with_mc_subtraction_shift: &correction_variations__with_mc_subtraction_shift
- correction_variations:
- - StatShift
- - SystMCShift
- - SystBandAsym
- correction_variations__without_mc_subtraction_shift: &correction_variations__without_mc_subtraction_shift
- correction_variations:
- - StatShift
- - SystBandAsym
- variables:
- eta_1: &eta_1
- var_dependence: eta_1
- equipopulated_binning_options: &eta_1__eq_bin_opt
- variable_config:
- eta_1:
- min: -2.1
- max: +2.1
- rounding: 2
- correction_option: "smoothed"
- bandwidth: 1.0
- eta_2: &eta_2
- var_dependence: eta_2
- equipopulated_binning_options: &eta_2__eq_bin_opt
- variable_config:
- eta_2:
- min: -2.5
- max: +2.5
- rounding: 2
- correction_option: "smoothed"
- bandwidth: 1.5
- deltaEta_ditaupair: &deltaEta_ditaupair
- var_dependence: deltaEta_ditaupair
- equipopulated_binning_options: &deltaEta_ditaupair__eq_bin_opt
- variable_config:
- deltaEta_ditaupair:
- min: -4.9
- max: +4.9
- rounding: 2
- correction_option: "smoothed"
- bandwidth: 1.75
- deltaR_ditaupair: &deltaR_ditaupair
- var_dependence: deltaR_ditaupair
- equipopulated_binning_options: &deltaR_ditaupair__eq_bin_opt
- variable_config:
- deltaR_ditaupair:
- min: 0.5
- max: 5.0
- rounding: 4
- correction_option: "smoothed"
- jeta_1: &jeta_1
- var_dependence: jeta_1
- equipopulated_binning_options: &jeta_1__eq_bin_opt
- variable_config:
- jeta_1:
- min: -5.0
- max: +5.0
- rounding: 2
- correction_option: "smoothed"
- bandwidth: 2.0
- jeta_2: &jeta_2
- var_dependence: jeta_2
- equipopulated_binning_options: &jeta_2__eq_bin_opt
- variable_config:
- jeta_2:
- min: -5.0
- max: +5.0
- rounding: 2
- correction_option: "smoothed"
- bandwidth: 2.0
- jpt_1: &jpt_1
- var_dependence: jpt_1
- equipopulated_binning_options: &jpt_1__eq_bin_opt
- variable_config:
- jpt_1:
- min: 30.0
- max: 150.0
- rounding: 2
- correction_option: "smoothed"
- jpt_2: &jpt_2
- var_dependence: jpt_2
- equipopulated_binning_options: &jpt_2__eq_bin_opt
- variable_config:
- jpt_2:
- min: 30.0
- max: 150.0
- rounding: 2
- correction_option: "smoothed"
- met: &met
- var_dependence: met
- equipopulated_binning_options: &met__eq_bin_opt
- variable_config:
- met:
- min: 0.0
- max: 175.0
- rounding: 2
- correction_option: "smoothed"
- bandwidth: 25
- pt_tt: &pt_tt
- var_dependence: pt_tt
- equipopulated_binning_options: &pt_tt__eq_bin_opt
- variable_config:
- pt_tt:
- min: 0.0
- max: 150.0
- rounding: 2
- correction_option: "smoothed"
- bandwidth: 25
- pt_ttjj: &pt_ttjj
- var_dependence: pt_ttjj
- equipopulated_binning_options: &pt_ttjj__eq_bin_opt
- variable_config:
- pt_ttjj:
- min: 0.0
- max: 150.0
- rounding: 2
- correction_option: "smoothed"
- bandwidth: 25
- mt_tot: &mt_tot
- var_dependence: mt_tot
- equipopulated_binning_options: &mt_tot__eq_bin_opt
- variable_config:
- mt_tot:
- min: 0.0
- max: 250.0
- rounding: 2
- correction_option: "smoothed"
- bandwidth: 40
- mass_2: &mass_2
- var_dependence: mass_2
- equipopulated_binning_options: &mass_2__eq_bin_opt
- variable_config:
- mass_2:
- min: 0.2
- max: 2.0
- rounding: 4
- add_left: [0.0]
- correction_option: "binwise#[0]+smoothed"
- bandwidth: 0.3
- tau_decaymode_2: &tau_decaymode_2
- var_dependence: tau_decaymode_2
- correction_option: "binwise"
- bandwidth: 1.0
- var_bins: [-0.5, 0.5, 9.5, 10.5, 11.5]
- nbtag: &nbtag
- var_dependence: nbtag
- correction_option: "binwise"
- bandwidth: 1.0
- var_bins: [-0.5, 0.5, 1.5, 2.5, 22.5]
- iso_1: &iso_1
- var_dependence: iso_1
- equipopulated_binning_options: &iso_1__eq_bin_opt
- variable_config:
- iso_1:
- min: 0.00005
- max: 0.15
- rounding: 6
- add_left: [0.0]
- correction_option: "binwise#[0]+smoothed"
- bandwidth: 0.02
- deltaR_1j1: &deltaR_1j1
- var_dependence: deltaR_1j1
- equipopulated_binning_options: &deltaR_1j1__eq_bin_opt
- variable_config:
- deltaR_1j1:
- min: 0.5
- max: 7.0
- rounding: 3
- correction_option: "smoothed"
- bandwidth: 0.9
- deltaR_12j1: &deltaR_12j1
- var_dependence: deltaR_12j1
- equipopulated_binning_options: &deltaR_12j1__eq_bin_opt
- variable_config:
- deltaR_12j1:
- min: 0.0
- max: 10.0
- correction_option: "smoothed"
- bandwidth: 1.25
- m_vis: &m_vis
- var_dependence: m_vis
- equipopulated_binning_options: &m_vis__eq_bin_opt
- variable_config:
- m_vis:
- min: 10.0
- max: 250.0
- rounding: 2
- correction_option: "smoothed"
- bandwidth: 30
-
-channel: mt
-target_processes:
- QCD:
- chain_DR_SR_to_non_closure: false
-
- non_closure:
- tau_decaymode_2:
- <<: [*tau_decaymode_2, *3j_split]
- correction_variations: ["StatShift", "SystMCShift"]
- eta_1:
- <<: [*eta_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*eta_1__eq_bin_opt, *QCD_var_dependence_n_bins]
- bandwidth: [0.8, 1.0, 1.0]
- var_bins:
- "==0": [-2.1, -1.56, -1.26, -0.86, -0.5, -0.14, 0.22, 0.56, 0.93, 1.3, 1.63, 2.1]
- "==1": [-2.1, -1.51, -1.13, -0.66, -0.22, 0.22, 0.7, 1.15, 1.54, 2.1]
- ">=2": [-2.1, -1.37, -0.82, -0.33, 0.23, 0.81, 1.37, 2.1]
- eta_2:
- <<: [*eta_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*eta_2__eq_bin_opt, *QCD_var_dependence_n_bins]
- bandwidth: [0.9, 1.1, 1.5]
- var_bins:
- "==0": [-2.5, -1.68, -1.28, -0.92, -0.57, -0.18, 0.24, 0.62, 0.99, 1.31, 1.78, 2.5]
- "==1": [-2.5, -1.67, -1.19, -0.73, -0.25, 0.28, 0.72, 1.17, 1.59, 2.5]
- ">=2": [-2.5, -1.41, -0.87, -0.21, 0.4, 0.94, 1.55, 2.5]
- jeta_1:
- <<: [*jeta_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*jeta_1__eq_bin_opt, *QCD_var_dependence_n_bins]
- var_dependence_n_bins: [1, 9, 7]
- correction_option: ["skip", "smoothed", "smoothed"]
- bandwidth: [1.0, 2.5, 2.5]
- var_bins:
- "==0": [-5.0, 5.0]
- "==1": [-5.0, -2.73, -1.63, -0.92, -0.28, 0.34, 0.92, 1.65, 2.61, 5.0]
- ">=2": [-5.0, -2.02, -1.07, -0.33, 0.31, 0.99, 1.98, 5.0]
- jeta_2:
- <<: [*jeta_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*jeta_2__eq_bin_opt, *QCD_var_dependence_n_bins]
- var_dependence_n_bins: [1, 1, 7]
- correction_option: ["skip", "skip", "smoothed"]
- bandwidth: [1.0, 1.0, 4]
- var_bins:
- "==0": [-5.0, 5.0]
- "==1": [-5.0, 5.0]
- ">=2": [-5.0, -2.1, -1.13, -0.36, 0.33, 1.16, 2.2, 5.0]
- jpt_1:
- <<: [*jpt_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*jpt_1__eq_bin_opt, *QCD_var_dependence_n_bins]
- correction_option: ["skip", "smoothed", "smoothed"]
- bandwidth: [1.0, 50.0, 60.0]
- var_bins:
- "==0": [30.0, 150.0]
- "==1": [30.0, 31.86, 34.0, 36.75, 40.41, 45.06, 51.5, 61.09, 150.0]
- ">=2": [30.0, 42.38, 51.03, 59.81, 71.12, 86.0, 150.0]
- jpt_2:
- <<: [*jpt_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*jpt_2__eq_bin_opt, *QCD_var_dependence_n_bins]
- correction_option: ["skip", "skip", "smoothed"]
- bandwidth: [1.0, 1.0, 45.0]
- var_bins:
- "==0": [30.0, 150.0]
- "==1": [30.0, 150.0]
- ">=2": [30.0, 32.44, 35.62, 39.5, 45.22, 54.28, 150.0]
- met:
- <<: [*met, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*met__eq_bin_opt, *QCD_var_dependence_n_bins]
- bandwidth: [25.0, 45.0, 60.0]
- var_bins:
- "==0": [0.0, 8.43, 12.17, 15.46, 18.63, 21.79, 25.19, 29.1, 33.7, 39.63, 48.85, 150.0]
- "==1": [0.0, 10.73, 15.77, 20.26, 25.23, 30.93, 37.33, 46.39, 60.81, 150.0]
- ">=2": [0.0, 15.33, 23.41, 32.63, 41.85, 54.06, 76.44, 150.0]
- deltaEta_ditaupair:
- <<: [*deltaEta_ditaupair, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*deltaEta_ditaupair__eq_bin_opt, *QCD_var_dependence_n_bins]
- bandwidth: [1.25, 1.5, 2.0]
- var_bins:
- "==0": [-4.9, -2.13, -1.42, -0.93, -0.53, -0.18, 0.16, 0.56, 0.96, 1.43, 2.08, 4.9]
- "==1": [-4.9, -2.0, -1.23, -0.65, -0.23, 0.25, 0.72, 1.28, 2.04, 4.9]
- ">=2": [-4.9, -1.9, -1.03, -0.39, 0.26, 0.87, 1.76, 4.9]
- deltaR_ditaupair:
- <<: [*deltaR_ditaupair, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*deltaR_ditaupair__eq_bin_opt, *QCD_var_dependence_n_bins]
- bandwidth: [0.8, 1.1, 1.2]
- var_bins:
- "==0": [0.5, 2.6814, 2.869, 2.9787, 3.0526, 3.1104, 3.1696, 3.2609, 3.3865, 3.5747, 3.9186, 5.0]
- "==1": [0.5, 1.653, 2.227, 2.5243, 2.7304, 2.9125, 3.0715, 3.2696, 3.6723, 5.0]
- ">=2": [0.5, 1.3291, 1.9185, 2.3753, 2.7661, 3.0644, 3.4088, 5.0]
- deltaR_1j1:
- <<: [*deltaR_1j1, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*deltaR_1j1__eq_bin_opt, *QCD_var_dependence_n_bins]
- correction_option: ["skip", "smoothed", "smoothed"]
- var_bins:
- "==0": [0.5, 7.0]
- "==1": [0.5, 1.67, 2.205, 2.51, 2.774, 3.019, 3.235, 3.586, 4.235, 7.0]
- ">=2": [0.5, 1.825, 2.382, 2.761, 3.009, 3.272, 3.759, 7.0]
- deltaR_12j1:
- <<: [*deltaR_12j1, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*deltaR_12j1__eq_bin_opt, *QCD_var_dependence_n_bins]
- correction_option: ["skip", "smoothed", "smoothed"]
- bandwidth: [1.0, 1.5, 1.5]
- var_bins:
- "==0": [0.0, 10.0]
- "==1": [0.0, 2.28, 2.73, 2.96, 3.11, 3.27, 3.55, 3.98, 4.87, 10.0]
- ">=2": [0.0, 2.26, 2.72, 2.95, 3.14, 3.46, 4.06, 10.0]
- pt_ttjj:
- <<: [*pt_ttjj, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*pt_ttjj__eq_bin_opt, *QCD_var_dependence_n_bins]
- correction_option: ["skip", "skip", "smoothed"]
- bandwidth: [1.0, 1.0, 35.0]
- var_bins:
- "==0": [0.0, 150.0]
- "==1": [0.0, 150.0]
- ">=2": [0.0, 15.52, 22.28, 30.06, 38.0, 49.31, 69.77, 150.0]
- mass_2:
- <<: [*mass_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*mass_2__eq_bin_opt, *QCD_var_dependence_n_bins]
- bandwidth: [0.25, 0.35, 0.4]
- var_bins:
- "==0": [0.0, 0.2, 0.5107, 0.6216, 0.7388, 0.835, 0.9121, 0.9941, 1.0732, 1.1543, 1.25, 1.3623, 2.0]
- "==1": [0.0, 0.2, 0.5322, 0.665, 0.79, 0.896, 1.0088, 1.0957, 1.1982, 1.3271, 2.0]
- ">=2": [0.0, 0.2, 0.564, 0.7314, 0.8735, 0.9932, 1.1123, 1.2715, 2.0]
- mt_tot:
- <<: [*mt_tot, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*mt_tot__eq_bin_opt, *QCD_var_dependence_n_bins]
- bandwidth: [25, 50, 60]
- var_bins:
- "==0": [50.0, 72.17, 77.52, 81.84, 85.76, 89.79, 94.23, 99.24, 105.25, 113.61, 130.06, 250.0]
- "==1": [0.0, 59.33, 72.18, 80.25, 87.75, 95.78, 103.33, 114.05, 132.26, 250.0]
- ">=2": [0.0, 53.08, 71.56, 83.98, 96.25, 111.26, 137.98, 250.0]
- m_vis:
- <<: [*m_vis, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*m_vis__eq_bin_opt, *QCD_var_dependence_n_bins]
- bandwidth: [25, 40.0, 60.0]
- var_bins:
- "==0": [10.0, 62.57, 66.96, 71.28, 75.86, 80.81, 86.89, 94.51, 105.32, 121.43, 147.86, 250.0]
- "==1": [10.0, 52.87, 64.52, 71.76, 79.13, 87.73, 99.12, 117.93, 148.5, 250.0]
- ">=2": [10.0, 50.01, 66.18, 78.78, 94.87, 113.93, 146.55, 250.0]
- iso_1:
- <<: [*iso_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: *QCD_var_dependence_n_bins
- variable_config:
- iso_1:
- min: 0.05
- max: 0.15
- rounding: 6
- bandwidth: [0.04, 0.04, 0.04]
- correction_option: "smoothed"
- var_bins:
- "==0": [0.05, 0.056882, 0.063543, 0.070468, 0.078181, 0.086647, 0.095377, 0.105744, 0.115638, 0.126327, 0.137486, 0.15]
- "==1": [0.05, 0.057096, 0.065105, 0.073987, 0.083517, 0.096737, 0.108642, 0.122276, 0.13489, 0.15]
- ">=2": [0.05, 0.059351, 0.068657, 0.081325, 0.096132, 0.110882, 0.128247, 0.15]
-
- DR_SR:
- <<: [*pt_tt, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*pt_tt__eq_bin_opt, *QCD_var_dependence_n_bins]
- var_dependence_n_bins:
- "==0": 9
- "==1": 7
- ">=2": 5
- bandwidth: [25.0, 40.0, 40.0]
- var_bins:
- "==0": [0.0, 8.28, 12.42, 16.22, 19.91, 23.9, 28.63, 34.4, 43.76, 150.0]
- "==1": [0.0, 25.39, 36.58, 46.12, 55.76, 68.1, 86.99, 150.0]
- ">=2": [0.0, 38.69, 61.59, 82.53, 106.93, 150.0]
- SRlike_cuts: # Try for yet perpendicular to the fake_factors_mt.yaml definition!
- lep_mt: (mt_1 < 50)
- nbtag: (nbtag >= 0)
- tau_pair_sign: ((q_1*q_2) > 0)
- lep_iso: "(!((iso_1 >= 0.05) && (iso_1 <= 0.15)))"
- ARlike_cuts: # Try for yet perpendicular to the fake_factors_mt.yaml definition!
- lep_mt: (mt_1 < 50)
- nbtag: (nbtag >= 0)
- tau_pair_sign: ((q_1*q_2) > 0)
- lep_iso: "(!((iso_1 >= 0.05) && (iso_1 <= 0.15)))"
- AR_SR_cuts:
- lep_mt: (mt_1 < 70)
- nbtag: (nbtag >= 0)
- tau_pair_sign: ((q_1*q_2) < 0)
- lep_iso: "(!((iso_1 >= 0.05) && (iso_1 <= 0.15)))"
- non_closure:
- nbtag:
- <<: [*nbtag, *3j_split]
- correction_variations: ["StatShift", "SystMCShift"]
-
- Wjets:
- chain_DR_SR_to_non_closure: false
-
- non_closure:
- tau_decaymode_2:
- <<: [*tau_decaymode_2, *3j_split]
- correction_variations: ["StatShift", "SystMCShift"]
- eta_1:
- <<: [*eta_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*eta_1__eq_bin_opt, *Wjets_var_dependence_n_bins]
- var_dependence_n_bins:
- "==0": 15
- "==1": 11
- ">=2": 5
- bandwidth: [0.65, 0.85, 1.25]
- var_bins:
- "==0": [-2.1, -1.73, -1.43, -1.15, -0.88, -0.62, -0.38, -0.12, 0.12, 0.38, 0.64, 0.89, 1.16, 1.45, 1.75, 2.1]
- "==1": [-2.1, -1.61, -1.22, -0.85, -0.49, -0.13, 0.2, 0.55, 0.89, 1.25, 1.64, 2.1]
- ">=2": [-2.1, -1.1, -0.33, 0.39, 1.16, 2.1]
- eta_2:
- <<: [*eta_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*eta_2__eq_bin_opt, *Wjets_var_dependence_n_bins]
- bandwidth: [0.6, 0.9, 1.2]
- var_bins:
- "==0": [-2.5, -1.64, -1.28, -1.03, -0.78, -0.55, -0.32, -0.09, 0.15, 0.37, 0.58, 0.81, 1.03, 1.29, 1.67, 2.5]
- "==1": [-2.5, -1.52, -1.12, -0.78, -0.43, -0.11, 0.22, 0.51, 0.82, 1.14, 1.55, 2.5]
- ">=2": [-2.5, -1.41, -0.95, -0.55, -0.14, 0.25, 0.63, 1.0, 1.43, 2.5]
- jeta_1:
- <<: [*jeta_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*jeta_1__eq_bin_opt, *Wjets_var_dependence_n_bins]
- var_dependence_n_bins: [1, 9, 7]
- correction_option: ["skip", "smoothed", "smoothed"]
- bandwidth: [1.0, 2.0, 2.0]
- var_bins:
- "==0": [-5.0, 5.0]
- "==1": [-5.0, -2.71, -1.69, -0.94, -0.3, 0.34, 1.0, 1.77, 2.77, 5.0]
- ">=2": [-5.0, -2.03, -1.07, -0.35, 0.35, 1.08, 2.07, 5.0]
- jeta_2:
- <<: [*jeta_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*jeta_2__eq_bin_opt, *Wjets_var_dependence_n_bins]
- var_dependence_n_bins: [1, 1, 7]
- correction_option: ["skip", "skip", "smoothed"]
- bandwidth: [1.0, 1.0, 3.0]
- var_bins:
- "==0": [-5.0, 5.0]
- "==1": [-5.0, 5.0]
- ">=2": [-5.0, -2.35, -1.22, -0.36, 0.39, 1.2, 2.28, 5.0]
- jpt_1:
- <<: [*jpt_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*jpt_1__eq_bin_opt, *Wjets_var_dependence_n_bins]
- correction_option: ["skip", "smoothed", "smoothed"]
- bandwidth: [1.0, 40.0, 55.0]
- var_bins:
- "==0": [30.0, 150.0]
- "==1": [30.0, 31.75, 33.75, 36.06, 38.91, 42.56, 46.97, 52.62, 59.94, 70.38, 150.0]
- ">=2": [30.0, 40.97, 48.16, 55.28, 63.12, 71.88, 82.69, 97.0, 150.0]
- jpt_2:
- <<: [*jpt_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*jpt_2__eq_bin_opt, *Wjets_var_dependence_n_bins]
- correction_option: ["skip", "skip", "smoothed"]
- bandwidth: [1.0, 1.0, 45.0]
- var_bins:
- "==0": [30.0, 150.0]
- "==1": [30.0, 150.0]
- ">=2": [30.0, 31.98, 34.44, 37.22, 41.0, 45.97, 52.78, 62.31, 150.0]
- met:
- <<: [*met, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*met__eq_bin_opt, *Wjets_var_dependence_n_bins]
- bandwidth: [20.0, 30.0, 60.0]
- var_bins:
- "==0": [0.0, 26.44, 31.36, 35.36, 38.87, 42.19, 45.55, 48.79, 52.11, 55.63, 59.36, 63.63, 68.67, 74.99, 85.01, 150.0]
- "==1": [0.0, 30.7, 37.25, 42.61, 47.21, 52.07, 57.24, 62.89, 69.93, 79.35, 94.81, 150.0]
- ">=2": [0.0, 34.6, 43.53, 51.39, 58.87, 67.67, 77.63, 91.67, 113.58, 150.0]
- deltaEta_ditaupair:
- <<: [*deltaEta_ditaupair, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*deltaEta_ditaupair__eq_bin_opt, *Wjets_var_dependence_n_bins]
- bandwidth: [0.8, 1.1, 1.25]
- var_bins:
- "==0": [-4.9, -1.9, -1.36, -1.02, -0.75, -0.52, -0.31, -0.11, 0.09, 0.3, 0.51, 0.74, 1.0, 1.34, 1.87, 4.9]
- "==1": [-4.9, -1.75, -1.16, -0.75, -0.43, -0.14, 0.14, 0.43, 0.74, 1.14, 1.74, 4.9]
- ">=2": [-4.9, -1.56, -0.96, -0.53, -0.16, 0.19, 0.54, 0.95, 1.54, 4.9]
- deltaR_ditaupair:
- <<: [*deltaR_ditaupair, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*deltaR_ditaupair__eq_bin_opt, *Wjets_var_dependence_n_bins]
- bandwidth: [0.5, 0.6, 0.7]
- var_bins:
- "==0": [0.5, 1.291, 1.6664, 1.9482, 2.1706, 2.3528, 2.5196, 2.6606, 2.783, 2.8902, 2.9821, 3.0643, 3.1321, 3.2287, 3.4481, 5.0]
- "==1": [0.5, 1.1946, 1.5966, 1.9154, 2.1746, 2.4058, 2.6139, 2.801, 2.9638, 3.1049, 3.3191, 5.0]
- ">=2": [0.5, 1.1567, 1.5816, 1.9338, 2.2206, 2.4933, 2.7366, 2.9631, 3.1951, 5.0]
- deltaR_1j1:
- <<: [*deltaR_1j1, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*deltaR_1j1__eq_bin_opt, *Wjets_var_dependence_n_bins]
- correction_option: ["skip", "smoothed", "smoothed"]
- bandwidth: [1.0, 1.0, 1.2]
- var_bins:
- "==0": [0.5, 7.0]
- "==1": [0.5, 1.257, 1.688, 2.026, 2.312, 2.572, 2.811, 3.037, 3.282, 3.69, 4.342, 7.0]
- ">=2": [0.5, 1.242, 1.733, 2.12, 2.446, 2.72, 2.978, 3.261, 3.842, 7.0]
- deltaR_12j1:
- <<: [*deltaR_12j1, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*deltaR_12j1__eq_bin_opt, *Wjets_var_dependence_n_bins]
- correction_option: ["skip", "smoothed", "smoothed"]
- bandwidth: [1.0, 1.5, 1.75]
- var_bins:
- "==0": [0.0, 10.0]
- "==1": [0.0, 1.78, 2.26, 2.55, 2.77, 2.94, 3.1, 3.29, 3.58, 4.02, 4.82, 10.0]
- ">=2": [0.0, 1.78, 2.28, 2.61, 2.84, 3.04, 3.23, 3.57, 4.21, 10.0]
- pt_ttjj:
- <<: [*pt_ttjj, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*pt_ttjj__eq_bin_opt, *Wjets_var_dependence_n_bins]
- correction_option: ["skip", "skip", "smoothed"]
- bandwidth: [1.0, 1.0, 30]
- var_bins:
- "==0": [0.0, 150.0]
- "==1": [0.0, 150.0]
- ">=2": [0.0, 13.13, 19.85, 25.75, 31.71, 38.56, 46.7, 57.79, 75.16, 150.0]
- mass_2:
- <<: [*mass_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*mass_2__eq_bin_opt, *Wjets_var_dependence_n_bins]
- bandwidth: [0.225, 0.225, 0.25]
- var_bins:
- "==0": [0.0, 0.2, 0.4827, 0.5635, 0.6396, 0.707, 0.7681, 0.8281, 0.8857, 0.9453, 1.0078, 1.0713, 1.1377, 1.2109, 1.2949, 1.4092, 2.0]
- "==1": [0.0, 0.2, 0.5088, 0.6177, 0.7119, 0.7959, 0.8696, 0.9551, 1.0381, 1.1279, 1.2285, 1.3545, 2.0]
- ">=2": [0.0, 0.2, 0.5308, 0.6577, 0.7612, 0.8506, 0.957, 1.0625, 1.1758, 1.3076, 2.0]
- mt_tot:
- <<: [*mt_tot, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*mt_tot__eq_bin_opt, *Wjets_var_dependence_n_bins]
- bandwidth: [20.0, 30.0, 50.0]
- var_bins:
- "==0": [90.0, 107.0, 114.05, 118.88, 122.8, 126.47, 129.96, 133.5, 137.38, 141.66, 146.66, 152.62, 160.27, 171.17, 189.87, 250.0]
- "==1": [80.0, 105.43, 113.18, 120.11, 126.5, 133.37, 140.45, 148.83, 158.49, 171.81, 194.95, 250.0]
- ">=2": [80.0, 107.35, 119.32, 129.92, 139.77, 150.85, 163.8, 180.28, 204.68, 250.0]
- m_vis:
- <<: [*m_vis, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*m_vis__eq_bin_opt, *Wjets_var_dependence_n_bins]
- bandwidth: [20, 30, 40]
- var_bins:
- "==0": [10.0, 42.86, 56.57, 65.25, 71.44, 76.81, 82.65, 88.89, 94.81, 101.38, 108.76, 117.89, 129.67, 146.41, 173.6, 250.0]
- "==1": [10.0, 47.9, 61.79, 70.4, 78.55, 87.62, 97.03, 108.32, 123.11, 142.49, 172.66, 250.0]
- ">=2": [10.0, 52.86, 67.26, 77.69, 90.57, 104.12, 120.68, 142.58, 175.33, 250.0]
- iso_1:
- <<: [*iso_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*iso_1__eq_bin_opt, *Wjets_var_dependence_n_bins]
- bandwidth: [0.03, 0.04, 0.05]
- var_bins:
- "==0": [0.0, 0.00005, 0.004932, 0.007516, 0.010087, 0.012785, 0.015814, 0.019493, 0.023479, 0.028324, 0.034053, 0.040962, 0.049714, 0.060662, 0.075963, 0.100649, 0.15]
- "==1": [0.0, 0.00005, 0.005621, 0.009072, 0.012884, 0.017279, 0.022713, 0.029417, 0.037605, 0.049066, 0.064637, 0.090462, 0.15]
- ">=2": [0.0, 0.00005, 0.005602, 0.009668, 0.014723, 0.020748, 0.028099, 0.039254, 0.055474, 0.082078, 0.15]
-
- DR_SR:
- <<: [*pt_tt, *3j_split, *correction_variations__with_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*pt_tt__eq_bin_opt, *Wjets_var_dependence_n_bins]
- var_dependence_n_bins:
- "==0": 13
- "==1": 9
- ">=2": 7
- bandwidth: [25.0, 40.0, 50.0]
- var_bins:
- "==0": [0.0, 6.74, 9.78, 12.39, 14.84, 17.3, 19.85, 22.57, 25.6, 29.06, 33.27, 38.82, 47.6, 150.0]
- "==1": [0.0, 23.29, 32.66, 40.35, 48.0, 55.9, 65.43, 78.07, 98.38, 150.0]
- ">=2": [0.0, 32.56, 49.55, 65.25, 80.72, 98.18, 119.72, 150.0]
- SRlike_cuts: # Try for yet perpendicular to the fake_factors_mt.yaml definition!
- lep_mt: (mt_1 > 0)
- nbtag: (nbtag == 0)
- tau_pair_sign: ((q_1*q_2) < 0)
- lep_iso: ((iso_1 >= 0.0) && (iso_1 <= 0.15))
- ARlike_cuts: # Try for yet perpendicular to the fake_factors_mt.yaml definition!
- lep_mt: (mt_1 > 0)
- nbtag: (nbtag == 0)
- tau_pair_sign: ((q_1*q_2) < 0)
- lep_iso: ((iso_1 >= 0.0) && (iso_1 <= 0.15))
- AR_SR_cuts:
- lep_mt: (mt_1 < 70)
- nbtag: (nbtag >= 0)
- tau_pair_sign: ((q_1*q_2) < 0)
- lep_iso: ((iso_1 >= 0.0) && (iso_1 <= 0.15))
- non_closure:
- nbtag:
- <<: [*nbtag, *3j_split]
- var_bins: [-0.5, 0.5, 22.5]
- correction_variations: ["StatShift", "SystMCShift"]
-
- ttbar:
- non_closure:
- tau_decaymode_2:
- <<: [*tau_decaymode_2, *2j_split]
- correction_variations: ["StatShift"]
- eta_1:
- <<: [*eta_1, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*eta_1__eq_bin_opt, *ttbar_var_dependence_n_bins]
- bandwidth: [0.6, 1.0]
- var_bins:
- "<=1": [-2.1, -1.49, -1.13, -0.8, -0.46, -0.13, 0.11, 0.46, 0.78, 1.18, 1.51, 2.1]
- ">=2": [-2.1, -1.66, -1.38, -1.07, -0.82, -0.6, -0.37, -0.09, 0.1, 0.32, 0.57, 0.82, 1.05, 1.31, 1.7, 2.1]
- eta_2:
- <<: [*eta_2, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*eta_2__eq_bin_opt, *ttbar_var_dependence_n_bins]
- bandwidth: [0.7, 0.7]
- var_bins:
- "<=1": [-2.5, -1.54, -1.03, -0.71, -0.36, -0.05, 0.26, 0.58, 0.87, 1.2, 1.64, 2.5]
- ">=2": [-2.5, -1.82, -1.52, -1.2, -0.95, -0.65, -0.39, -0.16, 0.1, 0.39, 0.65, 0.91, 1.17, 1.47, 1.83, 2.5]
- jeta_1:
- <<: [*jeta_1, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*jeta_1__eq_bin_opt, *ttbar_var_dependence_n_bins]
- correction_option: "smoothed"
- bandwidth: [1.0, 2.0]
- var_bins:
- "<=1": [-5.0, -1.81, -1.27, -0.75, -0.42, -0.17, 0.3, 0.86, 1.39, 1.75, 2.2, 5.0]
- ">=2": [-5.0, -2.17, -1.71, -1.34, -1.06, -0.68, -0.4, -0.21, 0.09, 0.36, 0.66, 0.99, 1.36, 1.74, 2.15, 5.0]
- jeta_2:
- <<: [*jeta_2, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*jeta_2__eq_bin_opt, *ttbar_var_dependence_n_bins]
- var_dependence_n_bins: [1, 15]
- correction_option: ["skip", "smoothed"]
- var_bins:
- "<=1": [-5.0, 5.0]
- ">=2": [-5.0, -2.29, -1.78, -1.35, -1.03, -0.77, -0.42, -0.08, 0.18, 0.43, 0.69, 1.02, 1.35, 1.71, 2.3, 5.0]
- jpt_1:
- <<: [*jpt_1, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*jpt_1__eq_bin_opt, *ttbar_var_dependence_n_bins]
- bandwidth: [30.0, 20.0]
- var_bins:
- "<=1": [30.0, 32.38, 35.75, 39.12, 41.78, 47.94, 53.25, 60.53, 67.19, 80.25, 98.12, 150.0]
- ">=2": [30.0, 44.72, 49.97, 55.5, 60.47, 65.44, 69.75, 74.75, 80.44, 87.81, 93.75, 100.25, 108.81, 120.31, 133.88, 150.0]
- jpt_2:
- <<: [*jpt_2, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*jpt_2__eq_bin_opt, *ttbar_var_dependence_n_bins]
- bandwidth: [30.0, 30.0]
- correction_option: ["skip", "smoothed"]
- var_bins:
- "<=1": [30.0, 150.0]
- ">=2": [30.0, 32.59, 35.66, 38.44, 40.5, 43.03, 45.84, 49.53, 52.69, 56.81, 62.22, 68.44, 75.0, 84.88, 104.12, 150.0]
- met:
- <<: [*met, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*met__eq_bin_opt, *ttbar_var_dependence_n_bins]
- bandwidth: [40.0, 35.0]
- var_bins:
- "<=1": [0.0, 9.22, 13.78, 18.12, 21.91, 25.8, 29.35, 34.7, 41.03, 48.98, 64.82, 150.0]
- ">=2": [0.0, 10.67, 16.47, 21.16, 26.32, 30.32, 35.4, 40.28, 45.93, 51.92, 58.67, 68.58, 77.57, 90.43, 114.09, 150.0]
- deltaEta_ditaupair:
- <<: [*deltaEta_ditaupair, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*deltaEta_ditaupair__eq_bin_opt, *ttbar_var_dependence_n_bins]
- bandwidth: [1.5, 1.75]
- var_bins:
- "<=1": [-4.9, -1.79, -1.25, -0.8, -0.45, -0.21, 0.1, 0.35, 0.7, 1.02, 1.58, 4.9]
- ">=2": [-4.9, -2.13, -1.5, -1.12, -0.84, -0.59, -0.35, -0.11, 0.1, 0.36, 0.6, 0.86, 1.17, 1.55, 2.08, 4.9]
- deltaR_ditaupair:
- <<: [*deltaR_ditaupair, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*deltaR_ditaupair__eq_bin_opt, *ttbar_var_dependence_n_bins]
- bandwidth: [0.75, 1.0]
- var_bins:
- "<=1": [0.5, 1.3127, 1.8146, 2.1011, 2.4094, 2.5761, 2.7567, 2.8974, 3.0268, 3.135, 3.3705, 5.0]
- ">=2": [0.5, 0.8992, 1.2307, 1.4672, 1.6895, 1.8938, 2.0824, 2.2625, 2.4282, 2.5938, 2.7408, 2.9351, 3.0912, 3.2107, 3.5425, 5.0]
- deltaR_1j1:
- <<: [*deltaR_1j1, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*deltaR_1j1__eq_bin_opt, *ttbar_var_dependence_n_bins]
- var_bins:
- "<=1": [0.5, 1.463, 1.926, 2.258, 2.439, 2.576, 2.729, 2.877, 3.041, 3.194, 3.576, 7.0]
- ">=2": [0.5, 1.184, 1.565, 1.866, 2.105, 2.308, 2.508, 2.634, 2.772, 2.901, 3.014, 3.105, 3.252, 3.49, 3.886, 7.0]
- deltaR_12j1:
- <<: [*deltaR_12j1, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*deltaR_12j1__eq_bin_opt, *ttbar_var_dependence_n_bins]
- correction_option: "smoothed"
- bandwidth: [1.5, 1.5]
- var_bins:
- "<=1": [0.0, 1.95, 2.47, 2.74, 2.89, 2.98, 3.06, 3.14, 3.29, 3.62, 4.16, 10.0]
- ">=2": [0.0, 1.52, 2.0, 2.26, 2.46, 2.6, 2.72, 2.84, 2.95, 3.04, 3.15, 3.24, 3.39, 3.64, 4.21, 10.0]
- pt_ttjj:
- <<: [*pt_ttjj, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*pt_ttjj__eq_bin_opt, *ttbar_var_dependence_n_bins]
- correction_option: ["skip", "smoothed"]
- var_bins:
- "<=1": [0.0, 150.0]
- ">=2": [0.0, 12.41, 17.29, 22.62, 27.07, 31.94, 36.75, 41.57, 45.61, 50.55, 57.19, 63.8, 72.66, 84.61, 100.26, 150.0]
- mass_2:
- <<: [*mass_2, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*mass_2__eq_bin_opt, *ttbar_var_dependence_n_bins]
- bandwidth: [0.225, 0.175]
- var_bins:
- "<=1": [0.0, 0.2, 0.5444, 0.6445, 0.728, 0.8008, 0.8672, 0.9683, 1.0303, 1.1143, 1.2314, 1.3623, 2.0]
- ">=2": [0.0, 0.2, 0.4619, 0.5469, 0.6182, 0.6982, 0.7715, 0.8232, 0.8687, 0.938, 0.9937, 1.0635, 1.1367, 1.2285, 1.3125, 1.3984, 2.0]
- mt_tot:
- <<: [*mt_tot, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*mt_tot__eq_bin_opt, *ttbar_var_dependence_n_bins]
- bandwidth: [35.0, 40.0]
- var_bins:
- "<=1": [0.0, 63.54, 78.85, 85.94, 92.79, 98.62, 104.47, 115.13, 125.29, 139.43, 162.49, 250.0]
- ">=2": [0.0, 49.93, 64.75, 75.18, 84.76, 92.22, 99.33, 105.61, 111.19, 118.26, 125.95, 136.61, 150.02, 166.48, 192.71, 250.0]
- m_vis:
- <<: [*m_vis, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*m_vis__eq_bin_opt, *ttbar_var_dependence_n_bins]
- bandwidth: [20, 20]
- var_bins:
- "<=1": [10.0, 55.86, 65.63, 72.36, 79.46, 86.85, 92.84, 101.48, 112.62, 129.11, 156.72, 250.0]
- ">=2": [10.0, 38.19, 49.23, 58.54, 66.52, 73.73, 80.35, 87.79, 94.93, 103.08, 111.97, 122.64, 136.18, 155.85, 185.32, 250.0]
- iso_1:
- <<: [*iso_1, *2j_split, *correction_variations__without_mc_subtraction_shift]
- equipopulated_binning_options:
- <<: [*iso_1__eq_bin_opt, *ttbar_var_dependence_n_bins]
- bandwidth: [0.05, 0.05]
- var_bins:
- "<=1": [0.0, 0.00005, 0.006302, 0.010062, 0.015117, 0.019868, 0.026705, 0.034487, 0.042188, 0.053464, 0.070283, 0.09919, 0.15]
- ">=2": [0.0, 0.00005, 0.00474, 0.0073, 0.009086, 0.012093, 0.015567, 0.01926, 0.024698, 0.029262, 0.036078, 0.044737, 0.054924, 0.067122, 0.08194, 0.107338, 0.15]
-
\ No newline at end of file
diff --git a/configs/smhtt_ul/2018/corrections_mt.yaml b/configs/smhtt_ul/2018/corrections_mt.yaml
new file mode 120000
index 0000000..a7cd0ae
--- /dev/null
+++ b/configs/smhtt_ul/2018/corrections_mt.yaml
@@ -0,0 +1 @@
+corrections_mt.yaml.classic
\ No newline at end of file
diff --git a/configs/smhtt_ul/2018/corrections_mt.yaml.classic b/configs/smhtt_ul/2018/corrections_mt.yaml.classic
new file mode 100644
index 0000000..6af3500
--- /dev/null
+++ b/configs/smhtt_ul/2018/corrections_mt.yaml.classic
@@ -0,0 +1,802 @@
+templates:
+ split_schemes:
+ 3j: &3j_split
+ split_categories:
+ njets: ["==0", "==1", ">=2"]
+ split_categories_binedges:
+ njets: [-0.5, 0.5, 1.5, 22.5]
+ correction_option:
+ "==0": "smoothed"
+ "==1": "smoothed"
+ ">=2": "smoothed"
+ bandwidth:
+ "==0": 1.0
+ "==1": 1.0
+ ">=2": 1.0
+ 2j: &2j_split
+ split_categories:
+ njets: ["<=1", ">=2"]
+ split_categories_binedges:
+ njets: [-0.5, 1.5, 22.5]
+ correction_option:
+ "<=1": "smoothed"
+ ">=2": "smoothed"
+ bandwidth:
+ "<=1": 1
+ ">=2": 1
+ var_dependence_n_bins:
+ QCD:
+ equipopulated_binning_options: &QCD_var_dependence_n_bins
+ var_dependence_n_bins:
+ "==0": 11
+ "==1": 9
+ ">=2": 7
+ Wjets:
+ equipopulated_binning_options: &Wjets_var_dependence_n_bins
+ var_dependence_n_bins:
+ "==0": 15
+ "==1": 11
+ ">=2": 9
+ ttbar:
+ equipopulated_binning_options: &ttbar_var_dependence_n_bins
+ var_dependence_n_bins:
+ "<=1": 11
+ ">=2": 15
+ correction_variations__with_mc_subtraction_shift: &correction_variations__with_mc_subtraction_shift
+ correction_variations:
+ - StatShift
+ - SystMCShift
+ - SystBandAsym
+ correction_variations__without_mc_subtraction_shift: &correction_variations__without_mc_subtraction_shift
+ correction_variations:
+ - StatShift
+ - SystBandAsym
+ variables:
+ eta_1: &eta_1
+ var_dependence: eta_1
+ equipopulated_binning_options: &eta_1__eq_bin_opt
+ variable_config:
+ eta_1:
+ min: -2.1
+ max: +2.1
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 1.0
+ eta_2: &eta_2
+ var_dependence: eta_2
+ equipopulated_binning_options: &eta_2__eq_bin_opt
+ variable_config:
+ eta_2:
+ min: -2.5
+ max: +2.5
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 1.5
+ deltaEta_ditaupair: &deltaEta_ditaupair
+ var_dependence: deltaEta_ditaupair
+ equipopulated_binning_options: &deltaEta_ditaupair__eq_bin_opt
+ variable_config:
+ deltaEta_ditaupair:
+ min: -4.9
+ max: +4.9
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 1.75
+ deltaR_ditaupair: &deltaR_ditaupair
+ var_dependence: deltaR_ditaupair
+ equipopulated_binning_options: &deltaR_ditaupair__eq_bin_opt
+ variable_config:
+ deltaR_ditaupair:
+ min: 0.5
+ max: 5.0
+ rounding: 4
+ correction_option: "smoothed"
+ jeta_1: &jeta_1
+ var_dependence: jeta_1
+ equipopulated_binning_options: &jeta_1__eq_bin_opt
+ variable_config:
+ jeta_1:
+ min: -5.0
+ max: +5.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 2.0
+ jeta_2: &jeta_2
+ var_dependence: jeta_2
+ equipopulated_binning_options: &jeta_2__eq_bin_opt
+ variable_config:
+ jeta_2:
+ min: -5.0
+ max: +5.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 2.0
+ jpt_1: &jpt_1
+ var_dependence: jpt_1
+ equipopulated_binning_options: &jpt_1__eq_bin_opt
+ variable_config:
+ jpt_1:
+ min: 30.0
+ max: 150.0
+ rounding: 2
+ correction_option: "smoothed"
+ jpt_2: &jpt_2
+ var_dependence: jpt_2
+ equipopulated_binning_options: &jpt_2__eq_bin_opt
+ variable_config:
+ jpt_2:
+ min: 30.0
+ max: 150.0
+ rounding: 2
+ correction_option: "smoothed"
+ met: &met
+ var_dependence: met
+ equipopulated_binning_options: &met__eq_bin_opt
+ variable_config:
+ met:
+ min: 0.0
+ max: 150.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 25
+ mt_1: &mt_1
+ var_dependence: mt_1
+ equipopulated_binning_options: &mt_1__eq_bin_opt
+ variable_config:
+ mt_1:
+ min: 0.0
+ max: 70.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 20
+ pt_tt: &pt_tt
+ var_dependence: pt_tt
+ equipopulated_binning_options: &pt_tt__eq_bin_opt
+ variable_config:
+ pt_tt:
+ min: 0.0
+ max: 150.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 25
+ pt_ttjj: &pt_ttjj
+ var_dependence: pt_ttjj
+ equipopulated_binning_options: &pt_ttjj__eq_bin_opt
+ variable_config:
+ pt_ttjj:
+ min: 0.0
+ max: 150.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 25
+ mt_tot: &mt_tot
+ var_dependence: mt_tot
+ equipopulated_binning_options: &mt_tot__eq_bin_opt
+ variable_config:
+ mt_tot:
+ min: 0.0
+ max: 250.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 40
+ mass_2: &mass_2
+ var_dependence: mass_2
+ equipopulated_binning_options: &mass_2__eq_bin_opt
+ variable_config:
+ mass_2:
+ min: 0.2
+ max: 2.0
+ rounding: 4
+ add_left: [0.0]
+ correction_option: "binwise#[0]+smoothed"
+ bandwidth: 0.3
+ tau_decaymode_2: &tau_decaymode_2
+ var_dependence: tau_decaymode_2
+ correction_option: "binwise"
+ bandwidth: 1.0
+ var_bins: [-0.5, 0.5, 9.5, 10.5, 11.5]
+ iso_1: &iso_1
+ var_dependence: iso_1
+ equipopulated_binning_options: &iso_1__eq_bin_opt
+ variable_config:
+ iso_1:
+ min: 0.00005
+ max: 0.15
+ rounding: 6
+ add_left: [0.0]
+ correction_option: "binwise#[0]+smoothed"
+ bandwidth: 0.02
+ deltaR_1j1: &deltaR_1j1
+ var_dependence: deltaR_1j1
+ equipopulated_binning_options: &deltaR_1j1__eq_bin_opt
+ variable_config:
+ deltaR_1j1:
+ min: 0.5
+ max: 7.0
+ rounding: 3
+ correction_option: "smoothed"
+ bandwidth: 0.9
+ deltaR_12j1: &deltaR_12j1
+ var_dependence: deltaR_12j1
+ equipopulated_binning_options: &deltaR_12j1__eq_bin_opt
+ variable_config:
+ deltaR_12j1:
+ min: 0.0
+ max: 10.0
+ correction_option: "smoothed"
+ bandwidth: 1.25
+ m_vis: &m_vis
+ var_dependence: m_vis
+ equipopulated_binning_options: &m_vis__eq_bin_opt
+ variable_config:
+ m_vis:
+ min: 40.0
+ max: 250.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 30
+
+channel: mt
+target_processes:
+ QCD:
+ chain_DR_SR_to_non_closure: true
+
+ non_closure:
+ tau_decaymode_2:
+ <<: [*tau_decaymode_2, *3j_split]
+ correction_variations: ["StatShift", "SystMCShift"]
+ eta_1:
+ <<: [*eta_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*eta_1__eq_bin_opt, *QCD_var_dependence_n_bins]
+ bandwidth: [0.8, 1.0, 1.0]
+ var_bins:
+ "==0": [-2.1, -1.56, -1.26, -0.86, -0.5, -0.14, 0.22, 0.56, 0.93, 1.3, 1.63, 2.1]
+ "==1": [-2.1, -1.51, -1.13, -0.66, -0.22, 0.22, 0.7, 1.15, 1.54, 2.1]
+ ">=2": [-2.1, -1.37, -0.82, -0.33, 0.23, 0.81, 1.37, 2.1]
+ eta_2:
+ <<: [*eta_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*eta_2__eq_bin_opt, *QCD_var_dependence_n_bins]
+ bandwidth: [0.9, 1.1, 1.5]
+ var_bins:
+ "==0": [-2.5, -1.68, -1.28, -0.92, -0.57, -0.18, 0.24, 0.62, 0.99, 1.31, 1.78, 2.5]
+ "==1": [-2.5, -1.67, -1.19, -0.73, -0.25, 0.28, 0.72, 1.17, 1.59, 2.5]
+ ">=2": [-2.5, -1.41, -0.87, -0.21, 0.4, 0.94, 1.55, 2.5]
+ jeta_1:
+ <<: [*jeta_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*jeta_1__eq_bin_opt, *QCD_var_dependence_n_bins]
+ var_dependence_n_bins: [1, 9, 7]
+ correction_option: ["skip", "smoothed", "smoothed"]
+ bandwidth: [1.0, 2.5, 2.5]
+ var_bins:
+ "==0": [-5.0, 5.0]
+ "==1": [-5.0, -2.73, -1.63, -0.92, -0.28, 0.34, 0.92, 1.65, 2.61, 5.0]
+ ">=2": [-5.0, -2.02, -1.07, -0.33, 0.31, 0.99, 1.98, 5.0]
+ jeta_2:
+ <<: [*jeta_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*jeta_2__eq_bin_opt, *QCD_var_dependence_n_bins]
+ var_dependence_n_bins: [1, 1, 7]
+ correction_option: ["skip", "skip", "smoothed"]
+ bandwidth: [1.0, 1.0, 4]
+ var_bins:
+ "==0": [-5.0, 5.0]
+ "==1": [-5.0, 5.0]
+ ">=2": [-5.0, -2.1, -1.13, -0.36, 0.33, 1.16, 2.2, 5.0]
+ jpt_1:
+ <<: [*jpt_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*jpt_1__eq_bin_opt, *QCD_var_dependence_n_bins]
+ correction_option: ["skip", "smoothed", "smoothed"]
+ bandwidth: [1.0, 50.0, 60.0]
+ var_bins:
+ "==0": [30.0, 150.0]
+ "==1": [30.0, 31.86, 34.0, 36.75, 40.41, 45.06, 51.5, 61.09, 78.06, 150.0]
+ ">=2": [30.0, 42.38, 51.03, 59.81, 71.12, 86.0, 110.12, 150.0]
+ jpt_2:
+ <<: [*jpt_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*jpt_2__eq_bin_opt, *QCD_var_dependence_n_bins]
+ correction_option: ["skip", "skip", "smoothed"]
+ bandwidth: [1.0, 1.0, 45.0]
+ var_bins:
+ "==0": [30.0, 150.0]
+ "==1": [30.0, 150.0]
+ ">=2": [30.0, 32.44, 35.62, 39.5, 45.22, 54.28, 70.0, 150.0]
+ met:
+ <<: [*met, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*met__eq_bin_opt, *QCD_var_dependence_n_bins]
+ bandwidth: [25.0, 45.0, 60.0]
+ var_bins:
+ "==0": [0.0, 8.43, 12.17, 15.46, 18.63, 21.79, 25.19, 29.1, 33.7, 39.62, 48.82, 150.0]
+ "==1": [0.0, 10.72, 15.73, 20.25, 25.21, 30.93, 37.27, 46.11, 60.71, 150.0]
+ ">=2": [0.0, 15.22, 23.13, 32.17, 41.67, 53.45, 74.36, 150.0]
+ pt_tt:
+ <<: [*pt_tt, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*pt_tt__eq_bin_opt, *QCD_var_dependence_n_bins]
+ bandwidth: [25.0, 35.0, 50.0]
+ var_bins:
+ "==0": [0.0, 7.38, 10.89, 13.94, 16.75, 19.51, 22.79, 26.2, 30.58, 36.0, 44.1, 150.0]
+ "==1": [0.0, 20.06, 28.4, 35.9, 42.23, 49.73, 57.79, 68.77, 85.18, 150.0]
+ ">=2": [0.0, 27.36, 44.71, 60.61, 75.95, 91.15, 112.19, 150.0]
+ deltaEta_ditaupair:
+ <<: [*deltaEta_ditaupair, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaEta_ditaupair__eq_bin_opt, *QCD_var_dependence_n_bins]
+ bandwidth: [1.25, 1.5, 2.0]
+ var_bins:
+ "==0": [-4.9, -2.13, -1.42, -0.93, -0.53, -0.18, 0.16, 0.56, 0.96, 1.43, 2.08, 4.9]
+ "==1": [-4.9, -2.0, -1.23, -0.65, -0.23, 0.25, 0.72, 1.28, 2.04, 4.9]
+ ">=2": [-4.9, -1.9, -1.03, -0.39, 0.26, 0.87, 1.76, 4.9]
+ deltaR_ditaupair:
+ <<: [*deltaR_ditaupair, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_ditaupair__eq_bin_opt, *QCD_var_dependence_n_bins]
+ bandwidth: [0.8, 1.1, 1.2]
+ var_bins:
+ "==0": [0.5, 2.6814, 2.869, 2.9787, 3.0526, 3.1104, 3.1696, 3.2609, 3.3865, 3.5747, 3.9186, 5.0]
+ "==1": [0.5, 1.653, 2.227, 2.5243, 2.7304, 2.9125, 3.0715, 3.2696, 3.6723, 5.0]
+ ">=2": [0.5, 1.3291, 1.9185, 2.3753, 2.7661, 3.0644, 3.4088, 5.0]
+ deltaR_1j1:
+ <<: [*deltaR_1j1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_1j1__eq_bin_opt, *QCD_var_dependence_n_bins]
+ correction_option: ["skip", "smoothed", "smoothed"]
+ var_bins:
+ "==0": [0.5, 7.0]
+ "==1": [0.5, 1.67, 2.205, 2.51, 2.774, 3.019, 3.235, 3.586, 4.235, 7.0]
+ ">=2": [0.5, 1.825, 2.382, 2.761, 3.009, 3.272, 3.759, 7.0]
+ deltaR_12j1:
+ <<: [*deltaR_12j1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_12j1__eq_bin_opt, *QCD_var_dependence_n_bins]
+ correction_option: ["skip", "smoothed", "smoothed"]
+ bandwidth: [1.0, 1.5, 1.5]
+ var_bins:
+ "==0": [0.0, 10.0]
+ "==1": [0.0, 2.28, 2.73, 2.96, 3.11, 3.27, 3.55, 3.98, 4.87, 10.0]
+ ">=2": [0.0, 2.26, 2.72, 2.95, 3.14, 3.46, 4.06, 10.0]
+ pt_ttjj:
+ <<: [*pt_ttjj, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*pt_ttjj__eq_bin_opt, *QCD_var_dependence_n_bins]
+ correction_option: ["skip", "skip", "smoothed"]
+ bandwidth: [1.0, 1.0, 35.0]
+ var_bins:
+ "==0": [0.0, 150.0]
+ "==1": [0.0, 150.0]
+ ">=2": [0.0, 15.52, 22.28, 30.06, 38.0, 49.31, 69.77, 150.0]
+ mass_2:
+ <<: [*mass_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*mass_2__eq_bin_opt, *QCD_var_dependence_n_bins]
+ bandwidth: [0.25, 0.35, 0.4]
+ var_bins:
+ "==0": [0.0, 0.2, 0.5107, 0.6216, 0.7388, 0.835, 0.9121, 0.9941, 1.0732, 1.1543, 1.25, 1.3623, 2.0]
+ "==1": [0.0, 0.2, 0.5322, 0.665, 0.79, 0.896, 1.0088, 1.0957, 1.1982, 1.3271, 2.0]
+ ">=2": [0.0, 0.2, 0.564, 0.7314, 0.8735, 0.9932, 1.1123, 1.2715, 2.0]
+ mt_tot:
+ <<: [*mt_tot, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*mt_tot__eq_bin_opt, *QCD_var_dependence_n_bins]
+ bandwidth: [25, 50, 60]
+ var_bins:
+ "==0": [50.0, 72.17, 77.52, 81.84, 85.76, 89.79, 94.23, 99.24, 105.25, 113.61, 130.06, 250.0]
+ "==1": [0.0, 59.33, 72.18, 80.25, 87.75, 95.78, 103.33, 114.05, 132.26, 250.0]
+ ">=2": [0.0, 53.08, 71.56, 83.98, 96.25, 111.26, 137.98, 250.0]
+ m_vis:
+ <<: [*m_vis, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*m_vis__eq_bin_opt, *QCD_var_dependence_n_bins]
+ bandwidth: [25, 40.0, 60.0]
+ var_bins:
+ "==0": [40.0, 62.77, 67.12, 71.4, 75.97, 80.87, 87.03, 94.65, 105.53, 121.52, 147.95, 250.0]
+ "==1": [40.0, 59.53, 67.51, 74.21, 81.05, 89.85, 101.46, 120.46, 151.46, 250.0]
+ ">=2": [40.0, 59.6, 71.14, 82.96, 98.79, 118.59, 150.46, 250.0]
+ iso_1:
+ <<: [*iso_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: *QCD_var_dependence_n_bins
+ variable_config:
+ iso_1:
+ min: 0.05
+ max: 0.15
+ rounding: 6
+ bandwidth: [0.04, 0.04, 0.04]
+ correction_option: "smoothed"
+ var_bins:
+ "==0": [0.05, 0.056882, 0.063543, 0.070468, 0.078181, 0.086647, 0.095377, 0.105744, 0.115638, 0.126327, 0.137486, 0.15]
+ "==1": [0.05, 0.057096, 0.065105, 0.073987, 0.083517, 0.096737, 0.108642, 0.122276, 0.13489, 0.15]
+ ">=2": [0.05, 0.059351, 0.068657, 0.081325, 0.096132, 0.110882, 0.128247, 0.15]
+
+ DR_SR:
+ use_embedding: false
+ use_orthogonal_fake_factors: true
+ compute_orthogonal_fake_factors_using_data: true
+ <<: [*mt_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*mt_1__eq_bin_opt, *QCD_var_dependence_n_bins]
+ var_dependence_n_bins:
+ "==0": 9
+ "==1": 7
+ ">=2": 5
+ variable_config:
+ mt_1:
+ min: 0.0
+ max: 50.0
+ rounding: 2
+ bandwidth: [25.0, 40.0, 40.0]
+ var_bins:
+ "==0": [0.0, 3.07, 6.12, 9.59, 13.53, 17.81, 23.01, 29.07, 37.62, 50.0]
+ "==1": [0.0, 4.49, 9.31, 15.38, 21.65, 29.06, 38.65, 50.0]
+ ">=2": [0.0, 6.97, 14.92, 24.8, 36.54, 50.0]
+ SRlike_cuts: # intentionally kept orthogonal to the fake_factors_mt.yaml definition!
+ lep_mt: (mt_1 < 50)
+ nbtag: (nbtag >= 0)
+ tau_pair_sign: ((q_1*q_2) > 0)
+ lep_iso: "(iso_1 > 0.15)"
+ ARlike_cuts: # intentionally kept orthogonal to the fake_factors_mt.yaml definition!
+ lep_mt: (mt_1 < 50)
+ nbtag: (nbtag >= 0)
+ tau_pair_sign: ((q_1*q_2) > 0)
+ lep_iso: "(iso_1 > 0.15)"
+ AR_SR_cuts:
+ lep_mt: (mt_1 < 50)
+ nbtag: (nbtag >= 0)
+ tau_pair_sign: ((q_1*q_2) < 0)
+ lep_iso: "(iso_1 > 0.15)"
+
+ Wjets:
+ chain_DR_SR_to_non_closure: true
+
+ non_closure:
+ tau_decaymode_2:
+ <<: [*tau_decaymode_2, *3j_split]
+ correction_variations: ["StatShift", "SystMCShift"]
+ eta_1:
+ <<: [*eta_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*eta_1__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ var_dependence_n_bins:
+ "==0": 15
+ "==1": 11
+ ">=2": 5
+ bandwidth: [0.65, 0.85, 1.25]
+ var_bins:
+ "==0": [-2.1, -1.73, -1.43, -1.15, -0.88, -0.62, -0.38, -0.12, 0.12, 0.38, 0.64, 0.89, 1.16, 1.45, 1.75, 2.1]
+ "==1": [-2.1, -1.61, -1.22, -0.85, -0.49, -0.13, 0.2, 0.55, 0.89, 1.25, 1.64, 2.1]
+ ">=2": [-2.1, -1.1, -0.33, 0.39, 1.16, 2.1]
+ eta_2:
+ <<: [*eta_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*eta_2__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [0.6, 0.9, 1.2]
+ var_bins:
+ "==0": [-2.5, -1.64, -1.28, -1.03, -0.78, -0.55, -0.32, -0.09, 0.15, 0.37, 0.58, 0.81, 1.03, 1.29, 1.67, 2.5]
+ "==1": [-2.5, -1.52, -1.12, -0.78, -0.43, -0.11, 0.22, 0.51, 0.82, 1.14, 1.55, 2.5]
+ ">=2": [-2.5, -1.41, -0.95, -0.55, -0.14, 0.25, 0.63, 1.0, 1.43, 2.5]
+ jeta_1:
+ <<: [*jeta_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*jeta_1__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ var_dependence_n_bins: [1, 9, 7]
+ correction_option: ["skip", "smoothed", "smoothed"]
+ bandwidth: [1.0, 2.0, 2.0]
+ var_bins:
+ "==0": [-5.0, 5.0]
+ "==1": [-5.0, -2.71, -1.69, -0.94, -0.3, 0.34, 1.0, 1.77, 2.77, 5.0]
+ ">=2": [-5.0, -2.03, -1.07, -0.35, 0.35, 1.08, 2.07, 5.0]
+ jeta_2:
+ <<: [*jeta_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*jeta_2__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ var_dependence_n_bins: [1, 1, 7]
+ correction_option: ["skip", "skip", "smoothed"]
+ bandwidth: [1.0, 1.0, 3.0]
+ var_bins:
+ "==0": [-5.0, 5.0]
+ "==1": [-5.0, 5.0]
+ ">=2": [-5.0, -2.35, -1.22, -0.36, 0.39, 1.2, 2.28, 5.0]
+ jpt_1:
+ <<: [*jpt_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*jpt_1__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ correction_option: ["skip", "smoothed", "smoothed"]
+ bandwidth: [1.0, 40.0, 55.0]
+ var_bins:
+ "==0": [30.0, 150.0]
+ "==1": [30.0, 31.75, 33.75, 36.06, 38.91, 42.56, 46.97, 52.62, 59.94, 70.38, 89.44, 150.0]
+ ">=2": [30.0, 40.97, 48.16, 55.28, 63.12, 71.88, 82.69, 97.0, 117.88, 150.0]
+ jpt_2:
+ <<: [*jpt_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*jpt_2__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ correction_option: ["skip", "skip", "smoothed"]
+ bandwidth: [1.0, 1.0, 45.0]
+ var_bins:
+ "==0": [30.0, 150.0]
+ "==1": [30.0, 150.0]
+ ">=2": [30.0, 31.98, 34.44, 37.22, 41.0, 45.97, 52.78, 62.31, 79.88, 150.0]
+ met:
+ <<: [*met, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*met__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [20.0, 30.0, 60.0]
+ var_bins:
+ "==0": [0.0, 26.44, 31.34, 35.35, 38.85, 42.17, 45.53, 48.76, 52.09, 55.59, 59.33, 63.57, 68.59, 74.89, 84.82, 150.0]
+ "==1": [0.0, 30.65, 37.18, 42.51, 47.1, 51.9, 56.98, 62.52, 69.52, 78.66, 93.41, 150.0]
+ ">=2": [0.0, 34.19, 43.15, 50.77, 58.13, 66.48, 76.11, 88.39, 107.4, 150.0]
+ pt_tt:
+ <<: [*pt_tt, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*pt_tt__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [25.0, 35.0, 50.0]
+ var_bins:
+ "==0": [0.0, 6.97, 10.21, 12.86, 15.37, 17.8, 20.33, 22.96, 25.8, 28.95, 32.51, 36.56, 41.44, 47.79, 57.64, 150.0]
+ "==1": [0.0, 20.67, 28.94, 35.54, 41.79, 48.14, 54.45, 61.69, 70.13, 81.43, 99.35, 150.0]
+ ">=2": [0.0, 28.71, 41.98, 53.14, 64.21, 75.14, 87.86, 102.82, 121.49, 150.0]
+ deltaEta_ditaupair:
+ <<: [*deltaEta_ditaupair, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaEta_ditaupair__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [0.8, 1.1, 1.25]
+ var_bins:
+ "==0": [-4.9, -1.9, -1.36, -1.02, -0.75, -0.52, -0.31, -0.11, 0.09, 0.3, 0.51, 0.74, 1.0, 1.34, 1.87, 4.9]
+ "==1": [-4.9, -1.75, -1.16, -0.75, -0.43, -0.14, 0.14, 0.43, 0.74, 1.14, 1.74, 4.9]
+ ">=2": [-4.9, -1.56, -0.96, -0.53, -0.16, 0.19, 0.54, 0.95, 1.54, 4.9]
+ deltaR_ditaupair:
+ <<: [*deltaR_ditaupair, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_ditaupair__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [0.5, 0.6, 0.7]
+ var_bins:
+ "==0": [0.5, 1.291, 1.6664, 1.9482, 2.1706, 2.3528, 2.5196, 2.6606, 2.783, 2.8902, 2.9821, 3.0643, 3.1321, 3.2287, 3.4481, 5.0]
+ "==1": [0.5, 1.1946, 1.5966, 1.9154, 2.1746, 2.4058, 2.6139, 2.801, 2.9638, 3.1049, 3.3191, 5.0]
+ ">=2": [0.5, 1.1567, 1.5816, 1.9338, 2.2206, 2.4933, 2.7366, 2.9631, 3.1951, 5.0]
+ deltaR_1j1:
+ <<: [*deltaR_1j1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_1j1__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ correction_option: ["skip", "smoothed", "smoothed"]
+ bandwidth: [1.0, 1.0, 1.2]
+ var_bins:
+ "==0": [0.5, 7.0]
+ "==1": [0.5, 1.257, 1.688, 2.026, 2.312, 2.572, 2.811, 3.037, 3.282, 3.69, 4.342, 7.0]
+ ">=2": [0.5, 1.242, 1.733, 2.12, 2.446, 2.72, 2.978, 3.261, 3.842, 7.0]
+ deltaR_12j1:
+ <<: [*deltaR_12j1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_12j1__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ correction_option: ["skip", "smoothed", "smoothed"]
+ bandwidth: [1.0, 1.5, 1.75]
+ var_bins:
+ "==0": [0.0, 10.0]
+ "==1": [0.0, 1.78, 2.26, 2.55, 2.77, 2.94, 3.1, 3.29, 3.58, 4.02, 4.82, 10.0]
+ ">=2": [0.0, 1.78, 2.28, 2.61, 2.84, 3.04, 3.23, 3.57, 4.21, 10.0]
+ pt_ttjj:
+ <<: [*pt_ttjj, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*pt_ttjj__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ correction_option: ["skip", "skip", "smoothed"]
+ bandwidth: [1.0, 1.0, 30]
+ var_bins:
+ "==0": [0.0, 150.0]
+ "==1": [0.0, 150.0]
+ ">=2": [0.0, 13.13, 19.85, 25.75, 31.71, 38.56, 46.7, 57.79, 75.16, 150.0]
+ mass_2:
+ <<: [*mass_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*mass_2__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [0.225, 0.225, 0.25]
+ var_bins:
+ "==0": [0.0, 0.2, 0.4827, 0.5635, 0.6396, 0.707, 0.7681, 0.8281, 0.8857, 0.9453, 1.0078, 1.0713, 1.1377, 1.2109, 1.2949, 1.4092, 2.0]
+ "==1": [0.0, 0.2, 0.5088, 0.6177, 0.7119, 0.7959, 0.8696, 0.9551, 1.0381, 1.1279, 1.2285, 1.3545, 2.0]
+ ">=2": [0.0, 0.2, 0.5308, 0.6577, 0.7612, 0.8506, 0.957, 1.0625, 1.1758, 1.3076, 2.0]
+ mt_tot:
+ <<: [*mt_tot, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*mt_tot__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [20.0, 30.0, 50.0]
+ var_bins:
+ "==0": [90.0, 107.0, 114.05, 118.88, 122.8, 126.47, 129.96, 133.5, 137.38, 141.66, 146.66, 152.62, 160.27, 171.17, 189.87, 250.0]
+ "==1": [80.0, 105.43, 113.18, 120.11, 126.5, 133.37, 140.45, 148.83, 158.49, 171.81, 194.95, 250.0]
+ ">=2": [80.0, 107.35, 119.32, 129.92, 139.77, 150.85, 163.8, 180.28, 204.68, 250.0]
+ m_vis:
+ <<: [*m_vis, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*m_vis__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [20, 30, 40]
+ var_bins:
+ "==0": [40.0, 53.81, 63.19, 69.57, 74.63, 80.07, 85.73, 91.46, 97.14, 103.66, 111.09, 120.22, 132.11, 148.61, 175.66, 250.0]
+ "==1": [40.0, 56.8, 66.41, 74.1, 82.15, 90.75, 100.06, 111.42, 126.0, 145.26, 175.05, 250.0]
+ ">=2": [40.0, 59.87, 70.97, 81.27, 93.8, 107.02, 123.57, 145.41, 177.53, 250.0]
+ iso_1:
+ <<: [*iso_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*iso_1__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [0.03, 0.04, 0.05]
+ var_bins:
+ "==0": [0.0, 0.00005, 0.004932, 0.007516, 0.010087, 0.012785, 0.015814, 0.019493, 0.023479, 0.028324, 0.034053, 0.040962, 0.049714, 0.060662, 0.075963, 0.100649, 0.15]
+ "==1": [0.0, 0.00005, 0.005621, 0.009072, 0.012884, 0.017279, 0.022713, 0.029417, 0.037605, 0.049066, 0.064637, 0.090462, 0.15]
+ ">=2": [0.0, 0.00005, 0.005602, 0.009668, 0.014723, 0.020748, 0.028099, 0.039254, 0.055474, 0.082078, 0.15]
+
+ DR_SR:
+ use_embedding: true
+ use_orthogonal_fake_factors: true
+ compute_orthogonal_fake_factors_using_data: false
+ <<: [*mt_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*mt_1__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ var_dependence_n_bins:
+ "==0": 13
+ "==1": 9
+ ">=2": 7
+ bandwidth: [25.0, 40.0, 50.0]
+ var_bins:
+ "==0": [0.0, 2.48, 5.08, 7.83, 10.91, 14.38, 18.35, 23.16, 28.9, 35.74, 43.7, 52.34, 61.16, 70.0]
+ "==1": [0.0, 5.24, 11.06, 17.27, 24.28, 32.24, 40.86, 50.19, 59.96, 70.0]
+ ">=2": [0.0, 6.96, 14.47, 22.71, 32.19, 43.21, 56.0, 70.0]
+ SRlike_cuts: # intentionally kept orthogonal to the fake_factors_mt.yaml definition!
+ lep_mt: (mt_1 > 70)
+ nbtag: (nbtag == 0)
+ tau_pair_sign: ((q_1*q_2) < 0)
+ lep_iso: ((iso_1 >= 0.0) && (iso_1 <= 0.15))
+ ARlike_cuts: # intentionally kept orthogonal to the fake_factors_mt.yaml definition!
+ lep_mt: (mt_1 > 70)
+ nbtag: (nbtag == 0)
+ tau_pair_sign: ((q_1*q_2) < 0)
+ lep_iso: ((iso_1 >= 0.0) && (iso_1 <= 0.15))
+ AR_SR_cuts: # these cuts are applied to MC only!
+ lep_mt: (mt_1 < 70)
+ nbtag: (nbtag >= 0)
+ tau_pair_sign: ((q_1*q_2) < 0)
+ lep_iso: ((iso_1 >= 0.0) && (iso_1 <= 0.15))
+
+ ttbar:
+ non_closure:
+ tau_decaymode_2:
+ <<: [*tau_decaymode_2, *2j_split]
+ correction_variations: ["StatShift"]
+ eta_1:
+ <<: [*eta_1, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*eta_1__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [0.6, 1.0]
+ var_bins:
+ "<=1": [-2.1, -1.52, -1.08, -0.74, -0.44, -0.14, 0.13, 0.43, 0.72, 1.08, 1.51, 2.1]
+ ">=2": [-2.1, -1.61, -1.28, -1.0, -0.76, -0.53, -0.32, -0.1, 0.1, 0.33, 0.53, 0.76, 1.01, 1.29, 1.62, 2.1]
+ eta_2:
+ <<: [*eta_2, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*eta_2__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [0.7, 0.7]
+ var_bins:
+ "<=1": [-2.5, -1.6, -1.18, -0.83, -0.48, -0.14, 0.19, 0.51, 0.87, 1.19, 1.59, 2.5]
+ ">=2": [-2.5, -1.83, -1.42, -1.15, -0.89, -0.62, -0.36, -0.1, 0.15, 0.4, 0.66, 0.93, 1.18, 1.44, 1.83, 2.5]
+ jeta_1:
+ <<: [*jeta_1, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*jeta_1__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ correction_option: "smoothed"
+ bandwidth: [1.0, 2.0]
+ var_bins:
+ "<=1": [-5.0, -1.8, -1.25, -0.85, -0.48, -0.17, 0.16, 0.49, 0.83, 1.24, 1.83, 5.0]
+ ">=2": [-5.0, -2.06, -1.56, -1.19, -0.88, -0.62, -0.36, -0.12, 0.13, 0.37, 0.63, 0.91, 1.22, 1.58, 2.08, 5.0]
+ jeta_2:
+ <<: [*jeta_2, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*jeta_2__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ var_dependence_n_bins: [1, 15]
+ correction_option: ["skip", "smoothed"]
+ var_bins:
+ "<=1": [-5.0, 5.0]
+ ">=2": [-5.0, -2.23, -1.67, -1.27, -0.95, -0.66, -0.39, -0.12, 0.13, 0.39, 0.67, 0.95, 1.29, 1.69, 2.25, 5.0]
+ jpt_1:
+ <<: [*jpt_1, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*jpt_1__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [30.0, 20.0]
+ var_bins:
+ "<=1": [30.0, 35.36, 40.96, 46.65, 52.23, 58.54, 65.09, 72.75, 82.08, 94.53, 111.82, 150.0]
+ ">=2": [30.0, 47.65, 54.97, 60.91, 66.28, 71.41, 76.42, 81.65, 87.12, 92.85, 99.07, 105.86, 113.72, 123.2, 135.2, 150.0]
+ jpt_2:
+ <<: [*jpt_2, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*jpt_2__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [30.0, 30.0]
+ correction_option: ["skip", "smoothed"]
+ var_bins:
+ "<=1": [30.0, 150.0]
+ ">=2": [30.0, 34.04, 37.51, 40.9, 44.36, 47.9, 51.37, 55.15, 59.19, 63.62, 68.59, 74.66, 82.46, 93.05, 109.69, 150.0]
+ met:
+ <<: [*met, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*met__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [40.0, 35.0]
+ var_bins:
+ "<=1": [0.0, 14.66, 22.03, 28.03, 34.31, 40.61, 47.23, 55.17, 64.25, 76.67, 95.52, 150.0]
+ ">=2": [0.0, 13.5, 19.68, 25.11, 30.24, 35.17, 40.53, 46.2, 52.21, 58.7, 66.15, 74.85, 85.02, 97.62, 116.03, 150.0]
+ pt_tt:
+ <<: [*pt_tt, *2j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*pt_tt__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [25.0, 35.0]
+ var_bins:
+ "<=1": [0.0, 24.45, 35.1, 43.91, 52.13, 60.74, 69.18, 78.19, 87.94, 100.55, 117.97, 150.0]
+ ">=2": [0.0, 25.92, 38.05, 47.24, 55.39, 62.86, 69.81, 76.99, 84.08, 91.29, 98.76, 106.54, 115.17, 124.87, 136.3, 150.0]
+ deltaEta_ditaupair:
+ <<: [*deltaEta_ditaupair, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaEta_ditaupair__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [1.5, 1.75]
+ var_bins:
+ "<=1": [-4.9, -1.57, -1.06, -0.71, -0.43, -0.17, 0.11, 0.39, 0.69, 1.05, 1.56, 4.9]
+ ">=2": [-4.9, -2.0, -1.47, -1.1, -0.82, -0.58, -0.35, -0.12, 0.1, 0.33, 0.55, 0.79, 1.07, 1.43, 1.99, 4.9]
+ deltaR_ditaupair:
+ <<: [*deltaR_ditaupair, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_ditaupair__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [0.75, 1.0]
+ var_bins:
+ "<=1": [0.5, 1.2589, 1.6931, 2.0017, 2.272, 2.4783, 2.6662, 2.8297, 2.9717, 3.103, 3.283, 5.0]
+ ">=2": [0.5, 0.9472, 1.2341, 1.4726, 1.6983, 1.9055, 2.0972, 2.2769, 2.4468, 2.6064, 2.7588, 2.9015, 3.0423, 3.1826, 3.465, 5.0]
+ deltaR_1j1:
+ <<: [*deltaR_1j1, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_1j1__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ var_bins:
+ "<=1": [0.5, 1.343, 1.785, 2.09, 2.344, 2.552, 2.723, 2.882, 3.037, 3.185, 3.478, 7.0]
+ ">=2": [0.5, 1.158, 1.535, 1.836, 2.086, 2.291, 2.467, 2.618, 2.754, 2.872, 2.984, 3.087, 3.201, 3.395, 3.767, 7.0]
+ deltaR_12j1:
+ <<: [*deltaR_12j1, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_12j1__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ correction_option: "smoothed"
+ bandwidth: [1.5, 1.5]
+ var_bins:
+ "<=1": [0.0, 1.8, 2.26, 2.53, 2.73, 2.88, 3.0, 3.11, 3.25, 3.48, 3.87, 10.0]
+ ">=2": [0.0, 1.5, 1.94, 2.22, 2.42, 2.58, 2.72, 2.83, 2.94, 3.03, 3.12, 3.23, 3.39, 3.64, 4.09, 10.0]
+ pt_ttjj:
+ <<: [*pt_ttjj, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*pt_ttjj__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ correction_option: ["skip", "smoothed"]
+ var_bins:
+ "<=1": [0.0, 150.0]
+ ">=2": [0.0, 15.23, 22.09, 27.98, 33.27, 38.42, 43.61, 48.82, 54.23, 60.19, 66.83, 74.34, 83.25, 95.43, 112.85, 150.0]
+ mass_2:
+ <<: [*mass_2, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*mass_2__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [0.225, 0.175]
+ var_bins:
+ "<=1": [0.0, 0.2, 0.5542, 0.7139, 0.8491, 0.9507, 1.0391, 1.1172, 1.1963, 1.2773, 1.374, 1.4785, 2.0]
+ ">=2": [0.0, 0.2, 0.4985, 0.6196, 0.7344, 0.8311, 0.9072, 0.9736, 1.0352, 1.0957, 1.1553, 1.2168, 1.2793, 1.3447, 1.416, 1.5039, 2.0]
+ mt_tot:
+ <<: [*mt_tot, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*mt_tot__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [35.0, 40.0]
+ var_bins:
+ "<=1": [0.0, 71.15, 85.76, 95.51, 104.09, 111.98, 120.16, 129.15, 140.06, 155.19, 180.06, 250.0]
+ ">=2": [0.0, 54.45, 70.52, 80.81, 88.59, 95.47, 102.0, 108.62, 115.3, 122.53, 130.56, 139.48, 150.09, 164.95, 189.62, 250.0]
+ m_vis:
+ <<: [*m_vis, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*m_vis__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [20, 20]
+ var_bins:
+ "<=1": [40.0, 57.23, 66.64, 74.08, 80.63, 87.14, 94.16, 102.36, 112.55, 128.21, 163.02, 250.0]
+ ">=2": [40.0, 52.04, 60.23, 66.79, 72.63, 78.31, 83.65, 89.47, 95.56, 102.24, 110.06, 119.97, 133.62, 153.79, 184.28, 250.0]
+ iso_1:
+ <<: [*iso_1, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*iso_1__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [0.05, 0.05]
+ var_bins:
+ "<=1": [0.0, 0.00005, 0.006133, 0.009499, 0.01269, 0.017176, 0.022144, 0.027969, 0.0369, 0.048008, 0.063692, 0.090369, 0.15]
+ ">=2": [0.0, 0.00005, 0.004676, 0.007108, 0.009587, 0.012164, 0.015111, 0.018535, 0.022638, 0.027259, 0.032977, 0.039821, 0.048317, 0.060037, 0.075678, 0.101079, 0.15]
diff --git a/configs/smhtt_ul/2018/corrections_mt.yaml.ml b/configs/smhtt_ul/2018/corrections_mt.yaml.ml
new file mode 100644
index 0000000..48ff21c
--- /dev/null
+++ b/configs/smhtt_ul/2018/corrections_mt.yaml.ml
@@ -0,0 +1,430 @@
+templates:
+ split_schemes:
+ 3j: &3j_split
+ split_categories:
+ njets: ["==0", "==1", ">=2"]
+ split_categories_binedges:
+ njets: [-0.5, 0.5, 1.5, 22.5]
+ correction_option:
+ "==0": "smoothed"
+ "==1": "smoothed"
+ ">=2": "smoothed"
+ bandwidth:
+ "==0": 1.0
+ "==1": 1.0
+ ">=2": 1.0
+ 2j: &2j_split
+ split_categories:
+ njets: ["<=1", ">=2"]
+ split_categories_binedges:
+ njets: [-0.5, 1.5, 22.5]
+ correction_option:
+ "<=1": "smoothed"
+ ">=2": "smoothed"
+ bandwidth:
+ "<=1": 1
+ ">=2": 1
+ var_dependence_n_bins:
+ QCD:
+ equipopulated_binning_options: &QCD_var_dependence_n_bins
+ var_dependence_n_bins:
+ "==0": 11
+ "==1": 9
+ ">=2": 7
+ Wjets:
+ equipopulated_binning_options: &Wjets_var_dependence_n_bins
+ var_dependence_n_bins:
+ "==0": 15
+ "==1": 11
+ ">=2": 9
+ ttbar:
+ equipopulated_binning_options: &ttbar_var_dependence_n_bins
+ var_dependence_n_bins:
+ "<=1": 11
+ ">=2": 15
+ correction_variations__with_mc_subtraction_shift: &correction_variations__with_mc_subtraction_shift
+ correction_variations:
+ - StatShift
+ - SystMCShift
+ - SystBandAsym
+ correction_variations__without_mc_subtraction_shift: &correction_variations__without_mc_subtraction_shift
+ correction_variations:
+ - StatShift
+ - SystBandAsym
+ variables:
+ eta_1:
+ var_dependence: eta_1
+ equipopulated_binning_options:
+ variable_config:
+ eta_1:
+ min: -2.1
+ max: +2.1
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 1.0
+ eta_2:
+ var_dependence: eta_2
+ equipopulated_binning_options:
+ variable_config:
+ eta_2:
+ min: -2.5
+ max: +2.5
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 1.5
+ deltaEta_ditaupair: &deltaEta_ditaupair
+ var_dependence: deltaEta_ditaupair
+ equipopulated_binning_options: &deltaEta_ditaupair__eq_bin_opt
+ variable_config:
+ deltaEta_ditaupair:
+ min: -4.9
+ max: +4.9
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 1.75
+ deltaR_ditaupair: &deltaR_ditaupair
+ var_dependence: deltaR_ditaupair
+ equipopulated_binning_options: &deltaR_ditaupair__eq_bin_opt
+ variable_config:
+ deltaR_ditaupair:
+ min: 0.5
+ max: 5.0
+ rounding: 4
+ correction_option: "smoothed"
+ jeta_1:
+ var_dependence: jeta_1
+ equipopulated_binning_options:
+ variable_config:
+ jeta_1:
+ min: -5.0
+ max: +5.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 2.0
+ jeta_2:
+ var_dependence: jeta_2
+ equipopulated_binning_options:
+ variable_config:
+ jeta_2:
+ min: -5.0
+ max: +5.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 2.0
+ jpt_1:
+ var_dependence: jpt_1
+ equipopulated_binning_options:
+ variable_config:
+ jpt_1:
+ min: 30.0
+ max: 150.0
+ rounding: 2
+ correction_option: "smoothed"
+ jpt_2:
+ var_dependence: jpt_2
+ equipopulated_binning_options:
+ variable_config:
+ jpt_2:
+ min: 30.0
+ max: 150.0
+ rounding: 2
+ correction_option: "smoothed"
+ met:
+ var_dependence: met
+ equipopulated_binning_options:
+ variable_config:
+ met:
+ min: 0.0
+ max: 150.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 25
+ mt_1:
+ var_dependence: mt_1
+ equipopulated_binning_options:
+ variable_config:
+ mt_1:
+ min: 0.0
+ max: 70.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 20
+ pt_tt:
+ var_dependence: pt_tt
+ equipopulated_binning_options:
+ variable_config:
+ pt_tt:
+ min: 0.0
+ max: 150.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 25
+ pt_ttjj: &pt_ttjj
+ var_dependence: pt_ttjj
+ equipopulated_binning_options: &pt_ttjj__eq_bin_opt
+ variable_config:
+ pt_ttjj:
+ min: 0.0
+ max: 150.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 25
+ mt_tot:
+ var_dependence: mt_tot
+ equipopulated_binning_options:
+ variable_config:
+ mt_tot:
+ min: 0.0
+ max: 250.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 40
+ mass_2: &mass_2
+ var_dependence: mass_2
+ equipopulated_binning_options: &mass_2__eq_bin_opt
+ variable_config:
+ mass_2:
+ min: 0.2
+ max: 2.0
+ rounding: 4
+ add_left: [0.0]
+ correction_option: "binwise#[0]+smoothed"
+ bandwidth: 0.3
+ tau_decaymode_2: &tau_decaymode_2
+ var_dependence: tau_decaymode_2
+ correction_option: "binwise"
+ bandwidth: 1.0
+ var_bins: [-0.5, 0.5, 9.5, 10.5, 11.5]
+ nbtag:
+ var_dependence: nbtag
+ correction_option: "binwise"
+ bandwidth: 1.0
+ var_bins: [-0.5, 0.5, 1.5, 2.5, 22.5]
+ iso_1: &iso_1
+ var_dependence: iso_1
+ equipopulated_binning_options: &iso_1__eq_bin_opt
+ variable_config:
+ iso_1:
+ min: 0.00005
+ max: 0.15
+ rounding: 6
+ add_left: [0.0]
+ correction_option: "binwise#[0]+smoothed"
+ bandwidth: 0.02
+ deltaR_1j1: &deltaR_1j1
+ var_dependence: deltaR_1j1
+ equipopulated_binning_options: &deltaR_1j1__eq_bin_opt
+ variable_config:
+ deltaR_1j1:
+ min: 0.5
+ max: 7.0
+ rounding: 3
+ correction_option: "smoothed"
+ bandwidth: 0.9
+ deltaR_12j1:
+ var_dependence: deltaR_12j1
+ equipopulated_binning_options:
+ variable_config:
+ deltaR_12j1:
+ min: 0.0
+ max: 10.0
+ correction_option: "smoothed"
+ bandwidth: 1.25
+ m_vis: &m_vis
+ var_dependence: m_vis
+ equipopulated_binning_options: &m_vis__eq_bin_opt
+ variable_config:
+ m_vis:
+ min: 40.0
+ max: 250.0
+ rounding: 2
+ correction_option: "smoothed"
+ bandwidth: 30
+
+channel: mt
+target_processes:
+ QCD:
+ chain_DR_SR_to_non_closure: false
+
+ non_closure:
+ tau_decaymode_2:
+ <<: [*tau_decaymode_2, *3j_split]
+ correction_variations: ["StatShift", "SystMCShift"]
+ deltaEta_ditaupair:
+ <<: [*deltaEta_ditaupair, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaEta_ditaupair__eq_bin_opt, *QCD_var_dependence_n_bins]
+ bandwidth: [1.25, 1.5, 2.0]
+ var_bins:
+ "==0": [-4.9, -2.13, -1.42, -0.93, -0.53, -0.18, 0.16, 0.56, 0.96, 1.43, 2.08, 4.9]
+ "==1": [-4.9, -2.0, -1.23, -0.65, -0.23, 0.25, 0.72, 1.28, 2.04, 4.9]
+ ">=2": [-4.9, -1.9, -1.03, -0.39, 0.26, 0.87, 1.76, 4.9]
+ deltaR_ditaupair:
+ <<: [*deltaR_ditaupair, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_ditaupair__eq_bin_opt, *QCD_var_dependence_n_bins]
+ bandwidth: [0.8, 1.1, 1.2]
+ var_bins:
+ "==0": [0.5, 2.6814, 2.869, 2.9787, 3.0526, 3.1104, 3.1696, 3.2609, 3.3865, 3.5747, 3.9186, 5.0]
+ "==1": [0.5, 1.653, 2.227, 2.5243, 2.7304, 2.9125, 3.0715, 3.2696, 3.6723, 5.0]
+ ">=2": [0.5, 1.3291, 1.9185, 2.3753, 2.7661, 3.0644, 3.4088, 5.0]
+ deltaR_1j1:
+ <<: [*deltaR_1j1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_1j1__eq_bin_opt, *QCD_var_dependence_n_bins]
+ correction_option: ["skip", "smoothed", "smoothed"]
+ var_bins:
+ "==0": [0.5, 7.0]
+ "==1": [0.5, 1.67, 2.205, 2.51, 2.774, 3.019, 3.235, 3.586, 4.235, 7.0]
+ ">=2": [0.5, 1.825, 2.382, 2.761, 3.009, 3.272, 3.759, 7.0]
+ pt_ttjj:
+ <<: [*pt_ttjj, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*pt_ttjj__eq_bin_opt, *QCD_var_dependence_n_bins]
+ correction_option: ["skip", "skip", "smoothed"]
+ bandwidth: [1.0, 1.0, 35.0]
+ var_bins:
+ "==0": [0.0, 150.0]
+ "==1": [0.0, 150.0]
+ ">=2": [0.0, 15.52, 22.28, 30.06, 38.0, 49.31, 69.77, 150.0]
+ mass_2:
+ <<: [*mass_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*mass_2__eq_bin_opt, *QCD_var_dependence_n_bins]
+ bandwidth: [0.25, 0.35, 0.4]
+ var_bins:
+ "==0": [0.0, 0.2, 0.5107, 0.6216, 0.7388, 0.835, 0.9121, 0.9941, 1.0732, 1.1543, 1.25, 1.3623, 2.0]
+ "==1": [0.0, 0.2, 0.5322, 0.665, 0.79, 0.896, 1.0088, 1.0957, 1.1982, 1.3271, 2.0]
+ ">=2": [0.0, 0.2, 0.564, 0.7314, 0.8735, 0.9932, 1.1123, 1.2715, 2.0]
+ iso_1:
+ <<: [*iso_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: *QCD_var_dependence_n_bins
+ variable_config:
+ iso_1:
+ min: 0.05
+ max: 0.15
+ rounding: 6
+ bandwidth: [0.04, 0.04, 0.04]
+ correction_option: "smoothed"
+ var_bins:
+ "==0": [0.05, 0.056882, 0.063543, 0.070468, 0.078181, 0.086647, 0.095377, 0.105744, 0.115638, 0.126327, 0.137486, 0.15]
+ "==1": [0.05, 0.057096, 0.065105, 0.073987, 0.083517, 0.096737, 0.108642, 0.122276, 0.13489, 0.15]
+ ">=2": [0.05, 0.059351, 0.068657, 0.081325, 0.096132, 0.110882, 0.128247, 0.15]
+
+ Wjets:
+ chain_DR_SR_to_non_closure: false
+
+ non_closure:
+ tau_decaymode_2:
+ <<: [*tau_decaymode_2, *3j_split]
+ correction_variations: ["StatShift", "SystMCShift"]
+ deltaEta_ditaupair:
+ <<: [*deltaEta_ditaupair, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaEta_ditaupair__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [0.8, 1.1, 1.25]
+ var_bins:
+ "==0": [-4.9, -1.9, -1.36, -1.02, -0.75, -0.52, -0.31, -0.11, 0.09, 0.3, 0.51, 0.74, 1.0, 1.34, 1.87, 4.9]
+ "==1": [-4.9, -1.75, -1.16, -0.75, -0.43, -0.14, 0.14, 0.43, 0.74, 1.14, 1.74, 4.9]
+ ">=2": [-4.9, -1.56, -0.96, -0.53, -0.16, 0.19, 0.54, 0.95, 1.54, 4.9]
+ deltaR_ditaupair:
+ <<: [*deltaR_ditaupair, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_ditaupair__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [0.5, 0.6, 0.7]
+ var_bins:
+ "==0": [0.5, 1.291, 1.6664, 1.9482, 2.1706, 2.3528, 2.5196, 2.6606, 2.783, 2.8902, 2.9821, 3.0643, 3.1321, 3.2287, 3.4481, 5.0]
+ "==1": [0.5, 1.1946, 1.5966, 1.9154, 2.1746, 2.4058, 2.6139, 2.801, 2.9638, 3.1049, 3.3191, 5.0]
+ ">=2": [0.5, 1.1567, 1.5816, 1.9338, 2.2206, 2.4933, 2.7366, 2.9631, 3.1951, 5.0]
+ deltaR_1j1:
+ <<: [*deltaR_1j1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_1j1__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ correction_option: ["skip", "smoothed", "smoothed"]
+ bandwidth: [1.0, 1.0, 1.2]
+ var_bins:
+ "==0": [0.5, 7.0]
+ "==1": [0.5, 1.257, 1.688, 2.026, 2.312, 2.572, 2.811, 3.037, 3.282, 3.69, 4.342, 7.0]
+ ">=2": [0.5, 1.242, 1.733, 2.12, 2.446, 2.72, 2.978, 3.261, 3.842, 7.0]
+ pt_ttjj:
+ <<: [*pt_ttjj, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*pt_ttjj__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ correction_option: ["skip", "skip", "smoothed"]
+ bandwidth: [1.0, 1.0, 30]
+ var_bins:
+ "==0": [0.0, 150.0]
+ "==1": [0.0, 150.0]
+ ">=2": [0.0, 13.13, 19.85, 25.75, 31.71, 38.56, 46.7, 57.79, 75.16, 150.0]
+ mass_2:
+ <<: [*mass_2, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*mass_2__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [0.225, 0.225, 0.25]
+ var_bins:
+ "==0": [0.0, 0.2, 0.4827, 0.5635, 0.6396, 0.707, 0.7681, 0.8281, 0.8857, 0.9453, 1.0078, 1.0713, 1.1377, 1.2109, 1.2949, 1.4092, 2.0]
+ "==1": [0.0, 0.2, 0.5088, 0.6177, 0.7119, 0.7959, 0.8696, 0.9551, 1.0381, 1.1279, 1.2285, 1.3545, 2.0]
+ ">=2": [0.0, 0.2, 0.5308, 0.6577, 0.7612, 0.8506, 0.957, 1.0625, 1.1758, 1.3076, 2.0]
+ iso_1:
+ <<: [*iso_1, *3j_split, *correction_variations__with_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*iso_1__eq_bin_opt, *Wjets_var_dependence_n_bins]
+ bandwidth: [0.03, 0.04, 0.05]
+ var_bins:
+ "==0": [0.0, 0.00005, 0.004932, 0.007516, 0.010087, 0.012785, 0.015814, 0.019493, 0.023479, 0.028324, 0.034053, 0.040962, 0.049714, 0.060662, 0.075963, 0.100649, 0.15]
+ "==1": [0.0, 0.00005, 0.005621, 0.009072, 0.012884, 0.017279, 0.022713, 0.029417, 0.037605, 0.049066, 0.064637, 0.090462, 0.15]
+ ">=2": [0.0, 0.00005, 0.005602, 0.009668, 0.014723, 0.020748, 0.028099, 0.039254, 0.055474, 0.082078, 0.15]
+
+ ttbar:
+ non_closure:
+ tau_decaymode_2:
+ <<: [*tau_decaymode_2, *2j_split]
+ correction_variations: ["StatShift"]
+ deltaEta_ditaupair:
+ <<: [*deltaEta_ditaupair, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaEta_ditaupair__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [1.5, 1.75]
+ var_bins:
+ "<=1": [-4.9, -1.57, -1.06, -0.71, -0.43, -0.17, 0.11, 0.39, 0.69, 1.05, 1.56, 4.9]
+ ">=2": [-4.9, -2.0, -1.47, -1.1, -0.82, -0.58, -0.35, -0.12, 0.1, 0.33, 0.55, 0.79, 1.07, 1.43, 1.99, 4.9]
+ deltaR_ditaupair:
+ <<: [*deltaR_ditaupair, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_ditaupair__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [0.75, 1.0]
+ var_bins:
+ "<=1": [0.5, 1.2589, 1.6931, 2.0017, 2.272, 2.4783, 2.6662, 2.8297, 2.9717, 3.103, 3.283, 5.0]
+ ">=2": [0.5, 0.9472, 1.2341, 1.4726, 1.6983, 1.9055, 2.0972, 2.2769, 2.4468, 2.6064, 2.7588, 2.9015, 3.0423, 3.1826, 3.465, 5.0]
+ deltaR_1j1:
+ <<: [*deltaR_1j1, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*deltaR_1j1__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ var_bins:
+ "<=1": [0.5, 1.343, 1.785, 2.09, 2.344, 2.552, 2.723, 2.882, 3.037, 3.185, 3.478, 7.0]
+ ">=2": [0.5, 1.158, 1.535, 1.836, 2.086, 2.291, 2.467, 2.618, 2.754, 2.872, 2.984, 3.087, 3.201, 3.395, 3.767, 7.0]
+ pt_ttjj:
+ <<: [*pt_ttjj, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*pt_ttjj__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ correction_option: ["skip", "smoothed"]
+ var_bins:
+ "<=1": [0.0, 150.0]
+ ">=2": [0.0, 15.23, 22.09, 27.98, 33.27, 38.42, 43.61, 48.82, 54.23, 60.19, 66.83, 74.34, 83.25, 95.43, 112.85, 150.0]
+ mass_2:
+ <<: [*mass_2, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*mass_2__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [0.225, 0.175]
+ var_bins:
+ "<=1": [0.0, 0.2, 0.5542, 0.7139, 0.8491, 0.9507, 1.0391, 1.1172, 1.1963, 1.2773, 1.374, 1.4785, 2.0]
+ ">=2": [0.0, 0.2, 0.4985, 0.6196, 0.7344, 0.8311, 0.9072, 0.9736, 1.0352, 1.0957, 1.1553, 1.2168, 1.2793, 1.3447, 1.416, 1.5039, 2.0]
+ iso_1:
+ <<: [*iso_1, *2j_split, *correction_variations__without_mc_subtraction_shift]
+ equipopulated_binning_options:
+ <<: [*iso_1__eq_bin_opt, *ttbar_var_dependence_n_bins]
+ bandwidth: [0.05, 0.05]
+ var_bins:
+ "<=1": [0.0, 0.00005, 0.006133, 0.009499, 0.01269, 0.017176, 0.022144, 0.027969, 0.0369, 0.048008, 0.063692, 0.090369, 0.15]
+ ">=2": [0.0, 0.00005, 0.004676, 0.007108, 0.009587, 0.012164, 0.015111, 0.018535, 0.022638, 0.027259, 0.032977, 0.039821, 0.048317, 0.060037, 0.075678, 0.101079, 0.15]
diff --git a/configs/smhtt_ul/2018/fake_factors_mt.yaml b/configs/smhtt_ul/2018/fake_factors_mt.yaml
index d36d389..3ef78c9 100644
--- a/configs/smhtt_ul/2018/fake_factors_mt.yaml
+++ b/configs/smhtt_ul/2018/fake_factors_mt.yaml
@@ -45,7 +45,7 @@ target_processes:
fit_option:
"==0": ["poly_1"]
"==1": ["poly_1", "poly_2"]
- ">=2": ["poly_1", "poly_2"]
+ ">=2": ["poly_1"]
Wjets:
split_categories:
@@ -113,7 +113,7 @@ target_processes:
">82.16": [30.0, 35.3, 42.61, 54.79, 76.48, 150.0]
fit_option:
"==0": ["poly_1", "poly_2"]
- "==1": ["poly_1", "poly_2", "poly_3"]
+ "==1": ["poly_1", "poly_2"]
">=2": ["poly_1", "poly_2"]
ttbar:
@@ -171,8 +171,8 @@ target_processes:
var_dependence: pt_2
var_bins:
- "<=1": [30.0, 30.85, 31.77, 32.76, 33.78, 34.99, 36.3, 37.86, 39.58, 41.58, 44.13, 47.07, 51.67, 58.63, 71.12, 150.0]
- ">=2": [30.0, 31.69, 33.63, 35.9, 38.56, 42.03, 46.12, 52.56, 62.75, 82.88, 150.0]
+ "<=1": [30.0, 31.73, 33.36, 35.22, 37.27, 39.68, 42.23, 45.17, 48.65, 52.51, 57.21, 63.16, 70.71, 80.98, 96.82, 150.0]
+ ">=2": [30.0, 31.85, 33.99, 36.45, 39.37, 42.98, 47.76, 54.3, 63.96, 81.06, 150.0]
fit_option: ["poly_1", "poly_2", "poly_3"]
process_fractions:
diff --git a/configs/smhtt_ul/2018/preselection_mt.yaml b/configs/smhtt_ul/2018/preselection_mt.yaml
index a8e2ed7..35d9af9 100644
--- a/configs/smhtt_ul/2018/preselection_mt.yaml
+++ b/configs/smhtt_ul/2018/preselection_mt.yaml
@@ -66,6 +66,10 @@ processes:
- T
- L
+column_definitions:
+ event_parity:
+ expression: event % 2
+
event_selection:
had_tau_decay_mode: (tau_decaymode_2 == 0) || (tau_decaymode_2 == 1) || (tau_decaymode_2 == 10) || (tau_decaymode_2 == 11)
had_tau_id_vs_ele: id_tau_vsEle_VVLoose_2 > 0.5
@@ -80,38 +84,26 @@ mc_weights:
stitching:
- DYjets
- Wjets
- had_tau_id_vs_ele: (gen_match_2==5) * ((id_tau_vsEle_VVLoose_2>0.5)*id_wgt_tau_vsEle_VVLoose_2 + (id_tau_vsEle_VVLoose_2<0.5)) + (gen_match_2!=5)
- had_tau_id_vs_mu: (gen_match_2==5) * ((id_tau_vsMu_Tight_2>0.5)*id_wgt_tau_vsMu_Tight_2 + (id_tau_vsMu_Tight_2<0.5)) + (gen_match_2!=5)
+ had_tau_id_vs_ele: id_wgt_tau_vsEle_VVLoose_2
+ had_tau_id_vs_mu: id_wgt_tau_vsMu_Tight_2
lep_id: id_wgt_mu_1
lep_iso: iso_wgt_mu_1
lumi: ""
pileup: puweight
- single_trigger: trg_wgt_single_mu24ormu27
+ single_trigger: ((pt_1>25)*trg_wgt_single_mu24ormu27)
+ # for ml ffs add this
+ # had_tau_id_vs_jet: ((gen_match_2 == 5) * ((id_tau_vsJet_Tight_2 > 0.5) * id_wgt_tau_vsJet_Tight_2 + (id_tau_vsJet_Tight_2 < 0.5)) + (gen_match_2 != 5))
+ # btag: btag_weight
+
emb_weights:
- generator: (emb_genweight * emb_idsel_wgt_1 * emb_idsel_wgt_2 * emb_triggersel_wgt)
+ generator: (emb_genweight * emb_idsel_wgt_1 * emb_idsel_wgt_2 * emb_triggersel_wgt) * (gen_match_1==4 && gen_match_2==5)
lep_id: id_wgt_mu_1
lep_iso: iso_wgt_mu_1
single_trigger: trg_wgt_single_mu24ormu27
+ # for ml ffs add this
+ # had_tau_id_vs_jet: ((gen_match_2 == 5) * ((id_tau_vsJet_Tight_2 > 0.5) * id_wgt_tau_vsJet_Tight_2 + (id_tau_vsJet_Tight_2 < 0.5)) + (gen_match_2 != 5))
output_features:
- - mt_1
- - dilepton_veto
- - weight
- - btag_weight
- - q_1
- - pt_2
- - q_2
- - gen_match_2
- - pt_1
- - extramuon_veto
- - extraelec_veto
- - dimuon_veto
- - tau_decaymode_1
- - tau_decaymode_2
- - mass_1
- - mass_2
- - iso_1
- - iso_2
- beta_1
- beta_2
- bphi_1
@@ -120,6 +112,7 @@ output_features:
- bpt_2
- btag_value_1
- btag_value_2
+ - btag_weight
- deltaEta_12j1
- deltaEta_12j2
- deltaEta_12jj
@@ -147,18 +140,37 @@ output_features:
- deltaR_2j2
- deltaR_ditaupair
- deltaR_jj
+ - dilepton_veto
+ - dimuon_veto
+ - dxy_1
+ - dxy_2
+ - dz_1
+ - dz_2
- eta_1
- eta_2
+ - eta_fastmtt
+ - event
+ - event_parity
+ - extraelec_veto
+ - extramuon_veto
+ - gen_match_2
+ - iso_1
+ - iso_2
- jeta_1
- jeta_2
- jphi_1
- jphi_2
- jpt_1
- jpt_2
+ - lumi
+ - m_fastmtt
- m_vis
+ - mass_1
+ - mass_2
- met
- metphi
- mjj
+ - mt_1
- mt_2
- mt_tot
- nbtag
@@ -167,11 +179,18 @@ output_features:
- pfmetphi
- phi_1
- phi_2
+ - phi_fastmtt
+ - pt_1
+ - pt_2
- pt_dijet
+ - pt_fastmtt
- pt_tt
- pt_ttjj
- pt_vis
- - eta_fastmtt
- - m_fastmtt
- - phi_fastmtt
- - pt_fastmtt
+ - pzetamissvis
+ - q_1
+ - q_2
+ - run
+ - tau_decaymode_1
+ - tau_decaymode_2
+ - weight
diff --git a/docs/corrections.md b/docs/corrections.md
index f2a051c..6282a52 100644
--- a/docs/corrections.md
+++ b/docs/corrections.md
@@ -1,6 +1,7 @@
# Fake Factor corrections
In this step the corrections for the fake factors are calculated. This should be run after the FF calculation step.
+### Configuration
Currently two different correction types are implemented:
1. non closure correction depending on a specific variable
@@ -39,7 +40,30 @@ Each correction has following specifications:
`ARlike_cuts` | `dict` | event selections for the application-like region of the target process that should be replaced compared to the selection used in the previous FF calculation step
`AR_SR_cuts` | `dict` | event selections for a switch from the determination region to the signal/application region, this is only relevant for `DR_SR` corrections
`non_closure` | `dict` | this is only relevant for `DR_SR` corrections, since for this corrections additional fake factors are calculated. It's possible to calculated and apply non closure corrections to these fake factors before calculating the actual DR to SR correction.
-
+
+### Statistical check
+As an option it is possible to check using a sliding-window compatibility test whether a computed correction is statistically consistent with 1.0 (i.e., a flat correction, meaning no genuine fake factor bias). The test works as follows:
+
+1. **Error selection**: Per-bin uncertainties are taken either as the fully propagated errors (data + MC statistical, added in quadrature) or as the data-only "MC-suppressed" errors — depending on the parameter `use_suppressed_mc_errors_for_correction_selection`.
+2. **Sliding-window scan**: Pulls $(y_i - 1)/\sigma_i$ are computed for each bin. The test scans all contiguous windows of 1 to $N$ bins and finds the most significant deviation (minimum p-value):
+
+$$z_\text{window} = \frac{\left|\sum_{i} \text{pull}_i\right|}{\sqrt{N_\text{window}}}$$
+
+3. **Look-elsewhere correction**: A Bonferroni-style correction is applied to account for the scan over multiple windows:
+
+$$p_\text{shape} = 1 - (1 - p_\text{min})^{N_\text{bins}}$$
+
+4. **Auto-skipping**: If `p_shape > skip_corrections_p_value` (correction is compatible with 1) **and** `skip_corrections_compatible_to_one` is `true`, the correction is replaced with a flat 1.0. Bandwidth/shape variations are also set to 1.0. Statistical variations are collapsed to a single inclusive uncertainty. MC-shift variations are either fit to a constant or also set to 1.0.
+
+Three parameters can be set to define this check:
+
+ parameter | type | description
+ ---|---|---
+ `skip_corrections_compatible_to_one` | `bool` | Master switch for automatic skipping of corrections compatible with 1.0. If `True` and a correction passes the p-value threshold (see below), it is replaced by a flat 1.0 correction and all shape/bandwidth uncertainties are also flattened. Statistical uncertainties are collapsed into a single inclusive value. Defaults to `false`.
+ `skip_corrections_p_value` | `float` | P-value threshold for the compatibility-with-1 test. A correction is considered compatible with 1.0 (and thus auto-skipped) if its shape p-value is above this threshold. Higher values are more aggressive (skip more corrections). Only has an effect when `skip_corrections_compatible_to_one` is set to `true`. Defaults to `0.05`.
+ `use_suppressed_mc_errors_for_correction_selection` | `bool` | Controls which per-bin uncertainties are used in the compatibility test. If `True`, only the data statistical uncertainties are used — MC statistical errors from the MC subtraction step are ignored ("suppressed"). If `False`, the fully propagated errors (data + MC statistical uncertainties added in quadrature) are used. Using the suppressed errors prevents MC-dominated samples from artificially increasing the uncertainty and causing genuine corrections to be auto-skipped. Defaults to `true`.
+
+### Running calculations
To run the FF correction step, execute the python script and specify the config file (relative path possible):
```bash
python ff_corrections.py --config-file PATH/CONFIG.yaml
diff --git a/docs/fakefactors.md b/docs/fakefactors.md
index 184dff4..e39fb33 100644
--- a/docs/fakefactors.md
+++ b/docs/fakefactors.md
@@ -1,6 +1,7 @@
# Fake Factor calculation
In this step the fake factors are calculated. This should be run after the preselection step.
+### Configuration
All information for the FF calculation step is defined in a configuration file in the `configs/ANALYSIS/ERA/` folder using the `common_settings.yaml` and a more specific config file. The `common_settings.yaml` has to be named like that and is used for all steps of the fake factor estimation (`preselection`, `FF calculation`, `FF corrections`).
The FF calculation config has the following parameters:
@@ -9,8 +10,9 @@ General options for the calculation:
parameter | type | description
---|---|---
`channel` | `string` | tau pair decay channels ("et", "mt", "tt")
- `use_embedding` | `bool` | True if embedded sample should be used, False if only MC sample should be used
+ `use_embedding` | `bool` | `true` if embedded sample should be used, `false` if only MC sample should be used
`use_center_of_mass_bins` | `bool` | Changes the x-data that is entering FF and correction calculation. If set then a center of mass value is used for the x-data, calculated from events entering the corresponding bin. If not set, the bin centers are used. Default is set to True.
This will not affect FF and correction calculation that are set to `"binwise"` (the x-data values although displayed in plots are not used)
+ `stat_sigma` | `float` | This parameter defines the number of standard deviations to be considered when determining the uncertainties for fit or smoothing parameters, which are then stored in the correctionlib files. Default is `1.0`.
In `target_processes` the processes for which FFs should be calculated (normally for QCD, Wjets, ttbar) are defined.
Each target process needs some specifications:
@@ -41,7 +43,8 @@ In `process_fractions` specifications for the calculation of the process fractio
`SR_cuts` | `list` | see `target_processes`, (optional) not needed for the fraction calculation
**Note:** When using split binning for process fraction calculations, the `var_bins` parameter can also be defined in the same manner as for `target_processes`.
-
+
+### Running calculations
To run the FF calculation step, execute the python script and specify the config file (relative path possible):
```bash
python ff_calculation.py --config-file PATH/CONFIG.yaml
diff --git a/docs/ml_fakefactors.md b/docs/ml_fakefactors.md
new file mode 100644
index 0000000..7fd193f
--- /dev/null
+++ b/docs/ml_fakefactors.md
@@ -0,0 +1,41 @@
+# ML-Based Fake Factor Evaluation
+
+The framework supports the evaluation of fake factors based on machine learning models in the ONNX format instead of the classic approach of measuring them (see [Fake Factor Calculation](fakefactors.md)). ONNXRuntime is utilized to evaluate the models within ROOT RDataFrames.
+
+## ML Training
+
+Not included yet. Files/models have to be provided externally.
+
+## Configuration
+
+To use ML-based fake factors, the configuration must specify the ONNX model details for each process. These details are defined in a YAML configuration file, which should be located in the `workdir/TAG` directory. The configuration includes:
+
+- **Model Path**: The path to the ONNX model file.
+- **Model Inputs**: A list of input variable names required by the model.
+- **Define Columns**: Optional expressions to define missing columns dynamically.
+
+Example YAML configuration:
+```yaml
+target_processes:
+ QCD:
+ model_path: "models/qcd_fake_factors.onnx"
+ model_input:
+ - "event_parity"
+ - "pt_1"
+ - "njets"
+ ...
+ define_columns:
+ event_parity: event % 2
+ ...
+ Wjets:
+ model_path: "models/wjets_fake_factors.onnx"
+ model_input:
+ - "pt_2"
+ - "nbtag"
+ ...
+ ...
+```
+
+## Notes
+
+- Ensure that the ONNX model is compatible with ONNXRuntime and that the input variable names match those in the configuration.
\ No newline at end of file
diff --git a/docs/preselection.md b/docs/preselection.md
index 9bda137..3b3597a 100644
--- a/docs/preselection.md
+++ b/docs/preselection.md
@@ -1,5 +1,7 @@
# Event preselection
-This framework is designed for n-tuples (and friend trees) produced with CROWN as input.
+This framework is designed for n-tuples (and friend trees) produced with CROWN as input.
+
+### Configuration
All information for the preselection step should be defined in configuration files in the `configs/ANALYSIS/ERA/` folder using the `common_settings.yaml` file and a more specific config file. The `common_settings.yaml` has to be named like that and is used for all steps of the fake factor estimation (`preselection`, `FF calculation`, `FF corrections`).
The preselection config has the following parameters:
@@ -83,6 +85,7 @@ There are two types of weights.
Scale factors for b-tagging and tau ID vs jet are applied on the fly during the FF calculation step.
+### Running preselection
To run the preselection step, execute the python script and specify the config file (relative path possible):
```bash
python preselection.py --config-file configs/PATH/CONFIG.yaml
diff --git a/environment.yml b/environment.yml
index 2f77e1b..2cf305b 100644
--- a/environment.yml
+++ b/environment.yml
@@ -33,22 +33,29 @@ dependencies:
- bzip2=1.0.8=h4bc722e_7
- c-ares=1.34.5=hb9d3cd8_0
- c-compiler=1.11.0=h4d9bdce_0
- - ca-certificates=2025.8.3=hbd8a1cb_0
+ - ca-certificates=2026.2.25=hbd8a1cb_0
- cached-property=1.5.2=hd8ed1ab_1
- cached_property=1.5.2=pyha770c72_1
- cairo=1.18.4=h3394656_0
- - certifi=2025.8.3=pyhd8ed1ab_0
+ - certifi=2026.2.25=pyhd8ed1ab_0
- cffi=1.17.1=py312h06ac9bb_0
- cfitsio=4.6.2=ha0b56bc_1
- charset-normalizer=3.4.3=pyhd8ed1ab_0
- click=8.2.1=pyh707e725_0
- colorama=0.4.6=pyhd8ed1ab_1
+ - coloredlogs=15.0.1=pyhd8ed1ab_4
- comm=0.2.3=pyhe01879c_0
- compilers=1.11.0=ha770c72_0
- conda-gcc-specs=14.3.0=hb991d5c_4
- contourpy=1.3.3=py312hd9148b4_1
- correctionlib=2.7.0=py312ha04a795_0
+ - cpython=3.12.13=py312hd8ed1ab_0
- cramjam=2.11.0=py312h848b54d_0
+ - cuda-cudart=12.9.79=h5888daf_0
+ - cuda-cudart_linux-64=12.9.79=h3f2d84a_0
+ - cuda-nvrtc=12.9.86=hecca717_1
+ - cuda-version=12.9=h4f385c5_3
+ - cudnn=9.10.2.21=hbcb9cd8_0
- cxx-compiler=1.11.0=hfcd1e18_0
- cycler=0.12.1=pyhd8ed1ab_1
- cyrus-sasl=2.1.28=hd9c7081_0
@@ -91,6 +98,8 @@ dependencies:
- gl2ps=1.4.2=hae5d5c5_1
- glew=2.1.0=h9c3ff4c_2
- glib-tools=2.84.3=hf516916_0
+ - gmp=6.3.0=hac33072_2
+ - gmpy2=2.3.0=py312hcaba1f9_1
- graphite2=1.3.14=hecca717_1
- graphviz=13.1.2=h87b6fe6_0
- gsl=2.7=he838d99_0
@@ -108,12 +117,14 @@ dependencies:
- hpack=4.1.0=pyhd8ed1ab_0
- httpcore=1.0.9=pyh29332c3_0
- httpx=0.28.1=pyhd8ed1ab_0
+ - humanfriendly=10.0=pyh707e725_8
- hyperframe=6.1.0=pyhd8ed1ab_0
- icu=75.1=he02047a_0
- idna=3.10=pyhd8ed1ab_1
- importlib-metadata=8.7.0=pyhe01879c_1
- importlib-resources=6.5.2=pyhd8ed1ab_0
- importlib_resources=6.5.2=pyhd8ed1ab_0
+ - ipdb=0.13.13=pyhd8ed1ab_1
- ipykernel=6.30.1=pyh82676e8_0
- ipyparallel=9.0.1=pyh29332c3_0
- ipython=9.4.0=pyhfa0c392_0
@@ -143,6 +154,7 @@ dependencies:
- lcms2=2.17=h717163a_0
- ld_impl_linux-64=2.44=h1423503_1
- lerc=4.0.0=h0aef613_1
+ - libabseil=20260107.1=cxx17_h7b12aa8_0
- libasprintf=0.25.1=h3f43e3d_1
- libasprintf-devel=0.25.1=h3f43e3d_1
- libblas=3.9.0=34_h59b9bed_openblas
@@ -152,8 +164,14 @@ dependencies:
- libcblas=3.9.0=34_he106b2a_openblas
- libclang-cpp20.1=20.1.8=default_hddf928d_0
- libclang13=20.1.8=default_ha444ac7_0
+ - libcublas=12.9.1.4=h676940d_1
+ - libcudnn=9.10.2.21=hf7e9902_0
+ - libcudnn-dev=9.10.2.21=h58dd1b1_0
+ - libcufft=11.4.1.4=hecca717_1
- libcups=2.3.3=hb8b1518_5
+ - libcurand=10.3.10.19=h676940d_1
- libcurl=8.14.1=h332b0f4_0
+ - libcusparse=12.5.10.65=hecca717_2
- libdeflate=1.24=h86f0d12_0
- libdrm=2.4.125=hb9d3cd8_0
- libedit=3.1.20250104=pl5321h7949ede_0
@@ -188,6 +206,7 @@ dependencies:
- libnghttp2=1.64.0=h161d5f1_0
- libnsl=2.0.1=hb9d3cd8_1
- libntlm=1.8=hb9d3cd8_0
+ - libnvjitlink=12.9.86=hecca717_2
- libopenblas=0.3.30=pthreads_h94d23a6_1
- libopengl=1.7.0=ha4b6fd6_2
- libpciaccess=0.18=hb9d3cd8_0
@@ -225,10 +244,13 @@ dependencies:
- mistune=3.1.3=pyh29332c3_0
- mkdocs=1.6.1=pyhd8ed1ab_1
- mkdocs-get-deps=0.2.0=pyhd8ed1ab_1
- - mkdocs-material=9.6.16=pyhd8ed1ab_0
+ - mkdocs-material=9.7.6=pyhcf101f3_1
- mkdocs-material-extensions=1.3.1=pyhd8ed1ab_1
+ - mpc=1.4.0=he0a73b1_0
+ - mpfr=4.2.2=he0a73b1_0
- mplhep=0.4.0=pyhd8ed1ab_0
- mplhep_data=0.0.4=pyhd8ed1ab_2
+ - mpmath=1.4.1=pyhd8ed1ab_0
- munkres=1.1.4=pyhd8ed1ab_1
- mypy_extensions=1.1.0=pyha770c72_0
- nbclient=0.10.2=pyhd8ed1ab_0
@@ -241,9 +263,10 @@ dependencies:
- notebook-shim=0.2.4=pyhd8ed1ab_1
- numba=0.61.2=py312h7bcfee6_1
- numpy=2.2.6=py312h72c5963_0
+ - onnxruntime=1.24.2=py312h5fe9fb3_200_cuda129
- openjpeg=2.5.3=h55fea9a_1
- openldap=2.6.10=he970967_0
- - openssl=3.5.2=h26f9b46_0
+ - openssl=3.6.2=h35e630c_0
- overrides=7.7.0=pyhd8ed1ab_1
- packaging=25.0=pyh29332c3_1
- paginate=0.5.7=pyhd8ed1ab_1
@@ -263,6 +286,7 @@ dependencies:
- portalocker=3.2.0=py312h7900ff3_0
- prometheus_client=0.22.1=pyhd8ed1ab_0
- prompt-toolkit=3.0.51=pyha770c72_0
+ - protobuf=6.33.5=py312ha7b3241_2
- psutil=7.0.0=py312h66e93f0_0
- pthread-stubs=0.4=hb9d3cd8_1002
- ptyprocess=0.7.0=pyhd8ed1ab_1
@@ -271,7 +295,7 @@ dependencies:
- pydantic=2.11.7=pyh3cfb1c2_0
- pydantic-core=2.33.2=py312h680f630_0
- pygments=2.19.2=pyhd8ed1ab_0
- - pymdown-extensions=10.16.1=pyhd8ed1ab_0
+ - pymdown-extensions=10.21.2=pyhd8ed1ab_0
- pyparsing=3.2.3=pyhe01879c_2
- pyside6=6.9.1=py312hdb827e4_0
- pysocks=1.7.1=pyha55dd90_7
@@ -279,6 +303,7 @@ dependencies:
- python=3.12.11=h9e4cc4f_0_cpython
- python-dateutil=2.9.0.post0=pyhe01879c_2
- python-fastjsonschema=2.21.1=pyhd8ed1ab_0
+ - python-flatbuffers=25.9.23=pyh1e1bc0e_0
- python-json-logger=2.0.7=pyhd8ed1ab_0
- python-tzdata=2025.2=pyhd8ed1ab_0
- python-xxhash=3.5.0=py312h66e93f0_2
@@ -301,6 +326,7 @@ dependencies:
- rpds-py=0.27.0=py312h868fb18_0
- ruamel.yaml=0.18.14=py312h66e93f0_0
- ruamel.yaml.clib=0.2.8=py312h66e93f0_1
+ - scipy=1.17.1=py312h54fa4ab_0
- scitokens-cpp=1.1.3=h6ac2c77_0
- send2trash=1.8.3=pyh0d859eb_1
- setuptools=80.9.0=pyhff2d567_0
@@ -309,11 +335,13 @@ dependencies:
- sniffio=1.3.1=pyhd8ed1ab_1
- soupsieve=2.7=pyhd8ed1ab_0
- stack_data=0.6.3=pyhd8ed1ab_1
+ - sympy=1.14.0=pyh2585a3b_106
- sysroot_linux-64=2.17=h0157908_18
- tbb=2022.2.0=hb60516a_0
- terminado=0.18.1=pyh0d859eb_0
- tinycss2=1.4.0=pyhd8ed1ab_0
- tk=8.6.13=noxft_hd72426e_102
+ - toml=0.10.2=pyhcf101f3_3
- tomli=2.2.1=pyhe01879c_2
- tornado=6.5.2=py312h4c3975b_0
- tqdm=4.67.1=pyhd8ed1ab_1
diff --git a/ff_calculation.py b/ff_calculation.py
index 75cf08c..33505d6 100644
--- a/ff_calculation.py
+++ b/ff_calculation.py
@@ -7,13 +7,13 @@
import os
from typing import Dict, List, Tuple, Union
+import CustomLogging as logging_helper
import FF_calculation.FF_QCD as FF_QCD
import FF_calculation.FF_ttbar as FF_ttbar
import FF_calculation.FF_Wjets as FF_Wjets
import helper.correctionlib_json as corrlib
import helper.ff_functions as ff_func
import helper.functions as func
-import CustomLogging as logging_helper
from FF_calculation.fractions import fraction_calculation
from helper.hooks_and_patches import Histo1DPatchedRDataFrame
@@ -55,13 +55,15 @@
}
FF_DATA_SCALING_FACTOR_CALCULATION_FUNCTIONS = {
- **{k: lambda *args, **kwargs: (None, None) for k in {
- "QCD",
- "QCD_subleading",
- "Wjets",
- "process_fractions",
- "process_fractions_subleading",
- }
+ **{
+ k: lambda *args, **kwargs: (None, None)
+ for k in {
+ "QCD",
+ "QCD_subleading",
+ "Wjets",
+ "process_fractions",
+ "process_fractions_subleading",
+ }
}, # only necessary for ttbar and ttbar_subleading
"ttbar": FF_ttbar.calculation_FF_data_scaling_factor,
"ttbar_subleading": FF_ttbar.calculation_FF_data_scaling_factor,
@@ -142,9 +144,7 @@ def FF_calculation(
@logging_helper.LogDecorator().grouped_logs(extractor=lambda args: f"ff_calculation.{args[0]}")
-def run_ff_calculation(
- args: Tuple[str, Dict[str, Union[Dict, List, str]], List[str], str]
-) -> Tuple[Tuple, Dict]:
+def run_ff_calculation(args: Tuple[str, Dict[str, Union[Dict, List, str]], List[str], str]) -> Tuple[Tuple, Dict]:
"""
This function can be used for multiprocessing. It runs the fake factor calculation step for a specified process.
diff --git a/ff_corrections.py b/ff_corrections.py
index 5575563..c49861b 100644
--- a/ff_corrections.py
+++ b/ff_corrections.py
@@ -20,9 +20,10 @@
import helper.ff_functions as ff_func
import helper.functions as func
from ff_calculation import FF_calculation
-from helper.ff_evaluators import FakeFactorCorrectionEvaluator, FakeFactorEvaluator, DRSRCorrectionEvaluator
+from helper.ff_evaluators import get_fake_factor_evaluator, FakeFactorCorrectionEvaluator, FakeFactorEvaluator, DRSRCorrectionEvaluator
from helper.hooks_and_patches import Histo1DPatchedRDataFrame
import CustomLogging as logging_helper
+import configs.general_definitions as gd
parser = argparse.ArgumentParser()
@@ -309,7 +310,7 @@ def run_ff_calculation_for_DRtoSR(
str,
Dict[str, Union[Dict, List, str]],
Dict[str, Union[Dict, str]],
- List[str],
+ func.SamplePathList,
str,
]
) -> Union[Dict, None]:
@@ -333,11 +334,25 @@ def run_ff_calculation_for_DRtoSR(
process=process,
to_AR_SR=False,
)
+ if not corr_config["target_processes"][process]["DR_SR"].get("use_orthogonal_fake_factors", True):
+ log.info(f"DR to SR correction for {process} process is configured to not use orthogonal fake factors. Skipping fake factor calculation for DR to SR correction.")
+ return None
+
+ use_data_flag = corr_config["target_processes"][process]["DR_SR"].get("compute_orthogonal_fake_factors_using_data", True)
+ if process.startswith("QCD") and not use_data_flag:
+ raise NotImplementedError("compute_orthogonal_fake_factors_using_data=False is not currently implemented for QCD.")
+ ff_config["target_processes"][process]["compute_orthogonal_fake_factors_using_data"] = use_data_flag
+
log.info(f"Calculating fake factors for the DR to SR correction for the {process} process.")
log.info("-" * 50)
result = FF_calculation(
config=ff_config,
- sample_paths=sample_paths,
+ sample_paths=sample_paths.switch_embedding_state(
+ corr_config["target_processes"][process]["DR_SR"].get(
+ "use_embedding",
+ config.get("use_embedding", False)
+ )
+ ),
output_path=output_path,
process=process,
logger=f"ff_corrections.{process}",
@@ -384,12 +399,12 @@ def run_non_closure_correction_for_DRtoSR(
assert "ttbar" not in process, "ttbar is not supported for DR to SR corrections"
var_dependences = [config["target_processes"][process]["var_dependence"]] + list(config["target_processes"][process]["split_categories"].keys())
- evaluator = FakeFactorEvaluator.loading_from_file(
+ evaluator = get_fake_factor_evaluator(
config=config,
process=process,
var_dependences=var_dependences,
for_DRtoSR=True,
- logger=f"ff_corrections.{process}",
+ logger=f"ff_corrections.{process}.DR_SR",
)
corrections.update(
@@ -398,11 +413,16 @@ def run_non_closure_correction_for_DRtoSR(
corr_config=corr_config,
process=process,
evaluator=evaluator,
- sample_paths=sample_paths,
+ sample_paths=sample_paths.switch_embedding_state(
+ process_config["DR_SR"].get(
+ "use_embedding",
+ config.get("use_embedding", False)
+ )
+ ),
output_path=output_path,
for_DRtoSR=True,
DR_SR_evaluator=None,
- logger=f"ff_corrections.{process}",
+ logger=f"ff_corrections.{process}.DR_SR",
)
)
else:
@@ -451,18 +471,18 @@ def run_correction(
DR_SR_correction = None
if "DR_SR" in corr_config["target_processes"][process]:
- evaluator = FakeFactorEvaluator.loading_from_file(
+ evaluator = get_fake_factor_evaluator(
config=config,
process=process,
var_dependences=var_dependences,
- for_DRtoSR=True,
+ for_DRtoSR=corr_config["target_processes"][process]["DR_SR"].get("use_orthogonal_fake_factors", True),
logger=f"ff_corrections.{process}",
)
corr_evaluators = []
DR_SR_config = corr_config["target_processes"][process]["DR_SR"]
- for corr_var in DR_SR_config["non_closure"].keys():
+ for corr_var in DR_SR_config.get("non_closure", {}).keys():
non_closure_corr_vars_DR_SR = corr_var
if "split_categories" in DR_SR_config["non_closure"][corr_var]:
split_variables = list(DR_SR_config["non_closure"][corr_var]["split_categories"].keys())
@@ -511,10 +531,10 @@ def run_correction(
args_list=[
(
split_collection,
- config,
+ ff_config,
DR_SR_config,
process,
- sample_paths,
+ sample_paths.switch_embedding_state(DR_SR_config.get("use_embedding", ff_config.get("use_embedding", False))),
save_path,
f"ff_corrections.{process}",
evaluator,
@@ -553,7 +573,7 @@ def run_correction(
)
if "non_closure" in corr_config["target_processes"][process]:
- evaluator = FakeFactorEvaluator.loading_from_file(
+ evaluator = get_fake_factor_evaluator(
config=config,
process=process,
var_dependences=var_dependences,
@@ -616,11 +636,21 @@ def run_correction(
if config.get("use_center_of_mass_bins", True):
func.RuntimeVariables.RDataFrameWrapper = Histo1DPatchedRDataFrame
+ func.RuntimeVariables.SKIP_CORRECTIONS_COMPATIBLE_TO_ONE = corr_config.get("skip_corrections_compatible_to_one", False)
+ func.RuntimeVariables.SKIP_CORRECTIONS_P_VALUE = corr_config.get("skip_corrections_p_value", 0.05)
+ func.RuntimeVariables.USE_SUPPRESSED_MC_ERRORS_FOR_CORRECTION_SELECTION = corr_config.get("use_suppressed_mc_errors_for_correction_selection", True)
+
# setting default systematic variations if not present in the config
if "correction_variations" not in corr_config:
- corr_config["correction_variations"] = ("Stat_1Sigma", "Syst_MCShift", "Syst_BandAsym")
+ corr_config["correction_variations"] = gd.default_correction_variations
+ else:
+ if isinstance(corr_config["correction_variations"], str):
+ corr_config["correction_variations"] = [corr_config["correction_variations"]]
+ for var in corr_config["correction_variations"]:
+ if var not in gd.VARIATIONS:
+ raise ValueError(f"Variation {var} is not defined in the general definitions! Please choose from {gd.VARIATIONS} or add the variation to the general definitions if it is missing.")
- ########### needed precalculations for DR to SR corrections ###########
+ # ########## needed precalculations for DR to SR corrections ########## #
# initializing the fake factor calculation for DR to SR corrections
ff_for_DRtoSR_file = os.path.join(
@@ -644,8 +674,8 @@ def run_correction(
test_config = corr_config["target_processes"][proc].get("DR_SR", {})
is_valid_cache = all(
- func.nested_object_comparison(__test_config[k], test_config[k])
- for k in ("SRlike_cuts", "ARlike_cuts", "AR_SR_cuts")
+ func.nested_object_comparison(__test_config.get(k), test_config.get(k))
+ for k in ("SRlike_cuts", "ARlike_cuts", "AR_SR_cuts", "use_embedding", "compute_fake_factors_using_data")
)
else:
is_valid_cache = False
@@ -717,7 +747,7 @@ def run_correction(
for_DRtoSR=True,
)
- ########### real fake factor corrections ###########
+ # ########## non closure fake factor corrections ########## #
corrections = {
"QCD": {},
@@ -746,6 +776,8 @@ def run_correction(
for_DRtoSR=False,
)
+ ff_func.print_statistical_compatibility_summary(DR_SR_corrections, corrections, logger="ff_corrections")
+
with open(os.path.join(save_path, "done"), "w") as done_file:
done_file.write("")
diff --git a/helper/correctionlib_json.py b/helper/correctionlib_json.py
index bba4b25..3263b7c 100644
--- a/helper/correctionlib_json.py
+++ b/helper/correctionlib_json.py
@@ -195,12 +195,12 @@ def generate_ff_corrlib_json(
return cset
-def get_ff_content(category: str, variable_info: Tuple[str, List[float]], ff_info: Dict, variation: str) -> cs.Category:
+def get_ff_content(categories: Tuple[str], variable_info: Tuple[str, List[float]], ff_info: Dict, variation: str) -> cs.Category:
"""
Function which produces a correctionlib Category with Binning as content based on the measured fake factors for a specific category and variation.
Args:
- category: Name of the category for which the content is produced
+ categories: Tuple with category names for which the content is produced
variable_info: Tuple with information (name and binning) about the variable the fake factors depends on
ff_info: Dictionary of fake factor functions as strings, e.g. ff_function[PROCESS][CATEGORY_1][VARIATION]
variation: Name of the variation for which the content is produced
@@ -209,18 +209,18 @@ def get_ff_content(category: str, variable_info: Tuple[str, List[float]], ff_inf
Category object from correctionlib with Binning as content
"""
# this is the case if smoothing is used to estimate the fake factor function
- if "default" in ff_info[category].keys():
- ff_version = "default" if "downsampled" not in ff_info[category] else "downsampled"
- unc_list = list(ff_info[category][ff_version]["variations"].keys())
+ if "default" in ff_info[categories[-1]].keys():
+ ff_version = "default" if "downsampled" not in ff_info[categories[-1]] else "downsampled"
+ unc_list = list(ff_info[categories[-1]][ff_version]["variations"].keys())
if variation not in unc_list and variation != "nominal":
return cs.Binning(
nodetype="binning",
input=variable_info[0],
- edges=list(ff_info[category][ff_version]["nominal"]["edges"]),
- content=list(ff_info[category][ff_version]["nominal"]["content"]),
+ edges=list(ff_info[categories[-1]][ff_version]["nominal"]["edges"]),
+ content=list(ff_info[categories[-1]][ff_version]["nominal"]["content"]),
flow="clamp",
)
- ff_bin_info = ff_info[category][ff_version]["variations"][variation] if variation != "nominal" else ff_info[category][ff_version]["nominal"]
+ ff_bin_info = ff_info[categories[-1]][ff_version]["variations"][variation] if variation != "nominal" else ff_info[categories[-1]][ff_version]["nominal"]
return cs.Binning(
nodetype="binning",
input=variable_info[0],
@@ -229,16 +229,16 @@ def get_ff_content(category: str, variable_info: Tuple[str, List[float]], ff_inf
flow="clamp",
)
# this is the case if a polynomial fit is used to estimate the fake factor function
- elif "nominal" in ff_info[category].keys():
- unc_list = list(ff_info[category]["variations"].keys())
+ elif "nominal" in ff_info[categories[-1]].keys():
+ unc_list = list(ff_info[categories[-1]]["variations"].keys())
if variation not in unc_list and variation != "nominal":
return cs.Binning(
nodetype="binning",
input=variable_info[0],
**get_edges_and_content(
- ff_info[category]["nominal"],
+ ff_info[categories[-1]]["nominal"],
variable_info[0],
- variable_info[1][category],
+ variable_info[1][categories[0]] if len(categories) == 1 else variable_info[1][categories[0]][categories[1]],
),
flow="clamp",
)
@@ -246,14 +246,14 @@ def get_ff_content(category: str, variable_info: Tuple[str, List[float]], ff_inf
nodetype="binning",
input=variable_info[0],
**get_edges_and_content(
- ff_info[category]["variations"][variation] if variation != "nominal" else ff_info[category]["nominal"],
+ ff_info[categories[-1]]["variations"][variation] if variation != "nominal" else ff_info[categories[-1]]["nominal"],
variable_info[0],
- variable_info[1][category],
+ variable_info[1][categories[0]] if len(categories) == 1 else variable_info[1][categories[0]][categories[1]],
),
flow="clamp",
)
else:
- raise ValueError(f"The provided fake factor information for category {category} is not valid. Please check the ff_functions dictionary.")
+ raise ValueError(f"The provided fake factor information for category {categories} is not valid. Please check the ff_functions dictionary.")
def make_1D_ff(
@@ -327,7 +327,7 @@ def make_1D_ff(
input=cat_inputs[0],
edges=process_conf["split_categories_binedges"][cat_inputs[0]],
content=[
- get_ff_content(cat1, variable_info, ff_functions, unc_name) for cat1 in ff_functions
+ get_ff_content((cat1,), variable_info, ff_functions, unc_name) for cat1 in ff_functions
],
flow="clamp",
),
@@ -339,7 +339,7 @@ def make_1D_ff(
input=cat_inputs[0],
edges=process_conf["split_categories_binedges"][cat_inputs[0]],
content=[
- get_ff_content(cat1, variable_info, ff_functions, "nominal") for cat1 in ff_functions
+ get_ff_content((cat1,), variable_info, ff_functions, "nominal") for cat1 in ff_functions
],
flow="clamp",
),
@@ -445,7 +445,7 @@ def make_2D_ff(
else binedges_conf[cat_inputs[1]]
),
content=[
- get_ff_content(cat2, variable_info, ff_functions[cat1], unc_name) for cat2 in ff_functions[cat1]
+ get_ff_content((cat1, cat2), variable_info, ff_functions[cat1], unc_name) for cat2 in ff_functions[cat1]
],
flow="clamp",
)
@@ -470,7 +470,7 @@ def make_2D_ff(
else binedges_conf[cat_inputs[1]]
),
content=[
- get_ff_content(cat2, variable_info, ff_functions[cat1], "nominal") for cat2 in ff_functions[cat1]
+ get_ff_content((cat1, cat2), variable_info, ff_functions[cat1], "nominal") for cat2 in ff_functions[cat1]
],
flow="clamp",
)
@@ -669,6 +669,9 @@ def unique_append(self, item):
binning = correction_conf["var_bins"]
correction_variations = correction_conf.get("correction_variations", gd.default_correction_variations)
+ for var in correction_variations:
+ if var not in gd.VARIATIONS:
+ raise ValueError(f"Variation {var} is not defined in the general definitions! Please choose from {gd.VARIATIONS} or add the variation to the general definitions if it is missing.")
correction_variations = ["".join(it) for it in product(correction_variations, ["Up", "Down"])]
if is_2D:
diff --git a/helper/ff_evaluators.py b/helper/ff_evaluators.py
index ba5bc78..011d7c0 100644
--- a/helper/ff_evaluators.py
+++ b/helper/ff_evaluators.py
@@ -1,13 +1,19 @@
import logging
import os
+import ctypes
from typing import Any, Dict, List, Tuple, Union
+import traceback
import correctionlib
import correctionlib.schemav2 as cs
import ROOT
-import CustomLogging as logging_helper
+import onnxruntime as ort
+import numpy as np
+import helper.functions as func
+
+_DECLARED_CXX_OBJECTS: set = set()
class FakeFactorEvaluator:
"""
@@ -36,16 +42,23 @@ def loading_from_file(
f"fake_factors_{config['channel']}_for_corrections.json",
)
- correctionlib.register_pyroot_binding()
-
log.info(f"Loading fake factor file {path} for process {process}")
- ROOT.gInterpreter.Declare(
- f'auto {process}_{"for_DRtoSR" if for_DRtoSR else ""} = '
- f'correction::CorrectionSet::from_file("{path}")->'
- f'at("{process}_fake_factors");'
- )
-
- return cls(process, var_dependences, for_DRtoSR)
+ correctionlib.register_pyroot_binding()
+
+ _declare_command_object = f"{process}_{'for_DRtoSR' if for_DRtoSR else ''}"
+ _declare_command = " ".join([
+ f"{process}_{'for_DRtoSR' if for_DRtoSR else ''} =",
+ f"correction::CorrectionSet::from_file(\"{path}\") ->",
+ f"at(\"{process}_fake_factors\");",
+ ])
+
+ if _declare_command_object not in _DECLARED_CXX_OBJECTS:
+ ROOT.gInterpreter.Declare(f"std::shared_ptr<const correction::Correction> {_declare_command_object} = nullptr;")
+ _DECLARED_CXX_OBJECTS.add(_declare_command_object)
+
+ ROOT.gInterpreter.ProcessLine(_declare_command)
+
+ return cls(process, var_dependences, for_DRtoSR, logger=logger)
@classmethod
def loading_from_CorrectionSet(
@@ -63,19 +76,28 @@ def loading_from_CorrectionSet(
log.info(f"Loading fake factor from string for process {process}")
correctionlib.register_pyroot_binding()
- ROOT.gInterpreter.Declare(
- f'auto {process}_{"for_DRtoSR" if for_DRtoSR else ""} = '
- f'correction::CorrectionSet::from_string("{literal}")->'
- f'at("{process}_fake_factors");'
- )
-
- return cls(process, var_dependences, for_DRtoSR)
+
+ _declare_command_object = f"{process}_{'for_DRtoSR' if for_DRtoSR else ''}"
+ _declare_command = " ".join([
+ f"{process}_{'for_DRtoSR' if for_DRtoSR else ''} =",
+ f"correction::CorrectionSet::from_string(\"{literal}\") ->",
+ f"at(\"{process}_fake_factors\");",
+ ])
+
+ if _declare_command_object not in _DECLARED_CXX_OBJECTS:
+ ROOT.gInterpreter.Declare(f"std::shared_ptr<const correction::Correction> {_declare_command_object} = nullptr;")
+ _DECLARED_CXX_OBJECTS.add(_declare_command_object)
+
+ ROOT.gInterpreter.ProcessLine(_declare_command)
+
+ return cls(process, var_dependences, for_DRtoSR, logger=logger)
def __init__(
self,
process: str,
var_dependences: List[str],
for_DRtoSR: bool,
+ logger: Union[str, logging.Logger, None] = None,
):
"""
Initiating a new evaluator for fake factors using correctionlib.
@@ -92,6 +114,8 @@ def __init__(
self.for_DRtoSR = "for_DRtoSR" if for_DRtoSR else ""
self.var_dependences = var_dependences
+ self.log = logging.getLogger(logger) if logger else logging.getLogger(__name__)
+
@property
def str_var_dependences(self) -> List[str]:
return ", ".join([f'(float){var}' for var in self.var_dependences])
@@ -107,11 +131,13 @@ def evaluate_fake_factor(self, rdf: Any) -> Any:
Return:
root DataFrame object with a new column with the evaluated fake factors
"""
+ self.log.debug(f"Evaluating fake factor for process {self.process} with variables {self.var_dependences} and for_DRtoSR={self.for_DRtoSR}")
eval_str = self.str_var_dependences + ', "nominal"'
rdf = rdf.Define(
f"{self.process}_fake_factor",
- f'{self.process}_{self.for_DRtoSR}->evaluate({{{eval_str}}})',
+ f"{self.process}_{self.for_DRtoSR}->evaluate({{{eval_str}}})",
)
+ self.log.debug(f"Defined column '{self.process}_fake_factor' with evaluation string: {self.process}_{self.for_DRtoSR}->evaluate({{{eval_str}}})")
return rdf
@@ -147,15 +173,23 @@ def loading_from_file(
variable = corr_variable if isinstance(corr_variable, str) else corr_variable[0]
- correctionlib.register_pyroot_binding()
log.info(f"Loading fake factor correction file {path} for process {process}")
- ROOT.gInterpreter.Declare(
- f'auto {process}_corr_{variable}_{_for_DRtoSR} = '
- f'correction::CorrectionSet::from_file("{path}")'
- f'->at("{process}_non_closure_{variable}_correction");'
- )
-
- return cls(process, corr_variable, for_DRtoSR)
+ correctionlib.register_pyroot_binding()
+
+ _declare_command_object = f"{process}_corr_{variable}_{_for_DRtoSR}"
+ _declare_command = " ".join([
+ f"{process}_corr_{variable}_{_for_DRtoSR} =",
+ f"correction::CorrectionSet::from_file(\"{path}\") ->",
+ f"at(\"{process}_non_closure_{variable}_correction\");",
+ ])
+
+ if _declare_command_object not in _DECLARED_CXX_OBJECTS:
+ ROOT.gInterpreter.Declare(f"std::shared_ptr<const correction::Correction> {_declare_command_object} = nullptr;")
+ _DECLARED_CXX_OBJECTS.add(_declare_command_object)
+
+ ROOT.gInterpreter.ProcessLine(_declare_command)
+
+ return cls(process, corr_variable, for_DRtoSR, logger=logger)
@classmethod
def loading_from_CorrectionSet(
@@ -175,19 +209,28 @@ def loading_from_CorrectionSet(
log.info(f"Loading fake factor correction from string for process {process}")
correctionlib.register_pyroot_binding()
- ROOT.gInterpreter.Declare(
- f'auto {process}_corr_{variable}_{"for_DRtoSR" if for_DRtoSR else ""} = '
- f'correction::CorrectionSet::from_string("{literal}")'
- f'->at("{process}_non_closure_{variable}_correction");'
- )
-
- return cls(process, corr_variable, for_DRtoSR)
+
+ _declare_command_object = f"{process}_corr_{variable}_{'for_DRtoSR' if for_DRtoSR else ''}"
+ _declare_command = " ".join([
+ f"{process}_corr_{variable}_{'for_DRtoSR' if for_DRtoSR else ''} =",
+ f"correction::CorrectionSet::from_string(\"{literal}\") ->",
+ f"at(\"{process}_non_closure_{variable}_correction\");",
+ ])
+
+ if _declare_command_object not in _DECLARED_CXX_OBJECTS:
+ ROOT.gInterpreter.Declare(f"std::shared_ptr<const correction::Correction> {_declare_command_object} = nullptr;")
+ _DECLARED_CXX_OBJECTS.add(_declare_command_object)
+
+ ROOT.gInterpreter.ProcessLine(_declare_command)
+
+ return cls(process, corr_variable, for_DRtoSR, logger=logger)
def __init__(
self,
process: str,
corr_variable: Union[str, Tuple[str, ...]],
for_DRtoSR: bool,
+ logger: Union[str, logging.Logger, None] = None,
):
"""
Initiating a new evaluator for fake factor corrections using correctionlib.
@@ -210,6 +253,8 @@ def __init__(
self.variable = corr_variable
self.var_dependences = [corr_variable]
+ self.log = logging.getLogger(logger) if logger else logging.getLogger(__name__)
+
@property
def corr_str(self) -> str:
return f"{self.process}_ff_corr_{self.variable}"
@@ -228,11 +273,13 @@ def evaluate_correction(self, rdf: Any) -> Any:
Return:
root DataFrame object with a new column with the evaluated fake factor corrections
"""
+ self.log.debug(f"Evaluating fake factor correction for process {self.process} with variable {self.variable} and for_DRtoSR={self.for_DRtoSR}")
eval_str = self.str_var_dependences + ', "nominal"'
rdf = rdf.Define(
self.corr_str,
f"{self.process}_corr_{self.variable}_{self.for_DRtoSR}->evaluate({{{eval_str}}})",
)
+ self.log.debug(f"Defined column '{self.corr_str}' with evaluation string: {self.process}_corr_{self.variable}_{self.for_DRtoSR}->evaluate({{{eval_str}}})")
return rdf
@@ -267,16 +314,23 @@ def loading_from_file(
directories = ["workdir", config["workdir_name"], config["era"]]
path = os.path.join(*directories, f"FF_corrections_{config['channel']}.json")
- correctionlib.register_pyroot_binding()
log.info(f"Loading DR_SR correction from file {path} for process {process}")
-
- ROOT.gInterpreter.Declare(
- f'auto {process}_corr_DR_SR = '
- f'correction::CorrectionSet::from_file("{path}")'
- f'->at("{process}_DR_SR_correction");'
- )
-
- return cls(process, corr_variable)
+ correctionlib.register_pyroot_binding()
+
+ _declare_command_object = f"{process}_corr_DR_SR"
+ _declare_command = " ".join([
+ f"{process}_corr_DR_SR =",
+ f"correction::CorrectionSet::from_file(\"{path}\") ->",
+ f"at(\"{process}_DR_SR_correction\");",
+ ])
+
+ if _declare_command_object not in _DECLARED_CXX_OBJECTS:
+ ROOT.gInterpreter.Declare(f"std::shared_ptr<const correction::Correction> {_declare_command_object} = nullptr;")
+ _DECLARED_CXX_OBJECTS.add(_declare_command_object)
+
+ ROOT.gInterpreter.ProcessLine(_declare_command)
+
+ return cls(process, corr_variable, logger=logger)
@classmethod
def loading_from_CorrectionSet(
@@ -306,19 +360,27 @@ def loading_from_CorrectionSet(
log.info(f"Loading DR_SR correction from string for process {process}")
correctionlib.register_pyroot_binding()
-
- ROOT.gInterpreter.Declare(
- f'auto {process}_corr_DR_SR = '
- f'correction::CorrectionSet::from_string("{literal}")'
- f'->at("{process}_DR_SR_correction");'
- )
-
- return cls(process, corr_variable)
+
+ _declare_command_object = f"{process}_corr_DR_SR"
+ _declare_command = " ".join([
+ f"{process}_corr_DR_SR =",
+ f"correction::CorrectionSet::from_string(\"{literal}\") ->",
+ f"at(\"{process}_DR_SR_correction\");",
+ ])
+
+ if _declare_command_object not in _DECLARED_CXX_OBJECTS:
+ ROOT.gInterpreter.Declare(f"std::shared_ptr<const correction::Correction> {_declare_command_object} = nullptr;")
+ _DECLARED_CXX_OBJECTS.add(_declare_command_object)
+
+ ROOT.gInterpreter.ProcessLine(_declare_command)
+
+ return cls(process, corr_variable, logger=logger)
def __init__(
self,
process: str,
corr_variable: Union[str, Tuple[str, ...]],
+ logger: Union[str, logging.Logger, None] = None
):
"""
Initializes the DR to SR correction evaluator.
@@ -338,6 +400,8 @@ def __init__(
self.variable = corr_variable
self.var_dependences = [corr_variable]
+ self.log = logging.getLogger(logger) if logger else logging.getLogger(__name__)
+
@property
def corr_str(self) -> str:
"""
@@ -362,12 +426,209 @@ def evaluate_correction(self, rdf: Any) -> Any:
Returns:
root DataFrame object with a new column with the evaluated DR to SR corrections
"""
+ self.log.debug(f"Evaluating DR to SR correction for process {self.process} with variable {self.variable}")
eval_str = ", ".join(self.str_var_dependences) + ', "nominal"'
-
root_corr_name = f"{self.process}_corr_DR_SR"
-
rdf = rdf.Define(
self.corr_str,
f"{root_corr_name}->evaluate({{{eval_str}}})",
)
+ self.log.debug(f"Defined column '{self.corr_str}' with evaluation string: {root_corr_name}->evaluate({{{eval_str}}})")
+ return rdf
+
+
+def _onnx_ctypes_callback(proc_id: int, features_ptr) -> float:
+ """
+ Pure Python function called natively from C++.
+ Intercepts raw memory pointer of the RDataFrame array, zero-copy casts it to numpy, and
+ runs ONNX inference.
+ """
+ try:
+ proc_info = _PROCESS_ID_MAP[proc_id]
+ sess = proc_info["sess"]
+ ort_input_names = proc_info["ort_input_names"]
+ ort_input_shapes = proc_info["ort_input_shapes"]
+ n_features = proc_info["n_features"]
+
+ features_array = np.ctypeslib.as_array(features_ptr, shape=(n_features,))
+
+ if len(ort_input_names) == 1:
+ expected_shape = ort_input_shapes[0]
+
+ target_shape = [1 if not isinstance(dim, int) else dim for dim in expected_shape] # Potential dynamic batch dim -> 1.
+
+ tensor = features_array.astype(np.float32).reshape(target_shape)
+ inputs = {ort_input_names[0]: tensor}
+ else:
+ inputs = {}
+ for name, shape, val in zip(ort_input_names, ort_input_shapes, features_array):
+ target_shape = [1 if not isinstance(dim, int) else dim for dim in shape]
+ inputs[name] = np.array([val], dtype=np.float32).reshape(target_shape)
+
+ out = sess.run(None, inputs)
+
+ return float(np.asarray(out[0]).item())
+ except Exception:
+ traceback.print_exc()
+ return -999.0
+
+
+# Global reference to the ctypes callback to avoid Python's Garbage Collector
+_PROCESS_ID_MAP = {}
+_C_CALLBACK_TYPE = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_int, ctypes.POINTER(ctypes.c_double))
+
+_GLOBAL_C_CALLBACK = _C_CALLBACK_TYPE(_onnx_ctypes_callback)
+_GLOBAL_C_CALLBACK_PTR = ctypes.cast(_GLOBAL_C_CALLBACK, ctypes.c_void_p).value
+
+
+class ONNXFakeFactorEvaluator:
+ """
+ Evaluator class to initiate a fake factor setup utilizing ONNX models.
+ The fake factors are evaluated dynamically inside the RDataFrame via ONNXRuntime.
+ """
+
+ @classmethod
+ def loading_from_config(
+ cls,
+ nn_config: Dict[str, Any],
+ process: str,
+ logger: str,
+ ) -> "ONNXFakeFactorEvaluator":
+ log = logging.getLogger(logger)
+ if process not in nn_config.get("target_processes", {}):
+ raise KeyError(f"Process {process} not found in NN config.")
+
+ proc_config = nn_config["target_processes"][process]
+ model_path = proc_config["model_path"]
+ model_inputs = proc_config["model_input"]
+ define_cols = proc_config.get("define_columns", {})
+
+ log.info(f"Loading ONNX model from {model_path} for process {process}")
+ return cls(process, model_path, model_inputs, define_cols, logger)
+
+ def __init__(
+ self,
+ process: str,
+ model_path: str,
+ model_inputs: List[str],
+ define_columns: Dict[str, str],
+ logger: Union[str, logging.Logger, None] = None
+ ):
+ self.process = process
+ self.model_path = model_path
+ self.model_inputs = model_inputs
+ self.define_columns = define_columns
+ self.cxx_func_name = f"onnx_eval_{self.process}"
+
+ self.log = logging.getLogger(logger) if logger else logging.getLogger(__name__)
+ self._is_initialized = False
+
+ def __getstate__(self):
+ """
+ Called when passing this object to a Multiprocessing Worker.
+ """
+ state = self.__dict__.copy()
+ state['_is_initialized'] = False
+ return state
+
+ def __setstate__(self, state):
+ """
+ Called inside the Multiprocessing Worker to rebuild the object.
+ """
+ self.__dict__.update(state)
+
+ def _initialize_worker_state(self):
+ """
+ Lazy Initialization: Sets up ONNX and ROOT C++ injection the first
+ time it is needed in whatever process calls it.
+ """
+ if self._is_initialized:
+ return
+
+ self.log.debug(f"Initializing ONNX Session and C++ context for {self.process} in PID {os.getpid()}")
+
+ if self.process not in [info.get("process") for info in _PROCESS_ID_MAP.values()]:
+ self.proc_id = len(_PROCESS_ID_MAP)
+ sess = ort.InferenceSession(self.model_path)
+ _PROCESS_ID_MAP[self.proc_id] = {
+ "process": self.process,
+ "sess": sess,
+ "ort_input_names": [inp.name for inp in sess.get_inputs()],
+ "ort_input_shapes": [inp.shape for inp in sess.get_inputs()],
+ "n_features": len(self.model_inputs)
+ }
+ else:
+ self.proc_id = next(k for k, v in _PROCESS_ID_MAP.items() if v["process"] == self.process)
+
+ if not hasattr(ROOT, self.cxx_func_name): # Inject C++ Code into ROOT's Cling compiler just once
+ cpp_code = f"""
+ #ifndef ONNX_EVAL_DEFINED_{self.process}
+ #define ONNX_EVAL_DEFINED_{self.process}
+ double {self.cxx_func_name}(const ROOT::VecOps::RVec<double>& features) {{
+ typedef double (*CallbackType)(int, const double*);
+ CallbackType cb = (CallbackType){_GLOBAL_C_CALLBACK_PTR}ULL;
+ return cb({self.proc_id}, features.data());
+ }}
+ #endif
+ """
+ self.log.debug(f"Registering C++ execution wrapper for {self.process}")
+ ROOT.gInterpreter.Declare(cpp_code)
+
+ self._is_initialized = True
+
+ def evaluate_fake_factor(self, rdf: Any) -> Any:
+ """
+ Evaluating the fake factors using the loaded ONNX model.
+ Missing columns are defined on the fly before inference.
+ """
+ self._initialize_worker_state()
+
+ existing_cols = [str(_column) for _column in rdf.GetColumnNames()]
+
+ for column, expression in self.define_columns.items():
+ if column not in existing_cols:
+ self.log.debug(f"Defining column '{column}' as '{expression}'")
+ rdf = rdf.Define(column, expression)
+
+ rvec_col = f"{self.process}_onnx_features"
+ inputs_csv = ", ".join([f"(double){inp}" for inp in self.model_inputs])
+ rvec_expr = f"ROOT::VecOps::RVec<double>{{{inputs_csv}}}"
+ self.log.debug(f"Packaging {len(self.model_inputs)} inputs into RVec column '{rvec_col}'")
+ rdf = rdf.Define(rvec_col, rvec_expr)
+
+ self.log.info(f"Applying ONNX fake factor evaluation for {self.process}")
+ rdf = rdf.Define(f"{self.process}_fake_factor", f"{self.cxx_func_name}({rvec_col})")
+
return rdf
+
+
+def get_fake_factor_evaluator(
+ config: Dict[str, Any],
+ process: str,
+ var_dependences: List[str],
+ for_DRtoSR: bool,
+ logger: str,
+) -> Union[FakeFactorEvaluator, ONNXFakeFactorEvaluator]:
+ """
+ Factory function to decide whether to load the traditional correctionlib JSON
+ evaluator or the new ONNX YAML-based NN evaluator.
+ """
+
+ log = logging.getLogger(logger)
+
+ directories = ["workdir", config["workdir_name"], config["era"]]
+ if not for_DRtoSR:
+ yaml_path = os.path.join(*directories, f"fake_factors_models_{config['channel']}.yaml")
+ else:
+ yaml_path = os.path.join(*directories, f"fake_factors_models_DR_SR_{config['channel']}.yaml")
+
+ if os.path.exists(yaml_path):
+ log.info(f"Found FF model config at {yaml_path}. Attempting to load ONNX-based evaluator for process {process}.")
+ with open(yaml_path, "r") as f:
+ nn_config = func.configured_yaml.load(f)
+ log.debug(f"NN Config for process {process}: {nn_config.get('target_processes', {}).get(process, {})}")
+ if process in nn_config.get("target_processes", {}) and "model_path" in nn_config["target_processes"][process]:
+ return ONNXFakeFactorEvaluator.loading_from_config(nn_config, process, logger)
+
+ log.info(f"Using classic correctionlib-based FakeFactorEvaluator for process {process}")
+ return FakeFactorEvaluator.loading_from_file(config, process, var_dependences, for_DRtoSR, logger)
diff --git a/helper/ff_functions.py b/helper/ff_functions.py
index 90070d1..8124425 100644
--- a/helper/ff_functions.py
+++ b/helper/ff_functions.py
@@ -5,6 +5,7 @@
import array
import functools
import inspect
+import io
import itertools as itt
import logging
import os
@@ -16,6 +17,9 @@
import numpy as np
import ROOT
+import scipy.stats
+from rich.console import Console
+from rich.table import Table
from wurlitzer import STDOUT, pipes
import configs.general_definitions as gd
@@ -93,9 +97,70 @@ def wrapper(*args: Any, **kwargs: Any) -> ROOT.RDataFrame:
log.warning("Filter resulted in zero events. Creating an empty snapshot with the correct schema.")
f = ROOT.TFile(cache_filepath, "RECREATE")
tree = ROOT.TTree(tree_name, tree_name)
+ _refs = [] # References needed in memory to prevent GC during branch assignment
for c in cols:
- arr = ROOT.std.vector("float")()
- tree.Branch(c, arr)
+ col_type = str(filtered_rdf.GetColumnType(c)).replace("ROOT::VecOps::RVec", "std::vector")
+
+ if col_type.startswith("std::vector"):
+ inner = col_type[col_type.find("<") + 1: col_type.rfind(">")]
+ try:
+ vec = ROOT.std.vector(inner)()
+ except Exception:
+ vec = ROOT.std.vector("float")()
+ tree.Branch(c, vec)
+ _refs.append(vec)
+ elif "string" in col_type:
+ s = ROOT.std.string()
+ tree.Branch(c, s)
+ _refs.append(s)
+ elif "Double" in col_type or "double" in col_type:
+ arr = array.array("d", [0.0])
+ tree.Branch(c, arr, f"{c}/D")
+ _refs.append(arr)
+ elif "Float" in col_type or "float" in col_type:
+ arr = array.array("f", [0.0])
+ tree.Branch(c, arr, f"{c}/F")
+ _refs.append(arr)
+ elif "ULong" in col_type or "unsigned long" in col_type:
+ arr = array.array("Q", [0])
+ tree.Branch(c, arr, f"{c}/l")
+ _refs.append(arr)
+ elif "Long" in col_type or "long" in col_type:
+ arr = array.array("q", [0])
+ tree.Branch(c, arr, f"{c}/L")
+ _refs.append(arr)
+ elif "UInt" in col_type or "unsigned int" in col_type:
+ arr = array.array("I", [0])
+ tree.Branch(c, arr, f"{c}/i")
+ _refs.append(arr)
+ elif "Int" in col_type or "int" in col_type:
+ arr = array.array("i", [0])
+ tree.Branch(c, arr, f"{c}/I")
+ _refs.append(arr)
+ elif "UShort" in col_type or "unsigned short" in col_type:
+ arr = array.array("H", [0])
+ tree.Branch(c, arr, f"{c}/s")
+ _refs.append(arr)
+ elif "Short" in col_type or "short" in col_type:
+ arr = array.array("h", [0])
+ tree.Branch(c, arr, f"{c}/S")
+ _refs.append(arr)
+ elif "UChar" in col_type or "unsigned char" in col_type:
+ arr = array.array("B", [0])
+ tree.Branch(c, arr, f"{c}/b")
+ _refs.append(arr)
+ elif "Char" in col_type or "char" in col_type:
+ arr = array.array("b", [0])
+ tree.Branch(c, arr, f"{c}/B")
+ _refs.append(arr)
+ elif "Bool" in col_type or "bool" in col_type:
+ arr = array.array("b", [0])
+ tree.Branch(c, arr, f"{c}/O")
+ _refs.append(arr)
+ else:
+ arr = array.array("f", [0.0])
+ tree.Branch(c, arr, f"{c}/F")
+ _refs.append(arr)
tree.Write()
f.Close()
else:
@@ -618,19 +683,20 @@ def rng_seed(seed: int) -> Generator[None, None, None]:
def controlplot_samples(
- use_embedding: bool,
+ sample_paths: List[str],
add_qcd: bool = True,
) -> List[str]:
"""
Returns the list of samples that should be used for the control plots.
Args:
- use_embedding: Boolean to use embedding or MC for genuine tau processes
+ sample_paths: List of sample paths to dynamically determine if embedding is used
add_qcd: Add QCD samples to the collection of samples to be plotted.
Returns:
List of samples used for controlplots
"""
+ use_embedding = any("embedding" == p.rsplit("/")[-1].rsplit(".")[0] for p in sample_paths)
samples = [
"diboson_J",
"diboson_L",
@@ -944,48 +1010,11 @@ def calc_fraction(hists: Dict[str, Any], target: str, processes: List[str]) -> A
return frac
-def add_fraction_variations(
- hists: Dict[str, Any], processes: List[str]
-) -> Dict[str, Dict[str, Any]]:
- """
- Function which calculates variations of fraction histograms. The fraction of each process
- is once varied up by 7% and all other processes are varied down by 7%. The same is done
- the other way around to get the down variations.
-
- Args:
- hists: Dictionary with fraction histograms for all considered processes
- processes: List of all cosidered processes for the fraction calculation
-
- Return:
- Dictionary with nominal, up and down variations of all considered processes
- """
- variations = dict()
- variations["nominal"] = dict(hists)
-
- for p in processes:
- hists_up = dict(hists)
- hists_down = dict(hists)
- for hist in hists:
- hists_up[hist] = hists_up[hist].Clone()
- hists_down[hist] = hists_down[hist].Clone()
- if p == hist:
- hists_up[hist].Scale(1.07)
- hists_down[hist].Scale(0.93)
- else:
- hists_up[hist].Scale(0.93)
- hists_down[hist].Scale(1.07)
- proc_name = "TTbar" if p == "ttbar_J" else p
- variations[f"frac{proc_name}UncUp"] = hists_up
- variations[f"frac{proc_name}UncDown"] = hists_down
-
- return variations
-
-
def get_yields_from_hists(
hists: Dict[str, Dict[str, Any]], processes: List[str]
) -> Dict[str, Dict[str, Dict[str, List[float]]]]:
"""
- This function transforms fraction histograms (with variations) obtained from calc_fraction() and add_fraction_variations()
+ This function transforms fraction histograms (with variations) obtained from calc_fraction()
into lists of fraction values which are later used to produce in the production of the correctionlib file.
Args:
@@ -1068,11 +1097,12 @@ def build_TGraph(
def fit_function(
- ff_hists: Union[List[Any], Any],
+ ff_hists: List[Any],
bin_edges: List[int],
logger: str,
fit_option: Union[str, List[str]],
limit_kwargs: Dict[str, Any],
+ stat_sigma: float = 1.0,
) -> Tuple[
Union[ROOT.TH1, ROOT.TGraphAsymmErrors],
Dict[str, Any],
@@ -1085,13 +1115,15 @@ def fit_function(
the fitted function variations are generated which are later used for plotting purposes.
Args:
- ff_hists: Either a list of nominal and MC varied ratio histograms or only the nominal ratio histogram
+ ff_hists: A list of either nominal and MC varied ratio histograms or only the nominal ratio histogram
bin_edges: Bins edges of the fitted variable, needed for the graphs for plotting
logger: Name of the logger that should be used
fit_option: List[str] correspond to a list of poly_n fits to be performed
with best fit being the one with the lowest chi2/ndf value and nominal and
variations being > 0 in fit range.
str: "poly_n" or "binwise", where n is the order of the polynomial fit
+ limit_kwargs: Dictionary with the keyword arguments for the fit limits, needs to include "limit_x" which is the upper limit of the fit range in x
+ stat_sigma: Sigma for the statistical uncertainty variation, default is 1.0 which corresponds to a one-sigma (68%) variation
Return:
1. Initial histogram as TGraph or TH1 (if binwise is used)
@@ -1103,11 +1135,11 @@ def fit_function(
fit_option = [fit_option]
do_mc_subtr_unc, ff_hist_up, ff_hist_down = False, None, None
- if isinstance(ff_hists, list):
+ if isinstance(ff_hists, list) and len(ff_hists) == 3:
ff_hist, ff_hist_up, ff_hist_down = ff_hists
do_mc_subtr_unc = True
else:
- ff_hist = ff_hists
+ ff_hist = ff_hists[0]
retrival_function, convert = fitting_helper.get_wrapped_functions_from_fits, True
if fit_option == "binwise":
@@ -1123,6 +1155,7 @@ def fit_function(
function_collection=fit_option,
verbose=True,
limit_x=limit_kwargs["limit_x"],
+ stat_sigma=stat_sigma,
)
return (
@@ -1134,7 +1167,7 @@ def fit_function(
def calculate_non_closure_correction(
- SRlike: Dict[str, Any], ARlike: Dict[str, Any]
+ SRlike: Dict[str, Any], ARlike: Dict[str, Any], skip_frac: bool = False,
) -> Tuple[Any, Any]:
"""
Function which calculates non closure corrections based on the histograms from the determination regions.
@@ -1142,12 +1175,13 @@ def calculate_non_closure_correction(
Args:
SRlike: Dictionary with histograms from the signal-like determination region for all relevant processes
ARlike: Dictionary with histograms from the application-like determination region for all relevant processes
+ skip_frac: Skip the fraction scaling if FF * AR-like matches SR-like by FF construction. Set frac to 1.0
Return:
1. Ratio histogram of data (MC subtracted) in a signal-like region and data (scaled to MC subtracted) with applied fake factors in an application-like region,
2. Process fraction in the application-like region
"""
- frac = ARlike["data_subtracted"].GetMaximum() / ARlike["data"].GetMaximum()
+ frac = 1.0 if skip_frac else (ARlike["data_subtracted"].GetMaximum() / ARlike["data"].GetMaximum())
predicted = ARlike["data_ff"].Clone()
predicted.Scale(frac)
@@ -1289,7 +1323,7 @@ def sparsify(edges: np.ndarray, values: np.ndarray, threshold: float = 0.01) ->
step_size = v_range * threshold
if step_size == 0: # all values are identical
- return np.array([edges[0], edges[-1]]), np.array([values[0], values[-1]])
+ return np.array([edges[0], edges[-1]]), np.array([values[0]])
total_variation = cumulative_activity[-1]
@@ -1312,6 +1346,97 @@ def sparsify(edges: np.ndarray, values: np.ndarray, threshold: float = 0.01) ->
return new_edges, new_values
+def fit_to_constant(hist: ROOT.TH1) -> Tuple[float, float]:
+ """
+ Fits a histogram to a constant function and returns the fit result and its uncertainty.
+
+ Args:
+ hist: ROOT histogram to be fitted
+ Return:
+ Tuple containing the fit result (constant value) and its uncertainty
+ """
+ fit_res = hist.Fit("pol0", "SQN0")
+ if int(fit_res) == 0:
+ return fit_res.Get().Parameter(0), fit_res.Get().ParError(0)
+ else:
+ return 1.0, 0.0
+
+
+def statistical_check(
+ hist: Any,
+ corr_dict: Dict[str, np.ndarray],
+ mc_shifted_hist: Union[Dict[str, Any], None] = None,
+ stat_sigma: float = 1.0,
+) -> Dict[str, np.ndarray]:
+ """
+ Performs chi2 test checking compatibility of the correction with 1.0 within data uncertainties.
+ If compatible and RuntimeVariables.SKIP_CORRECTIONS_COMPATIBLE_TO_ONE is True, the correction is set to 1.0.
+ Then additionally stat. and bandwidth variations are set to 1.0. For MCShift variations,
+ if histograms are provided, they are fitted to a constant; otherwise
+ they are set to 1.0 as well.
+
+ Args:
+ hist: Histogram of the correction to be checked
+ corr_dict: Dictionary with the correction values and variations to be updated if the correction is compatible with 1.0
+ mc_shifted_hist: Dictionary with histograms for the MC shifted variations
+ stat_sigma: Sigma scaling applied to the statistical uncertainty variations; default is 1.0
+ Return:
+ Updated corr_dict with additional keys:
+ "p_value": p-value of the chi2 test,
+ "_auto_skipped": boolean indicating whether the correction was set to 1.0
+ """
+
+ _, _, y_val, _, _, err_dn, err_up = build_TGraph(hist, return_components=True, add_xerrors_in_graph=True)
+
+ if hasattr(hist, "_extra_base_errors_mc_suppressed"):
+ if func.RuntimeVariables.USE_SUPPRESSED_MC_ERRORS_FOR_CORRECTION_SELECTION:
+ err = hist._extra_base_errors_mc_suppressed.clip(min=1e-6)
+ else:
+ err = hist._extra_base_errors_std.clip(min=1e-6)
+ else: # fallback
+ err = (np.array(err_dn) + np.array(err_up)).clip(min=1e-6) / 2.0
+
+ y, ndf = np.array(y_val), len(y_val)
+
+ p_value_shape_min, pulls = 1.0, (y - 1.0) / err
+ for window in range(1, ndf + 1):
+ for i in range(ndf - window + 1):
+ z_window = np.abs(np.sum(pulls[i: i + window]) / np.sqrt(window))
+ p_window = scipy.stats.norm.sf(z_window) * 2 # two-sided p-value
+ if p_window < p_value_shape_min:
+ p_value_shape_min = p_window
+ p_shape = 1 - (1 - p_value_shape_min) ** len(y)
+
+ corr_dict["default"]["p_value"] = p_shape
+
+ if (corr_dict["default"]["p_value"] > func.RuntimeVariables.SKIP_CORRECTIONS_P_VALUE) and func.RuntimeVariables.SKIP_CORRECTIONS_COMPATIBLE_TO_ONE:
+ corr_dict["default"]["_auto_skipped"] = True
+ corr_dict["default"]["nominal"] = np.ones_like(corr_dict["default"]["nominal"])
+
+ stat_unct_inclusive = 1.0 / np.sqrt(np.sum(1.0 / err**2))
+
+ for key in list(corr_dict["default"]["variations"].keys()):
+ if key.startswith((gd.VARIATIONS.SYST_BAND_ASYM, gd.VARIATIONS.SYST_BAND_HIGH, gd.VARIATIONS.SYST_BAND_LOW)): # No bandwidth uncertainty for a flat line
+ corr_dict["default"]["variations"][key] = np.ones_like(corr_dict["default"]["variations"][key])
+ elif key.startswith(gd.VARIATIONS.STAT) and "Up" in key:
+ corr_dict["default"]["variations"][key] = np.full_like(corr_dict["default"]["variations"][key], 1.0 + stat_sigma * stat_unct_inclusive)
+ elif key.startswith(gd.VARIATIONS.STAT) and "Down" in key:
+ corr_dict["default"]["variations"][key] = np.full_like(corr_dict["default"]["variations"][key], 1.0 - stat_sigma * stat_unct_inclusive)
+
+ if mc_shifted_hist is None:
+ corr_dict["default"]["variations"][gd.VARIATIONS.SYST_MC + "Up"] = np.ones_like(corr_dict["default"]["nominal"])
+ corr_dict["default"]["variations"][gd.VARIATIONS.SYST_MC + "Down"] = np.ones_like(corr_dict["default"]["nominal"])
+ else:
+ mc_up_val, _ = fit_to_constant(mc_shifted_hist["MCShiftUp"])
+ mc_dn_val, _ = fit_to_constant(mc_shifted_hist["MCShiftDown"])
+ corr_dict["default"]["variations"][gd.VARIATIONS.SYST_MC + "Up"] = np.full_like(corr_dict["default"]["nominal"], mc_up_val)
+ corr_dict["default"]["variations"][gd.VARIATIONS.SYST_MC + "Down"] = np.full_like(corr_dict["default"]["nominal"], mc_dn_val)
+ else:
+ corr_dict["default"]["_auto_skipped"] = False
+
+ return corr_dict
+
+
def smooth_function(
hist: Any,
bin_edges: List[float],
@@ -1319,8 +1444,8 @@ def smooth_function(
bandwidth: float,
bandwidth_variations: tuple = (0.5, 1.5),
mc_shifted_hist: Union[Dict[str, ROOT.TH1D], None] = None,
+ stat_sigma: float = 1.0,
sparsify_threshold: float = 0.01,
- for_FF: bool = False,
) -> Tuple[Any, Dict[str, np.ndarray]]:
"""
This function performs a smoothing fit of a histogram. Smoothing is mainly used for the corrections of the fake factors.
@@ -1333,7 +1458,6 @@ def smooth_function(
bandwidth: Bandwidth parameter for the kernel regression estimator
bandwidth_variations: Tuple with factors to vary the bandwidth up and down if correction_option is not "binwise"
sparsify_threshold: Threshold for sparsifying the smoothed function
- for_FF: If True, the function is used for fake factor calculations and returns a different set of keys
Return:
1. Initial root TGraph of the histogram,
2. Dictionary of arrays with information about the smoothed function values to be stored with correctionlib (nominal and variations)
@@ -1346,7 +1470,7 @@ def smooth_function(
bandwidth=bandwidth,
)
- nominal_graph, corr_dict = _smooth_function(**_kwargs, stat_sigma=1.0)
+ nominal_graph, corr_dict = _smooth_function(**_kwargs, stat_sigma=stat_sigma)
corr_dict["default"]["variations"][gd.VARIATIONS.STAT + "Up"] = corr_dict["default"]["variations"]["StatUp"]
corr_dict["default"]["variations"][gd.VARIATIONS.STAT + "Down"] = corr_dict["default"]["variations"]["StatDown"]
@@ -1357,8 +1481,8 @@ def smooth_function(
corr_dict["default"]["variations"][gd.VARIATIONS.SYST_MC + "Up"] = corr_dict["default"]["nominal"]
corr_dict["default"]["variations"][gd.VARIATIONS.SYST_MC + "Down"] = corr_dict["default"]["nominal"]
else:
- _, _mc_sub_up = _smooth_function(**{**_kwargs, "hist": mc_shifted_hist["MCShiftUp"]})
- _, _mc_sub_down = _smooth_function(**{**_kwargs, "hist": mc_shifted_hist["MCShiftDown"]})
+ _, _mc_sub_up = _smooth_function(**{**_kwargs, "hist": mc_shifted_hist["MCShiftUp"], "stat_sigma": stat_sigma})
+ _, _mc_sub_down = _smooth_function(**{**_kwargs, "hist": mc_shifted_hist["MCShiftDown"], "stat_sigma": stat_sigma})
corr_dict["default"]["variations"][gd.VARIATIONS.SYST_MC + "Up"] = _mc_sub_up["default"]["nominal"]
corr_dict["default"]["variations"][gd.VARIATIONS.SYST_MC + "Down"] = _mc_sub_down["default"]["nominal"]
@@ -1386,8 +1510,8 @@ def smooth_function(
"content": corr_dict["default"]["variations"][key],
}
else:
- _, _high = _smooth_function(**{**_kwargs, "bandwidth": bandwidth * bandwidth_variations[1]})
- _, _low = _smooth_function(**{**_kwargs, "bandwidth": bandwidth * bandwidth_variations[0]})
+ _, _high = _smooth_function(**{**_kwargs, "bandwidth": bandwidth * bandwidth_variations[1], "stat_sigma": stat_sigma})
+ _, _low = _smooth_function(**{**_kwargs, "bandwidth": bandwidth * bandwidth_variations[0], "stat_sigma": stat_sigma})
corr_dict["default"]["variations"][gd.VARIATIONS.SYST_BAND_HIGH + "Up"] = (_high["default"]["nominal"] - corr_dict["default"]["nominal"]) + corr_dict["default"]["nominal"]
corr_dict["default"]["variations"][gd.VARIATIONS.SYST_BAND_HIGH + "Down"] = (corr_dict["default"]["nominal"] - _high["default"]["nominal"]) + corr_dict["default"]["nominal"]
@@ -1397,6 +1521,8 @@ def smooth_function(
corr_dict["default"]["variations"][gd.VARIATIONS.SYST_BAND_ASYM + "Up"] = _high["default"]["nominal"]
corr_dict["default"]["variations"][gd.VARIATIONS.SYST_BAND_ASYM + "Down"] = _low["default"]["nominal"]
+ corr_dict = statistical_check(hist, corr_dict, mc_shifted_hist, stat_sigma)
+
# individual downsampling for correctionlib storage, not used for plotting
corr_dict["downsampled"] = {"nominal": None, "variations": {}}
corr_dict["downsampled"]["nominal"] = {
@@ -1503,8 +1629,8 @@ def _smooth_function(
if correction_option == "binwise":
_bins = np.array(bin_edges)
_nom = np.array(y)
- _up_stat = _nom + np.array(error_y_up)
- _down_stat = _nom - np.array(error_y_down)
+ _up_stat = _nom + stat_sigma * np.array(error_y_up)
+ _down_stat = _nom - stat_sigma * np.array(error_y_down)
elif correction_option == "smoothed":
_bins = np.array(smooth_x)
_nom = np.array(smooth_y)
@@ -1531,20 +1657,20 @@ def _smooth_function(
)
_up_stat = _nom + np.concatenate(
(
- error_y_up[left_slice],
+ stat_sigma * np.array(error_y_up[left_slice]),
[smooth_y_up[0]] if start_idx != 0 else [],
np.array(smooth_y_up)[overlap_slice],
[smooth_y_up[-1]] if end_idx != -1 else [],
- error_y_up[right_slice],
+ stat_sigma * np.array(error_y_up[right_slice]),
),
)
_down_stat = _nom - np.concatenate(
(
- error_y_down[left_slice],
+ stat_sigma * np.array(error_y_down[left_slice]),
[smooth_y_down[0]] if start_idx != 0 else [],
np.array(smooth_y_down)[overlap_slice],
[smooth_y_down[-1]] if end_idx != -1 else [],
- error_y_down[right_slice],
+ stat_sigma * np.array(error_y_down[right_slice]),
),
)
else:
@@ -1582,3 +1708,106 @@ def _append(a, b=None):
}
return nominal_graph, corr_dict
+
+
+def print_statistical_compatibility_summary(DR_SR_corrections: dict, non_closure_corrections: dict, logger: str) -> None:
+ """
+ Generates an ASCII table summarizing which corrections were applied vs. set to 1.0
+ due to compatibility with 1.0.
+ Args:
+ DR_SR_corrections: Dictionary with DR/SR corrections
+ non_closure_corrections: Dictionary with non-closure corrections
+ logger: Name of the logger used to emit the summary table
+ """
+
+ log = logging.getLogger(logger)
+
+ def flatten_categories(node, current_path=""):
+ if isinstance(node, dict) and "nominal" in node:
+ return {current_path if current_path else "Inclusive": node}
+ flattened = {}
+ if isinstance(node, dict):
+ for k, v in node.items():
+ formatted_k = str(k).replace("#", " ") # i.e. "njets#==0"
+ new_path = f"{current_path} | {formatted_k}" if current_path else formatted_k
+ flattened.update(flatten_categories(v["default"], new_path))
+ return flattened
+
+
+ merged = {}
+ for _dict in [DR_SR_corrections, non_closure_corrections]:
+ for process, correction in _dict.items():
+ if process not in merged:
+ merged[process] = {}
+ for correction_name, correction_categories in correction.items():
+ flat_correction_categories = flatten_categories(correction_categories)
+ if correction_name not in merged[process]:
+ merged[process][correction_name] = {}
+ merged[process][correction_name].update(flat_correction_categories)
+
+ string_io = io.StringIO() # headless Console writing to a string buffer without ANSI
+ console = Console(file=string_io, force_terminal=False, color_system=None, width=300)
+
+ log.info("Summary of corrections applied vs. set to 1.0 due to compatibility with 1.0:\n")
+ log.info(f"p-value threshold for auto-skipping: {func.RuntimeVariables.SKIP_CORRECTIONS_P_VALUE:.3f}")
+ log.info(f"Auto-skipping enabled: {func.RuntimeVariables.SKIP_CORRECTIONS_COMPATIBLE_TO_ONE}")
+
+ for process, correction in merged.items():
+ if not correction:
+ continue
+
+ all_categories = set()
+ for correction_categories in correction.values():
+ all_categories.update(correction_categories.keys())
+ all_categories = sorted(list(all_categories), key=lambda x: ("", x) if x == "Inclusive" else (x, x))
+
+ table = Table(title=f"Process: {process}", show_header=True)
+ table.add_column("Correction", no_wrap=True)
+
+ for category in all_categories:
+ table.add_column(f"Status\n{category}", justify="center")
+
+ table.add_column("", justify="center", width=2)
+
+ for category in all_categories:
+ table.add_column(f"p-value\n{category}", justify="center")
+
+ for correction_name, correction_categories in correction.items():
+ row = [correction_name.replace("non_closure_", "(nc) ")]
+
+ for category in all_categories:
+ node = correction_categories.get(category)
+ if node is None:
+ row.append("—")
+ else:
+ if node.get("_auto_skipped", False):
+ row.append("1.0")
+ elif "p_value" not in node:
+ if np.all(np.array(node["nominal"]) == 1.0):
+ row.append("-")
+ else:
+ row.append("Binwise")
+ else:
+ row.append("Applied")
+
+ row.append("")
+
+ for category in all_categories:
+ node = correction_categories.get(category)
+ if node is None:
+ row.append("—")
+ else:
+ if "p_value" in node:
+ pval = node["p_value"]
+ row.append(f"{pval:.3f}")
+ else:
+ row.append("-")
+
+ table.add_row(*row)
+
+ console.print(table)
+ console.print()
+
+ for line in string_io.getvalue().splitlines():
+ if line.strip():
+ log.info(line)
diff --git a/helper/fitting_helper.py b/helper/fitting_helper.py
index f18abf7..c62644d 100644
--- a/helper/fitting_helper.py
+++ b/helper/fitting_helper.py
@@ -88,6 +88,7 @@ def poly_n_func(
limit_y_nominal: Tuple[float, float] = _no_limit_default,
limit_y_up: Tuple[float, float] = _no_limit_default,
limit_y_down: Tuple[float, float] = _no_limit_default,
+ sigma: float = 1.0,
) -> Tuple[Callable, Callable, Callable]:
"""
Definition of a polynomial function with degree n and an up and down varied version.
@@ -115,7 +116,7 @@ def _nominal(x, param):
x = [_limit(x[0], *limit_x_up)]
nom = nominal(x, [param[i] for i in range(nominal.__n_param__)])
unc = func_yerr(x, param, nominal.__n_param__, _nominal)
- return _limit(nom + unc / 2.0, *limit_y_up)
+ return _limit(nom + sigma * unc / 2.0, *limit_y_up)
up.__name__ = f"poly_{n}_up"
up.__n_param__ = (n + 1) + (n + 1) * (n + 1)
@@ -128,7 +129,7 @@ def _nominal(x, param):
x = [_limit(x[0], *limit_x_down)]
nom = nominal(x, [param[i] for i in range(nominal.__n_param__)])
unc = func_yerr(x, param, nominal.__n_param__, _nominal)
- return _limit(nom - unc / 2.0, *limit_y_down)
+ return _limit(nom - sigma * unc / 2.0, *limit_y_down)
down.__name__ = f"poly_{n}_down"
down.__n_param__ = (n + 1) + (n + 1) * (n + 1)
@@ -146,6 +147,7 @@ def poly_n_str_func(
limit_y_nominal: Tuple[float, float] = _no_limit_default,
limit_y_up: Tuple[float, float] = _no_limit_default,
limit_y_down: Tuple[float, float] = _no_limit_default,
+ sigma: float = 1.0,
) -> Tuple[Callable, Callable, Callable]:
"""
Definition of a polynomial function as a string with degree n and an up and down varied version.
@@ -173,7 +175,7 @@ def up(x, param):
x = _limit(x, *limit_x_up, is_string=True)
nom = nominal(x, [param[i] for i in range(nominal.__n_param__)])
unc = str_func_yerr(x, param, nominal.__n_param__, _nominal)
- return _limit(f"(({nom}) + ({unc}) / 2.0)", *limit_y_up, is_string=True)
+ return _limit(f"(({nom}) + ({sigma} * {unc}) / 2.0)", *limit_y_up, is_string=True)
up.__name__ = f"poly_{n}_up"
up.__n_param__ = (n + 1) + (n + 1) * (n + 1)
@@ -182,7 +184,7 @@ def down(x, param):
x = _limit(x, *limit_x_down, is_string=True)
nom = nominal(x, [param[i] for i in range(nominal.__n_param__)])
unc = str_func_yerr(x, param, nominal.__n_param__, _nominal)
- return _limit(f"(({nom}) - ({unc}) / 2.0)", *limit_y_down, is_string=True)
+ return _limit(f"(({nom}) - ({sigma} * {unc}) / 2.0)", *limit_y_down, is_string=True)
down.__name__ = f"poly_{n}_down"
down.__n_param__ = (n + 1) + (n + 1) * (n + 1)
@@ -315,6 +317,7 @@ def get_wrapped_functions_from_fits(
Tuple[float, float], Dict[str, Tuple[float, float]]
] = _no_limit_default,
#
+ stat_sigma: float = 1.0,
**kwargs: Dict[str, Any],
) -> Tuple[Dict[str, Callable], Dict[str, str]]:
"""
@@ -356,8 +359,8 @@ def get_wrapped_functions_from_fits(
verbose=verbose,
)
- poly_n_func_limited = partial(poly_n_func, **limit_kwargs)
- poly_n_str_func_limited = partial(poly_n_str_func, **limit_kwargs)
+ poly_n_func_limited = partial(poly_n_func, **limit_kwargs, sigma=stat_sigma)
+ poly_n_str_func_limited = partial(poly_n_str_func, **limit_kwargs, sigma=stat_sigma)
_functions, _str_functions = dict(), dict()
_functions_limited, _str_functions_limited = dict(), dict()
@@ -479,14 +482,15 @@ def get_wrapped_functions_from_fits(
def hist_func(
hist: ROOT.TH1,
+ sigma: float = 1.0,
return_callable: bool = True,
) -> Tuple[Callable, List[float], List[float]]:
_nominal, _up, _down, _edges = [], [], [], []
for i in range(1, hist.GetNbinsX() + 1):
_nominal.append(hist.GetBinContent(i))
- _up.append(hist.GetBinContent(i) + hist.GetBinErrorUp(i))
- _down.append(hist.GetBinContent(i) - hist.GetBinErrorLow(i))
+ _up.append(hist.GetBinContent(i) + sigma * hist.GetBinErrorUp(i))
+ _down.append(hist.GetBinContent(i) - sigma * hist.GetBinErrorLow(i))
_edges.append((hist.GetBinLowEdge(i), hist.GetBinLowEdge(i + 1)))
def nominal(x):
@@ -529,6 +533,7 @@ def get_wrapped_hists(
do_mc_subtr_unc: bool,
logger: str,
verbose: bool = True,
+ sigma: float = 1.0,
**kwargs: Dict[str, Any],
) -> Dict[str, Union[ROOT.TF1, str, Callable]]:
"""
@@ -556,8 +561,8 @@ def get_wrapped_hists(
callable_results, str_results = {"nominal": None, "variations": {}}, {"nominal": None, "variations": {}}
- _nom = hist_func(ff_hist, return_callable=True)
- _nom_str = hist_func(ff_hist, return_callable=False)
+ _nom = hist_func(ff_hist, sigma=sigma, return_callable=True)
+ _nom_str = hist_func(ff_hist, sigma=sigma, return_callable=False)
callable_results["nominal"] = _nom[0]
callable_results["variations"][gd.VARIATIONS.STAT + "Up"] = _nom[1]
@@ -571,20 +576,20 @@ def get_wrapped_hists(
callable_results.update(
{
gd.VARIATIONS.SYST_MC + "Up": hist_func(
- ff_hist_up, return_callable=True
+ ff_hist_up, sigma=sigma, return_callable=True
)[0],
gd.VARIATIONS.SYST_MC + "Down": hist_func(
- ff_hist_down, return_callable=True
+ ff_hist_down, sigma=sigma, return_callable=True
)[0],
}
)
str_results.update(
{
gd.VARIATIONS.SYST_MC + "Up": hist_func(
- ff_hist_up, return_callable=False
+ ff_hist_up, sigma=sigma, return_callable=False
)[0],
gd.VARIATIONS.SYST_MC + "Down": hist_func(
- ff_hist_down, return_callable=False
+ ff_hist_down, sigma=sigma, return_callable=False
)[0],
}
)
diff --git a/helper/functions.py b/helper/functions.py
index 2352f48..6b76d7c 100644
--- a/helper/functions.py
+++ b/helper/functions.py
@@ -13,6 +13,7 @@
import sys
from decimal import Decimal
from typing import Any, Callable, Dict, List, Tuple, Union
+import pathlib
import numpy as np
import ROOT
@@ -181,6 +182,10 @@ class RuntimeVariables(object):
USE_CACHED_INTERMEDIATE_STEPS = False
RDataFrameWrapper = PassThroughWrapper
+ SKIP_CORRECTIONS_COMPATIBLE_TO_ONE = True
+ SKIP_CORRECTIONS_P_VALUE = 0.05
+ USE_SUPPRESSED_MC_ERRORS_FOR_CORRECTION_SELECTION = True
+
def __new__(cls) -> "RuntimeVariables":
if not hasattr(cls, "instance"):
cls.instance = super(RuntimeVariables, cls).__new__(cls)
@@ -283,6 +288,7 @@ def correction_config_comparison(
is_same &= nested_object_comparison(_test_config["SRlike_cuts"], _config["SRlike_cuts"])
is_same &= nested_object_comparison(_test_config["ARlike_cuts"], _config["ARlike_cuts"])
+ is_same &= nested_object_comparison(_test_config.get("use_embedding"), _config.get("use_embedding"))
_test_config = _test_config["non_closure"][closure_corr]
_config = _config["non_closure"][closure_corr]
@@ -527,13 +533,18 @@ def check_inputfiles(path: str, process: str, tree: str) -> List[str]:
log = logging.getLogger(f"preselection.{process}")
fsname = "root://cmsdcache-kit-disk.gridka.de/"
- xrdclient = client.FileSystem(fsname)
- status, listing = xrdclient.dirlist(path.replace(fsname, ""))
-
- if not status.ok:
- log.info(f"Error: {status.message}")
- sys.exit(1)
-
+ if fsname in path:
+ xrdclient = client.FileSystem(fsname)
+ status, listing = xrdclient.dirlist(path.replace(fsname, ""))
+
+ if not status.ok:
+ raise FileNotFoundError(f"Could not access path {path} on xrootd server. Status: {status.message}")
+
+ elif path.startswith("/ceph"):
+ listing = [pathlib.Path(it) for it in os.listdir(path)]
+ else:
+ raise ValueError(f"Unsupported file system for path {path}")
+
selected_files = []
for f in listing:
if f.name.endswith(".root"):
@@ -680,6 +691,35 @@ def define_columns(rdf: Any, column_definitions: dict, process: str) -> Any:
return rdf
+class SamplePathList(list):
+ """
+ A list-like object holding sample paths that can safely toggle between
+ embedding and MC-only states while preserving the full unmodified list in memory.
+ """
+ def __init__(self, all_paths: List[str], use_embedding: bool) -> None:
+ self.all_paths = all_paths
+ self.is_embedded = use_embedding
+
+ filtered_paths = []
+ for f in all_paths:
+ sample = f.rsplit("/")[-1].rsplit(".")[0]
+ if use_embedding and "_T" in sample:
+ continue
+ elif not use_embedding and sample == "embedding":
+ continue
+ filtered_paths.append(f)
+
+ super().__init__(filtered_paths)
+
+ def switch_embedding_state(self, use_embedding: bool) -> "SamplePathList":
+ if use_embedding == self.is_embedded:
+ return self
+ return SamplePathList(self.all_paths, use_embedding)
+
+ def __reduce__(self):
+ return (self.__class__, (self.all_paths, self.is_embedded))
+
+
def get_samples(config: Dict[str, Union[str, Dict, List]]) -> List[str]:
"""
Function to get a list of all sample paths which will be used for the fake factor calculation.
@@ -701,15 +741,7 @@ def get_samples(config: Dict[str, Union[str, Dict, List]]) -> List[str]:
f"The following files are loaded for era: {config['era']}, channel: {config['channel']} from {general_sample_path}"
)
log.info("-" * 50)
- sample_paths = glob.glob(general_sample_path)
- tmp_list = glob.glob(general_sample_path)
-
- for f in tmp_list:
- sample = f.rsplit("/")[-1].rsplit(".")[0]
- if config["use_embedding"] and "_T" in sample:
- sample_paths.remove(f)
- elif not config["use_embedding"] and sample == "embedding":
- sample_paths.remove(f)
+ sample_paths = SamplePathList(glob.glob(general_sample_path), config.get("use_embedding", False))
for f in sample_paths:
log.info(f)
diff --git a/helper/hooks_and_patches.py b/helper/hooks_and_patches.py
index 126c4cc..3cf94e2 100644
--- a/helper/hooks_and_patches.py
+++ b/helper/hooks_and_patches.py
@@ -7,6 +7,9 @@
_EXTRA_PARAM_MEANS = "_extra_weighted_means"
_EXTRA_PARAM_COUNTS = "_extra_weighted_counts"
_EXTRA_PARAM_FLAG = "_has_extra_params"
+_EXTRA_PARAM_BASE_VALUES = "_extra_base_values"
+_EXTRA_PARAM_BASE_ERRORS_STD = "_extra_base_errors_std"
+_EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED = "_extra_base_errors_mc_suppressed"
class PassThroughWrapper:
@@ -39,7 +42,7 @@ def __getattribute__(self, name: str) -> Any:
Returns:
Attribute value.
"""
- try: # Try to get the attribute from this instance or subclass.
+ try: # Try to get the attribute from this instance or subclass.
return object.__getattribute__(self, name)
except AttributeError: # Fallback: delegate to the wrapped object.
_obj = object.__getattribute__(self, "_obj")
@@ -69,7 +72,7 @@ def __delattr__(self, name: str) -> None:
class Histo1DPatchedRDataFrame(PassThroughWrapper):
"""
- A wrapper around ROOT.RDataFrame patching Histo1D method adding extra attributes
+ A wrapper around ROOT.RDataFrame patching Histo1D method adding extra attributes
(weighted counts and weighted means) to the produced histograms.
"""
@@ -134,6 +137,14 @@ def new_GetValue():
setattr(main_hist, _EXTRA_PARAM_MEANS, weighted_means)
setattr(main_hist, _EXTRA_PARAM_FLAG, flag)
+ N_bins = main_hist.GetNbinsX()
+ values = np.array([main_hist.GetBinContent(i) for i in range(1, N_bins + 1)])
+ errors = np.array([main_hist.GetBinError(i) for i in range(1, N_bins + 1)])
+
+ setattr(main_hist, _EXTRA_PARAM_BASE_VALUES, values)
+ setattr(main_hist, _EXTRA_PARAM_BASE_ERRORS_STD, errors)
+ setattr(main_hist, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, errors.copy())
+
# Overwrite GetValue so further calls return the histogram directly.
main_hist.GetValue = lambda: main_hist
return main_hist
@@ -228,6 +239,14 @@ def _AddError(self: ROOT.TH1, scale: float = 1.0) -> ROOT.TH1:
setattr(clone, _EXTRA_PARAM_MEANS, new_means)
setattr(clone, _EXTRA_PARAM_FLAG, True)
+ if hasattr(self, _EXTRA_PARAM_BASE_VALUES):
+ values = getattr(self, _EXTRA_PARAM_BASE_VALUES)
+ error_std = getattr(self, _EXTRA_PARAM_BASE_ERRORS_STD)
+ error_supressed = getattr(self, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED)
+ setattr(clone, _EXTRA_PARAM_BASE_VALUES, values + scale * error_std)
+ setattr(clone, _EXTRA_PARAM_BASE_ERRORS_STD, error_std.copy())
+ setattr(clone, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, error_supressed.copy())
+
return clone
@@ -275,6 +294,28 @@ def patched_Add(
setattr(self, _EXTRA_PARAM_MEANS, _means)
setattr(self, _EXTRA_PARAM_FLAG, True)
+ values_1 = getattr(self, _EXTRA_PARAM_BASE_VALUES, None)
+ values_2 = getattr(other, _EXTRA_PARAM_BASE_VALUES, None)
+ if values_1 is not None and values_2 is not None:
+ values = values_1 + factor * values_2
+
+ errors_std_1 = getattr(self, _EXTRA_PARAM_BASE_ERRORS_STD, None)
+ errors_std_2 = getattr(other, _EXTRA_PARAM_BASE_ERRORS_STD, None)
+ assert errors_std_1 is not None and errors_std_2 is not None, "Both histograms must have _extra_base_errors_std for error propagation."
+ errors_std = np.sqrt(errors_std_1 ** 2 + (factor * errors_std_2) ** 2)
+
+ errors_supressed_1 = getattr(self, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, None)
+ errors_supressed_2 = getattr(other, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, None)
+ assert errors_supressed_1 is not None and errors_supressed_2 is not None, "Both histograms must have _extra_base_errors_mc_suppressed for error propagation."
+ if factor < 0:
+ errors_supressed = errors_supressed_1.copy()
+ else:
+ errors_supressed = np.sqrt(errors_supressed_1 ** 2 + (factor * errors_supressed_2) ** 2)
+
+ setattr(self, _EXTRA_PARAM_BASE_VALUES, values)
+ setattr(self, _EXTRA_PARAM_BASE_ERRORS_STD, errors_std)
+ setattr(self, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, errors_supressed)
+
return self
@@ -312,6 +353,31 @@ def patched_Multiply(
setattr(self, _EXTRA_PARAM_MEANS, _means)
setattr(self, _EXTRA_PARAM_FLAG, True)
+ values_1 = getattr(self, _EXTRA_PARAM_BASE_VALUES, None)
+ values_2 = getattr(other, _EXTRA_PARAM_BASE_VALUES, None)
+ if values_1 is not None and values_2 is not None:
+ values = values_1 * (factor * values_2)
+
+ error_std_1 = getattr(self, _EXTRA_PARAM_BASE_ERRORS_STD, None)
+ error_std_2 = getattr(other, _EXTRA_PARAM_BASE_ERRORS_STD, None)
+ assert error_std_1 is not None and error_std_2 is not None, "Both histograms must have _extra_base_errors_std for error propagation."
+
+ relative_error_std_1 = np.divide(error_std_1, values_1, out=np.zeros_like(error_std_1), where=(values_1 != 0))
+ relative_error_std_2 = np.divide(error_std_2, values_2, out=np.zeros_like(error_std_2), where=(values_2 != 0))
+ error_std = np.abs(values) * np.sqrt(relative_error_std_1 ** 2 + relative_error_std_2 ** 2)
+
+ error_supressed_1 = getattr(self, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, None)
+ error_supressed_2 = getattr(other, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, None)
+ assert error_supressed_1 is not None and error_supressed_2 is not None, "Both histograms must have _extra_base_errors_mc_suppressed for error propagation."
+
+ relative_error_supressed_1 = np.divide(error_supressed_1, values_1, out=np.zeros_like(error_supressed_1), where=(values_1 != 0))
+ relative_error_supressed_2 = np.divide(error_supressed_2, values_2, out=np.zeros_like(error_supressed_2), where=(values_2 != 0))
+ error_supressed = np.abs(values) * np.sqrt(relative_error_supressed_1 ** 2 + relative_error_supressed_2 ** 2)
+
+ setattr(self, _EXTRA_PARAM_BASE_VALUES, values)
+ setattr(self, _EXTRA_PARAM_BASE_ERRORS_STD, error_std)
+ setattr(self, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, error_supressed)
+
return self
@@ -348,6 +414,31 @@ def patched_Divide(
setattr(self, _EXTRA_PARAM_MEANS, _means)
setattr(self, _EXTRA_PARAM_FLAG, True)
+ values_1 = getattr(self, _EXTRA_PARAM_BASE_VALUES, None)
+ values_2 = getattr(other, _EXTRA_PARAM_BASE_VALUES, None)
+ if values_1 is not None and values_2 is not None:
+ values = np.divide(values_1, factor * values_2, out=np.zeros_like(values_1), where=(values_2 != 0))
+
+ error_std_1 = getattr(self, _EXTRA_PARAM_BASE_ERRORS_STD, None)
+ error_std_2 = getattr(other, _EXTRA_PARAM_BASE_ERRORS_STD, None)
+ assert error_std_1 is not None and error_std_2 is not None, "Both histograms must have _extra_base_errors_std for error propagation."
+
+ relative_error_std_1 = np.divide(error_std_1, values_1, out=np.zeros_like(error_std_1), where=(values_1 != 0))
+ relative_error_std_2 = np.divide(error_std_2, values_2, out=np.zeros_like(error_std_2), where=(values_2 != 0))
+ error_std = np.abs(values) * np.sqrt(relative_error_std_1 ** 2 + relative_error_std_2 ** 2)
+
+ error_supressed_1 = getattr(self, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, None)
+ error_supressed_2 = getattr(other, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, None)
+ assert error_supressed_1 is not None and error_supressed_2 is not None, "Both histograms must have _extra_base_errors_mc_suppressed for error propagation."
+
+ relative_error_supressed_1 = np.divide(error_supressed_1, values_1, out=np.zeros_like(error_supressed_1), where=(values_1 != 0))
+ relative_error_supressed_2 = np.divide(error_supressed_2, values_2, out=np.zeros_like(error_supressed_2), where=(values_2 != 0))
+ error_supressed = np.abs(values) * np.sqrt(relative_error_supressed_1 ** 2 + relative_error_supressed_2 ** 2)
+
+ setattr(self, _EXTRA_PARAM_BASE_VALUES, values)
+ setattr(self, _EXTRA_PARAM_BASE_ERRORS_STD, error_std)
+ setattr(self, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, error_supressed)
+
return self
@@ -380,6 +471,18 @@ def patched_Scale(
setattr(self, _EXTRA_PARAM_MEANS, _means)
setattr(self, _EXTRA_PARAM_FLAG, True)
+ values = getattr(self, _EXTRA_PARAM_BASE_VALUES, None)
+ if values is not None:
+ error_std = getattr(self, _EXTRA_PARAM_BASE_ERRORS_STD, None)
+ error_std_supressed = getattr(self, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, None)
+
+ assert error_std is not None and error_std_supressed is not None, "Histogram must have _extra_base_errors_std and _extra_base_errors_mc_suppressed for error propagation."
+ assert error_std_supressed is not None, "Histogram must have _extra_base_errors_mc_suppressed for error propagation."
+
+ setattr(self, _EXTRA_PARAM_BASE_VALUES, values * factor)
+ setattr(self, _EXTRA_PARAM_BASE_ERRORS_STD, error_std * abs(factor))
+ setattr(self, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, error_std_supressed * abs(factor))
+
return self
@@ -408,6 +511,12 @@ def patched_Clone(
setattr(clone, _EXTRA_PARAM_COUNTS, getattr(self, _EXTRA_PARAM_COUNTS))
if hasattr(self, _EXTRA_PARAM_MEANS):
setattr(clone, _EXTRA_PARAM_MEANS, getattr(self, _EXTRA_PARAM_MEANS))
+ if hasattr(self, _EXTRA_PARAM_BASE_VALUES):
+ setattr(clone, _EXTRA_PARAM_BASE_VALUES, getattr(self, _EXTRA_PARAM_BASE_VALUES).copy())
+ if hasattr(self, _EXTRA_PARAM_BASE_ERRORS_STD):
+ setattr(clone, _EXTRA_PARAM_BASE_ERRORS_STD, getattr(self, _EXTRA_PARAM_BASE_ERRORS_STD).copy())
+ if hasattr(self, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED):
+ setattr(clone, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED, getattr(self, _EXTRA_PARAM_BASE_ERRORS_MC_SUPPRESSED).copy())
return clone
diff --git a/helper/weights.py b/helper/weights.py
index 85fdbb6..2677d74 100644
--- a/helper/weights.py
+++ b/helper/weights.py
@@ -91,9 +91,9 @@ def lumi_weight(rdf: Any, era: str) -> Any:
elif era == "2016postVFP":
rdf = rdf.Redefine("weight", "weight * 16.81 * 1000.")
elif era == "2017":
- rdf = rdf.Redefine("weight", "weight * 41.48 * 1000.")
+ rdf = rdf.Redefine("weight", "weight * 42.07 * 1000.")
elif era == "2018":
- rdf = rdf.Redefine("weight", "weight * 59.83 * 1000.")
+ rdf = rdf.Redefine("weight", "weight * 59.56 * 1000.")
elif era == "2022preEE":
rdf = rdf.Redefine("weight", "weight * 7.9804 * 1000.")
elif era == "2022postEE":
diff --git a/mkdocs.yml b/mkdocs.yml
index c85ba66..661d238 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -10,7 +10,7 @@ repo_name: KIT-CMS/TauFakeFactors
repo_url: https://github.com/KIT-CMS/TauFakeFactors/
# Copyright
-copyright: Copyright © 2025 Nikita Shadskiy, Artur Monsch
+copyright: Copyright © 2026 Nikita Shadskiy, Artur Monsch
# Theme
theme:
@@ -38,6 +38,7 @@ nav:
- Automated Binning: binning.md
- Fake Factor Calculation: fakefactors.md
- Fake Factor Corrections: corrections.md
+ - Fake Factor with ML (in development): ml_fakefactors.md
- Documentation: documentation.md
# Extensions
@@ -58,3 +59,8 @@ markdown_extensions:
- pymdownx.keys
- pymdownx.tabbed:
alternate_style: true
+ - pymdownx.arithmatex:
+ generic: true
+
+extra_javascript:
+ - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
diff --git a/preselection.py b/preselection.py
index 627f131..ad0e982 100644
--- a/preselection.py
+++ b/preselection.py
@@ -97,13 +97,6 @@ def run_sample_preselection(args: Tuple[str, Dict[str, Union[Dict, List, str]],
for cut in selection_conf:
rdf = rdf.Filter(f"({selection_conf[cut]})", f"cut on {cut}")
- # For Run 3 DY samples, we need to collect the events from two samples, that need to be selected
- # for different flavors
- if sample.startswith("DYto2L"):
- rdf = rdf.Filter("lhe_drell_yan_decay_flavor == 11 || lhe_drell_yan_decay_flavor == 13", "DY e/mu selection")
- if sample.startswith("DYto2Tau"):
- rdf = rdf.Filter("lhe_drell_yan_decay_flavor == 15", "DY tau selection")
-
if process == "embedding":
rdf = filters.emb_tau_gen_match(rdf=rdf, channel=config["channel"])