diff --git a/.gitignore b/.gitignore
index 68bc17f9..fb24bafe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,6 +51,7 @@ coverage.xml
 .pytest_cache/
 cover/
+
 # Translations
 *.mo
 *.pot
diff --git a/analysis/vbs_vvh/analysis_processor_2FJMET_fromRDF.py b/analysis/vbs_vvh/analysis_processor_2FJMET_fromRDF.py
new file mode 100644
index 00000000..abeb5617
--- /dev/null
+++ b/analysis/vbs_vvh/analysis_processor_2FJMET_fromRDF.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python
+#import sys
+import coffea
+import numpy as np
+import awkward as ak
+np.seterr(divide='ignore', invalid='ignore', over='ignore')
+from coffea import processor
+import hist
+from hist import axis
+from coffea.analysis_tools import PackedSelection
+
+from coffea.nanoevents.methods import vector
+from config.variables.var_config import obj, other_objs, dense_variables_config
+
+from config.paths import objsel_cf, get_cutflow, cutflow_yamls_dir, default_cutflow_yaml
+from config.variables.extra_definitions import VV_ndaughters, Higgs_final_state
+
+class AnalysisProcessor(processor.ProcessorABC):
+
+    def __init__(self, samples, wc_names_lst=[], n_minus_1=False, project=None, cutflow_name=None):
+        print('debug', project, cutflow_name)
+
+        if cutflow_name is None:
+            print("default cutflow is used.")
+            self.cutflow = objsel_cf
+        else:
+            print(f'{cutflow_yamls_dir}/{project}.yaml', cutflow_name)
+            try:  # try the project yaml, then try all.yaml, and finally just use the basic cutflow
+                self.cutflow = get_cutflow(f'{cutflow_yamls_dir}/{project}.yaml', cutflow_name)
+            except Exception:
+                try:
+                    self.cutflow = get_cutflow(default_cutflow_yaml, cutflow_name)
+                    print(f"Cannot get {cutflow_name} in {str(project)}.yaml. Found the cutflow in the default yaml and using it instead.")
+                except Exception:
+                    try:
+                        print(get_cutflow(f'{cutflow_yamls_dir}/{project}.yaml'))
+                        print("Can read the project yaml but not the cutflow. Using the default cutflow instead.")
+                        self.cutflow = objsel_cf
+                    except Exception:
+                        self.cutflow = objsel_cf
+                        print(f"Warning: Cutflow {cutflow_name} not found. Using the default cutflow instead.")
+            else:
+                print(f"Cutflow {cutflow_name} is used.")
+        self._n_minus_1 = n_minus_1  # selection masks will be n-1 cuts
+
+        self._samples = samples
+        self._wc_names_lst = wc_names_lst  # Wilson coefficients; not used
+        self._dtype = np.float32  # note that this has been changed from a function input to this fixed definition
+
+        # Create the dense axes for the histograms
+        self._dense_axes_dict = {
+            var_name: cfg["axis"]
+            for var_name, cfg in dense_variables_config.items()
+        }
+
+        # Add histograms to the dictionary that will be passed on to dict_accumulator
+        dout = {}
+        for dense_axis_name in self._dense_axes_dict.keys():
+            dout[dense_axis_name] = hist.Hist(
+                hist.axis.StrCategory([], growth=True, name="process", label="process"),
+                hist.axis.StrCategory([], growth=True, name="category", label="category"),
+                hist.axis.StrCategory([], growth=True, name="systematic", label="systematic"),
+                hist.axis.StrCategory([], growth=True, name="year", label="year"),
+                self._dense_axes_dict[dense_axis_name],
+                storage="weight",  # Keeps track of sumw2
+                name="Counts",
+            )
+
+        # Set the accumulator
+        self._accumulator = processor.dict_accumulator(dout)
+        '''
+        # Set the list of hists to fill
+        if hist_lst is None:
+            # If the hist list is None, assume we want to fill all hists
+            self._hist_lst = list(self._accumulator.keys())
+        else:
+            # Otherwise, just fill the specified subset of hists
+            for hist_to_include in hist_lst:
+                if hist_to_include not in self._accumulator.keys():
+                    raise Exception(f"Error: Cannot specify hist \"{hist_to_include}\", it is not defined in the processor.")
+            self._hist_lst = hist_lst  # Which hists to fill
+        '''
+
+    @property
+    def accumulator(self):
+        return self._accumulator
+
+    @property
+    def columns(self):
+        return self._columns
+
+    # Main function: run on a given dataset
+    def process(self, events):
+
+        ak.behavior.update(vector.behavior)
+
+        #move triggers to sparse axes? later?
+        Flag_NoAK4BJet = events.Pass_NoAK4BJet
+
+        objects = {
+            name: builder(events)
+            for name, builder in obj.items()
+        }
+        objects.update({
+            name: builder(events, objects)
+            for name, builder in other_objs.items()
+        })
+        #jets = objects['goodAK4Jets']
+        #jets_sorted = jets[ak.argsort(jets.pt, axis=1, ascending=False)]
+        #objects['leading_jet'] = jets_sorted[:, 0]
+
+        var = {
+            var_name: cfg["expr"](events, objects)
+            for var_name, cfg in dense_variables_config.items()
+        }
+        # Dataset parameters
+        json_name = events.metadata["dataset"]
+        isSig = self._samples[json_name]["isSig"]
+        isData = self._samples[json_name]["isData"]
+        '''
+        isData = self._samples[json_name]["isData"]
+        histAxisName = self._samples[json_name]["histAxisName"]
+        year = self._samples[json_name]["year"]
+        xsec = self._samples[json_name]["xsec"]
+        sow = self._samples[json_name]["nSumOfWeights"]
+        '''
+
+        # An array with the same length as events that is just 1 for each event
+        # There is probably a better way to do this, but we use this method elsewhere, so we keep it for consistency
+        events['nom'] = ak.ones_like(var['nGoodAK4'])
+
+        ################### Lepton selection ####################
+
+        ######### Normalization and weights ###########
+
+        # These weights can go outside of the sys loop since they do not depend on the pt of muons or jets
+        # We only calculate these values if not isData
+        # Note: add() will generally modify up/down weights, so if these are needed for any reason after this point, we should instead pass copies to add()
+        # Note: Here we will add to the weights object the SFs that do not depend on any of the forthcoming loops
+        # If we're doing systematics and this isn't data, we will loop over the obj correction syst list
+        obj_corr_syst_var_list = ['nominal']
+
+        # Loop over the list of systematic variations (that impact object kinematics) that we've constructed
+        # Make a copy of the base weights object, so that each time through the loop we do not double count systs
+        # In this loop over systs that impact kinematics, we will add to the weights objects the SFs that depend on the object kinematics
+
+        #################### Jets ####################
+
+        # Put the variables we'll plot into a dictionary for easy access later
+        dense_variables_dict = var
+
+        ######### Store boolean masks with PackedSelection ##########
+
+        selections = PackedSelection(dtype='uint64')
+        '''
+        for sel, crit_fn_tuple in self.cutflow.items():
+            crit_fn = crit_fn_tuple[0]  # get the function
+            selections.add(sel, crit_fn(events, var, obj))
+
+        for sel, crit_fn in objsel_cf.items():
+            crit_fn = crit_fn_tuple[0]  # get the function
+            selections.add(sel, crit_fn(events, var, obj))'''
+
+        for sel, crit_fn in self.cutflow.items():
+            mask = crit_fn(events, var, obj)
+            selections.add(sel, mask)
+        '''
+        for sel, crit_fn_tuple in objsel_cf.items():
+            crit_fn = crit_fn_tuple[0]  # extract the function
+            mask = crit_fn(events, var, obj)  # evaluate -> bool array
+            selections.add(sel, mask)
+        '''
+        ######### Fill histos #########
+
+        exclude_var_dict = {}  # Any particular ones to skip
+
+        # Set up the list of weight fluctuations to loop over
+        # For now the systs do not depend on the category, so we can figure this out outside of the filling loop
+        # Define all weight variations
+        wgt_var_dict = {
+            "nominal": events.weight,
+            #"count" : events.nom,
+        }
+
+        if isSig:  # or use the ak.firsts trick
+            print("Add other coupling points")
+            wgt_var_dict["SM"] = events.weight * ak.fill_none(events.LHEReweightingWeight[:,60], 0)
+            wgt_var_dict["c2v_1p5"] = events.weight * ak.fill_none(events.LHEReweightingWeight[:,72], 0)
+            daughters = VV_ndaughters(events)
+            n_had = ak.fill_none(daughters['n_had'], 0)
+            n_MET = ak.fill_none(daughters['n_MET'], 0)
+            mask1 = (n_had == 2) & (n_MET == 2)
+            mask2 = (n_had == 2) & (n_MET == 1)
+            wgt_var_dict["c2v_1p5_qqNuNu"] = events.weight * ak.fill_none(events.LHEReweightingWeight[:,72], 0) * mask1
+            wgt_var_dict["c2v_1p5_qqlNu"] = events.weight * ak.fill_none(events.LHEReweightingWeight[:,72], 0) * mask2
+
+            # add other coupling points
+        # -----------------------------------------------------------
+
+        # Build the cut mask dictionary for each selection
+        cut_mask_dict = {}
+        if self._n_minus_1:
+            # n-1 cutflow: exclude the current cut from the mask
+            for sel in self.cutflow:
+                if 'objsel' in self.cutflow:
+                    exclusive_sels = [k for k in self.cutflow if k != sel] + ['objsel']  #+ list(objsel_cf)
+                else:
+                    exclusive_sels = [k for k in self.cutflow if k != sel] + list(objsel_cf)
+                cut_mask_dict[sel] = selections.all(*exclusive_sels)
+        else:
+            # normal cutflow: cumulative cuts
+            cumulative_cuts = []
+            for sel in self.cutflow:
+                cumulative_cuts.append(sel)
+                cut_mask_dict[sel] = selections.all(*cumulative_cuts)
+
+        # Produce histograms for each weight variation and each selection
+        for wgt_key, wgt in wgt_var_dict.items():
+            for sel, sel_mask in cut_mask_dict.items():
+                for dense_axis_name, dense_axis_vals in dense_variables_dict.items():
+                    axes_fill_info_dict = {
+                        dense_axis_name: ak.fill_none(dense_axis_vals[sel_mask], 0),
+                        "weight": ak.fill_none(wgt[sel_mask], 0),
+                        "process": ak.fill_none(events.name[sel_mask], "unknown"),  #changed from sample_name to name
+                        "category": sel,
+                        "systematic": wgt_key,
+                    }
+                    # Include the year if needed (it can be safely added here)
+                    if "year" in self.accumulator[dense_axis_name].axes.name:
+                        axes_fill_info_dict["year"] = ak.fill_none(events.year[sel_mask], "unknown")  #changed from sample_year to year
+
+                    self.accumulator[dense_axis_name].fill(**axes_fill_info_dict)
+        return self.accumulator
+
+    def postprocess(self, accumulator):
+        return accumulator
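For context, a minimal driver for this processor might look like the sketch below. It assumes coffea 0.7-style Runner/executor APIs; the fileset paths, the dataset key, the sample metadata dict, and the "nGoodAK4" key are hypothetical placeholders, not part of this PR.

import awkward as ak
from coffea import processor
from coffea.nanoevents import NanoAODSchema
from analysis_processor_2FJMET_fromRDF import AnalysisProcessor  # this file

fileset = {"vbs_vvh_c2v_2": ["skims/vbs_vvh_c2v_2_0.root"]}    # hypothetical input skims
samples = {"vbs_vvh_c2v_2": {"isSig": True, "isData": False}}  # minimal metadata the processor reads

run = processor.Runner(
    executor=processor.IterativeExecutor(),  # swap for FuturesExecutor(workers=N) to parallelize
    schema=NanoAODSchema,
)
out = run(
    fileset,
    treename="Events",
    processor_instance=AnalysisProcessor(samples, project="MET_c2v1p5", cutflow_name="c2v1p5_met300"),
)
print(out["nGoodAK4"])  # one hist.Hist per dense variable defined in dense_variables_config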
diff --git a/analysis/vbs_vvh/config/cutflow_config_handling.py b/analysis/vbs_vvh/config/cutflow_config_handling.py
new file mode 100644
index 00000000..a5308659
--- /dev/null
+++ b/analysis/vbs_vvh/config/cutflow_config_handling.py
@@ -0,0 +1,59 @@
+import yaml
+import awkward as ak
+
+def get_cutflow(yaml_file, cutflow=None):
+    """
+    Load a cutflow from a YAML file.
+    If cutflow is None, return the full dictionary of cutflows with lambdas.
+    """
+
+    def make_lambda(expr: str):
+        """
+        Turn a YAML expression into a lambda for a Coffea cutflow.
+        Tolerates entries written by export_cutflow_to_yaml, which may still
+        carry a leading 'lambda events, var, obj:' prefix and a trailing comma
+        (a bare trailing comma would otherwise make eval return a 1-tuple).
+        """
+        expr = expr.strip()
+        if expr.startswith("lambda events, var, obj:"):
+            expr = expr.split(":", 1)[1]
+        expr = expr.strip().rstrip(",")
+        return eval(f"lambda events, var, obj: {expr}", {"ak": ak})
+
+    with open(yaml_file, 'r') as f:
+        config = yaml.safe_load(f)
+
+    cutflow_dict = {}
+
+    for cf_name, cf_steps in config.items():
+        step_dict = {}
+        for step, expr in cf_steps.items():
+            # Store the compiled lambda for each step
+            step_dict[step] = make_lambda(expr)
+        cutflow_dict[cf_name] = step_dict
+
+    if cutflow is None:
+        print(f"read cutflows from project: {yaml_file}")
+        return cutflow_dict
+    else:
+        print(f"read cutflow from project: {yaml_file}/{cutflow}")
+        return cutflow_dict[cutflow]
+
+
+def export_cutflow_to_yaml(cutflow_dict, yaml_file):
+    """
+    Convert a cutflow_dict with lambda functions into a YAML file with string expressions.
+    """
+    import inspect
+    yaml_dict = {}
+    for cf_name, cf_steps in cutflow_dict.items():
+        yaml_dict[cf_name] = {}
+        for step_name, func in cf_steps.items():
+            # Extract the source of the lambda function
+            try:
+                src = inspect.getsource(func).strip()
+                # Example: 'lambda events, var, obj: events.Pass_MetTriggers == 1'
+                # Split on the lambda header rather than the first ':', which may
+                # belong to a dict key when the lambda was defined inline; also
+                # drop a trailing comma left over from the surrounding dict literal
+                expr = src.split("lambda events, var, obj:", 1)[1].strip().rstrip(",")
+            except (OSError, TypeError, IndexError):
+                expr = ""
+            yaml_dict[cf_name][step_name] = expr
+
+    # Save to YAML
+    with open(yaml_file, 'w') as f:
+        yaml.dump(yaml_dict, f, sort_keys=False)
+
+    print(f"Cutflow exported to {yaml_file}")
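A quick usage sketch for get_cutflow: each step compiles to a lambda(events, var, obj) that returns a boolean mask. The import path and YAML location below are assumptions for illustration.

import awkward as ak
from config.cutflow_config_handling import get_cutflow  # assumed import path

steps = get_cutflow("config/cutflows/MET_basic.yaml", "MET_objsel")

# Toy events carrying only the branches these expressions touch
events = ak.Array({"Pass_MetTriggers": [1, 0, 1], "Met_pt": [250.0, 80.0, 120.0]})
print(steps["Met_trigger"](events, {}, {}))  # [True, False, True]
print(steps["MET_pt"](events, {}, {}))       # [True, False, True]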
+ """ + yaml_dict = {} + for cf_name, cf_steps in cutflow_dict.items(): + yaml_dict[cf_name] = {} + for step_name, func in cf_steps.items(): + # Extract the source of the lambda function + try: + src = inspect.getsource(func).strip() + # Example: 'lambda events, var, obj: events.Pass_MetTriggers == 1' + expr = src.split(":", 1)[1].strip() # take only the expression part + except (OSError, TypeError): + expr = "" + yaml_dict[cf_name][step_name] = expr + + # Save to YAML + with open(yaml_file, 'w') as f: + yaml.dump(yaml_dict, f, sort_keys=False) + + print(f"Cutflow exported to {yaml_file}") diff --git a/analysis/vbs_vvh/config/cutflows/MET_basic copy.yaml b/analysis/vbs_vvh/config/cutflows/MET_basic copy.yaml new file mode 100644 index 00000000..516a0483 --- /dev/null +++ b/analysis/vbs_vvh/config/cutflows/MET_basic copy.yaml @@ -0,0 +1,10 @@ +#basic cuts +MET_objsel: + all_events: "(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)" + Met_trigger: "events.Pass_MetTriggers == 1" + MET_pt: "events.Met_pt > 100" + MET_significance: "events.Met_significance > 20" + ak4_bveto: "events.Pass_NoAK4BJet == 1" + loose_H: "events.HiggsScore > 0.1" + loose_V1: "events.V1Score > 0.1" + has_vbs: "ak.all(events.vbs_idx_max_Mjj > -1, axis=1)" diff --git a/analysis/vbs_vvh/config/cutflows/MET_basic.yaml b/analysis/vbs_vvh/config/cutflows/MET_basic.yaml new file mode 100644 index 00000000..516a0483 --- /dev/null +++ b/analysis/vbs_vvh/config/cutflows/MET_basic.yaml @@ -0,0 +1,10 @@ +#basic cuts +MET_objsel: + all_events: "(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)" + Met_trigger: "events.Pass_MetTriggers == 1" + MET_pt: "events.Met_pt > 100" + MET_significance: "events.Met_significance > 20" + ak4_bveto: "events.Pass_NoAK4BJet == 1" + loose_H: "events.HiggsScore > 0.1" + loose_V1: "events.V1Score > 0.1" + has_vbs: "ak.all(events.vbs_idx_max_Mjj > -1, axis=1)" diff --git a/analysis/vbs_vvh/config/cutflows/MET_c2v1p5.yaml b/analysis/vbs_vvh/config/cutflows/MET_c2v1p5.yaml new file mode 100644 index 00000000..5eb66498 --- /dev/null +++ b/analysis/vbs_vvh/config/cutflows/MET_c2v1p5.yaml @@ -0,0 +1,119 @@ +#limiting met_pt cut to 300GeV, test at c2v=1.5 +objsel: + all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)' + MET_trigger: '(events.Pass_MetTriggers == 1)' + nGoodAK8geq2: '(events.nGoodAK8 >= 2)' + has_vbs_pair: 'ak.all(events.vbs_idx_max_Mjj > -1, axis=1)' + MET_pt_loose: '(events.Met_pt > 100)' + H_score_loose: '(events.HiggsScore > 0.1)' + V1_score_loose: '(events.V1Score > 0.1)' + AK4_Bveto: '(events.Pass_NoAK4BJet == 1)' + +c2v1p5_met300: + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_300: '(events.Met_pt > 300)' + +#using met 250 instead +c2v1p5_met250: + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_250: '(events.Met_pt > 250)' + +c2v1p5_vbsj_Mjj: + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_250: '(events.Met_pt > 250)' + vbsj_Mjj_500: 'var["vbsj_Mjj"]>500' + +c2v1p5_vbsj_deta: + objsel: 
diff --git a/analysis/vbs_vvh/config/cutflows/MET_c2v1p5.yaml b/analysis/vbs_vvh/config/cutflows/MET_c2v1p5.yaml
new file mode 100644
index 00000000..5eb66498
--- /dev/null
+++ b/analysis/vbs_vvh/config/cutflows/MET_c2v1p5.yaml
@@ -0,0 +1,119 @@
+#limiting met_pt cut to 300GeV, test at c2v=1.5
+objsel:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)'
+  MET_trigger: '(events.Pass_MetTriggers == 1)'
+  nGoodAK8geq2: '(events.nGoodAK8 >= 2)'
+  has_vbs_pair: 'ak.all(events.vbs_idx_max_Mjj > -1, axis=1)'
+  MET_pt_loose: '(events.Met_pt > 100)'
+  H_score_loose: '(events.HiggsScore > 0.1)'
+  V1_score_loose: '(events.V1Score > 0.1)'
+  AK4_Bveto: '(events.Pass_NoAK4BJet == 1)'
+
+c2v1p5_met300:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+
+#using met 250 instead
+c2v1p5_met250:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_250: '(events.Met_pt > 250)'
+
+c2v1p5_vbsj_Mjj:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_250: '(events.Met_pt > 250)'
+  vbsj_Mjj_500: 'var["vbsj_Mjj"]>500'
+
+c2v1p5_vbsj_deta:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_250: '(events.Met_pt > 250)'
+  vbsj_Mjj_500: 'var["vbsj_Mjj"]>500'
+  vbsj_deta_2: 'var["vbsj_deta"]>2'
+
+#want to try this later (nak4)
+#renamed from c2v1p5_nAK4: the name was duplicated below, and yaml.safe_load silently keeps only the last occurrence
+c2v1p5_nAK4_le15:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_250: '(events.Met_pt > 250)'
+  vbsj_Mjj_500: 'var["vbsj_Mjj"]>500'
+  vbsj_deta_2: 'var["vbsj_deta"]>2'
+  vbsj_nAK4: 'var["nAK4"]<=15'
+
+c2v1p5_leadAK8_dphi:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_250: '(events.Met_pt > 250)'
+  vbsj_Mjj_500: 'var["vbsj_Mjj"]>500'
+  vbsj_deta_2: 'var["vbsj_deta"]>2'
+  leadAK8_met_dphi: 'var["leadAK8_MET_dphi"]>1.5'
+
+c2v1p5_V1MidScore:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_250: '(events.Met_pt > 250)'
+  vbsj_Mjj_500: 'var["vbsj_Mjj"]>500'
+  vbsj_deta_2: 'var["vbsj_deta"]>2'
+  leadAK8_met_dphi: 'var["leadAK8_MET_dphi"]>1.5'
+  cut_V1: 'events.V1Score > 0.6'
+
+c2v1p5_nAK4:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_250: '(events.Met_pt > 250)'
+  vbsj_Mjj_500: 'var["vbsj_Mjj"]>500'
+  vbsj_deta_2: 'var["vbsj_deta"]>2'
+  leadAK8_met_dphi: 'var["leadAK8_MET_dphi"]>1.5'
+  cut_V1: 'events.V1Score > 0.6'
+  vbsj_nAK4: 'var["nAK4"]<12'
+
+
+c2v1p5_HMET_dphi:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_250: '(events.Met_pt > 250)'
+  vbsj_Mjj_500: 'var["vbsj_Mjj"]>500'
+  vbsj_deta_2: 'var["vbsj_deta"]>2'
+  leadAK8_met_dphi: 'var["leadAK8_MET_dphi"]>1.5'
+  cut_V1: 'events.V1Score > 0.6'
+  vbsj_nAK4: 'var["nAK4"]<12'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.3'
+
+c2v1p5_V1MET_dphi:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_250: '(events.Met_pt > 250)'
+  vbsj_Mjj_500: 'var["vbsj_Mjj"]>500'
+  vbsj_deta_2: 'var["vbsj_deta"]>2'
+  leadAK8_met_dphi: 'var["leadAK8_MET_dphi"]>1.5'
+  cut_V1: 'events.V1Score > 0.6'
+  vbsj_nAK4: 'var["nAK4"]<12'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.3'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.3'
+
+
+c2v1p5_Higgs_PNet_TvsQCD:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_250: '(events.Met_pt > 250)'
+  vbsj_Mjj_500: 'var["vbsj_Mjj"]>500'
+  vbsj_deta_2: 'var["vbsj_deta"]>2'
+  leadAK8_met_dphi: 'var["leadAK8_MET_dphi"]>1.5'
+  cut_V1: 'events.V1Score > 0.6'
nAK4_l12: 'var["nAK4"]<12' + H_MET_dphi: 'var["HMET_dphi"] > 0.3' + V1_MET_dphi: 'var["V1MET_dphi"] > 0.3' + Higgs_PNet_TvsQCD: 'var["Higgs_particleNet_TvsQCD"]<0.5' + + +c2v1p5_mid: + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_200: '(events.Met_pt > 200)' + vbsj_Mjj_500: 'var["vbsj_Mjj"]>500' + vbsj_deta_2: 'var["vbsj_deta"]>2' + leadAK8_met_dphi: 'var["leadAK8_MET_dphi"]>1.5' + cut_V1: 'events.V1Score > 0.6' + nAK4_l12: 'var["nAK4"]<12' + Higgs_PNet_TvsQCD: 'var["Higgs_particleNet_TvsQCD"]<0.5' + + +c2v1p5_mid_v2: + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_200: '(events.Met_pt > 200)' + vbsj_Mjj_500: 'var["vbsj_Mjj"]>500' + vbsj_deta_2: 'var["vbsj_deta"]>2' + cut_V1: 'events.V1Score > 0.6' + nAK4_l12: 'var["nAK4"]<12' + Higgs_PNet_TvsQCD: 'var["Higgs_particleNet_TvsQCD"]<0.5' + dphi_diff_2p7: 'var["dphi_diff"]<2.7' \ No newline at end of file diff --git a/analysis/vbs_vvh/config/cutflows/MET_met300.yaml b/analysis/vbs_vvh/config/cutflows/MET_met300.yaml new file mode 100644 index 00000000..156527eb --- /dev/null +++ b/analysis/vbs_vvh/config/cutflows/MET_met300.yaml @@ -0,0 +1,241 @@ +#limiting met_pt cut to 300GeV +MET_met300_0: + all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers + == 1),' + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt + > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore + > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1),' + +MET_met300_1: + all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers + == 1),' + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt + > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore + > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1),' + MET_pt_tight: 'events.Met_pt > 300,' + +MET_met300_2: + all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers + == 1),' + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt + > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore + > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1),' + MET_pt_tight: 'events.Met_pt > 300,' + vbsj_deta: '(var["vbsj_deta"]>6),' + +MET_met300_2a: + all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers + == 1),' + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt + > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore + > 0.1) & (events.V1Score > 0.1),' + MET_pt_tight: 'events.Met_pt > 300,' + max_deta_cut: 'var["max_deta"] > 6,' + +MET_met300_3: + all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers + == 1),' + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt + > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore + > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1),' + MET_pt_tight: 'events.Met_pt > 300,' + vbsj_deta: '(var["vbsj_deta"]>6),' + H_MET_dphi: 'var["HMET_dphi"] > 0.6,' + +MET_met300_3a: + all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers + == 1),' + 
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'events.Met_pt > 300,'
+  max_deta_cut: 'var["max_deta"] > 6,'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6,'
+
+
+MET_met300_4:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1),'
+  MET_pt_tight: 'events.Met_pt > 300,'
+  vbsj_deta: '(var["vbsj_deta"]>6),'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6,'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.6,'
+
+MET_met300_4a:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'events.Met_pt > 300,'
+  max_deta_cut: 'var["max_deta"] > 6,'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6,'
+  tight_H: 'events.HiggsScore > 0.9,'
+
+
+MET_met300_5:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1),'
+  MET_pt_tight: 'events.Met_pt > 300,'
+  vbsj_deta: '(var["vbsj_deta"]>6),'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6,'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.6,'
+  cut_H: 'events.HiggsScore > 0.6,'
+
+MET_met300_5a:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'events.Met_pt > 300,'
+  max_deta_cut: 'var["max_deta"] > 6,'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6,'
+  tight_H: 'events.HiggsScore > 0.9,'
+  tight_V1: 'events.V1Score > 0.9,'
+
+MET_met300_5b:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'events.Met_pt > 300,'
+  max_deta_cut: 'var["max_deta"] > 6,'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6,'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.6,'
+  tight_H: 'events.HiggsScore > 0.9,'
+
+MET_met300_6:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1),'
+  MET_pt_tight: 'events.Met_pt > 300,'
+  vbsj_deta: '(var["vbsj_deta"]>6),'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6,'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.6,'
+  cut_H: 'events.HiggsScore > 0.6,'
+  cut_V1: 'events.V1Score > 0.6,'
+
+MET_met300_6c:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'events.Met_pt > 300,'
+  max_deta_cut: 'var["max_deta"] > 6,'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6,'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.6,'
+  tight_H: 'events.HiggsScore > 0.9,'
+  tight_V1: 'events.V1Score > 0.9,'
+
+MET_met300_6a:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'events.Met_pt > 300,'
+  vbsj_deta: '(var["vbsj_deta"]>6),'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6,'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.6,'
+
+MET_met300_6b:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'events.Met_pt > 300,'
+  vbsj_deta: '(var["vbsj_deta"]>6),'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6,'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.6,'
+  cut_H: 'events.HiggsScore > 0.6,'
+  cut_V1: 'events.V1Score > 0.6,'
+
+MET_met300_7:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1),'
+  MET_pt_tight: 'events.Met_pt > 300,'
+  vbsj_deta: '(var["vbsj_deta"]>6),'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6,'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.6,'
+  cut_H: 'events.HiggsScore > 0.6,'
+  cut_V1: 'events.V1Score > 0.6,'
+  vbsj_Mjj_1250: '(var["vbsj_Mjj"]>1250),'
+
+MET_met300_7a:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1),'
+  MET_pt_tight: 'events.Met_pt > 300,'
+  vbsj_deta: '(var["vbsj_deta"]>6),'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6,'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.6,'
+  vbsj_Mjj_1250: '(var["vbsj_Mjj"]>1250),'
+  cut_V1: 'events.V1Score > 0.6,'
+
+MET_met300_full:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1)'
+  MET_pt_tight: 'events.Met_pt > 300'
+  vbsj_deta: '(var["vbsj_deta"]>2.5)'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.6'
+  cut_H: 'events.HiggsScore > 0.6'
+  cut_V1: 'events.V1Score > 0.6'
+
+MET_met300_objsel:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)'
+  MET_trigger: '(events.Pass_MetTriggers == 1)'
+  nGoodAK8geq2: '(events.nGoodAK8 >= 2)'
+  has_vbs_pair: 'ak.all(events.vbs_idx_max_Mjj > -1, axis=1)'
+  MET_pt_loose: '(events.Met_pt > 100)'
+  MET_significance_loose: '(events.Met_significance > 20)'
+  H_score_loose: '(events.HiggsScore > 0.1)'
+  V1_score_loose: '(events.V1Score > 0.1)'
+  AK4_Bveto: '(events.Pass_NoAK4BJet == 1)'
+
+MET_met300_2GoodAK8:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1)'
+  nGoodAK8_2: '(events.nGoodAK8==2)'
+  MET_pt_tight: 'events.Met_pt > 300'
+  vbsj_deta: '(var["vbsj_deta"]>2.5)'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.6'
+  cut_H: 'events.HiggsScore > 0.6'
+  cut_V1: 'events.V1Score > 0.6'
+
+
+MET_met300_objsel_reordered:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)'
+  MET_trigger: '(events.Pass_MetTriggers == 1)'
+  nGoodAK8geq2: '(events.nGoodAK8 >= 2)'
+  has_vbs_pair: 'ak.all(events.vbs_idx_max_Mjj > -1, axis=1)'
+  MET_pt_loose: '(events.Met_pt > 100)'
+  MET_significance_loose: '(events.Met_significance > 20)'
+  H_score_loose: '(events.HiggsScore > 0.1)'
+  V1_score_loose: '(events.V1Score > 0.1)'
+  AK4_Bveto: '(events.Pass_NoAK4BJet == 1)'
+
+MET_met300_checkmetsig:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1)'
+  METsig: '(events.Met_significance > 20)'
+  MET_pt_tight: 'events.Met_pt > 300'
+  vbsj_deta: '(var["vbsj_deta"]>2.5)'
+  H_MET_dphi: 'var["HMET_dphi"] > 0.6'
+  V1_MET_dphi: 'var["V1MET_dphi"] > 0.6'
+  cut_H: 'events.HiggsScore > 0.6'
+  cut_V1: 'events.V1Score > 0.6'
\ No newline at end of file
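The _2a/_3a/... variants above differ only in which cut is dropped or swapped; the processor's n_minus_1 flag automates that pattern. A toy sketch of the n-1 masks it builds (cut names and masks are illustrative; the PackedSelection usage mirrors process()):

import numpy as np
from coffea.analysis_tools import PackedSelection

selections = PackedSelection(dtype="uint64")
selections.add("objsel",       np.array([True,  True,  True,  False]))
selections.add("MET_pt_tight", np.array([True,  False, True,  True]))
selections.add("vbsj_deta",    np.array([True,  True,  False, True]))

cuts = ["objsel", "MET_pt_tight", "vbsj_deta"]
for sel in cuts:
    # drop the current cut, but always keep objsel, as in the processor
    # (the possible duplicate "objsel" entry is harmless to selections.all)
    exclusive = [k for k in cuts if k != sel] + ["objsel"]
    print(sel, selections.all(*exclusive))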
diff --git a/analysis/vbs_vvh/config/cutflows/all.yaml b/analysis/vbs_vvh/config/cutflows/all.yaml
new file mode 100644
index 00000000..5ab91089
--- /dev/null
+++ b/analysis/vbs_vvh/config/cutflows/all.yaml
@@ -0,0 +1,233 @@
+MET_objsel:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  Met_trigger: 'lambda events, var, obj: events.Pass_MetTriggers == 1,'
+  MET_pt: 'lambda events, var, obj: events.Met_pt > 100,'
+  MET_significance: 'lambda events, var, obj: events.Met_significance > 20,'
+  ak4_bveto: 'lambda events, var, obj: events.Pass_NoAK4BJet == 1,'
+  loose_H: 'lambda events, var, obj: events.HiggsScore > 0.1,'
+  loose_V1: 'lambda events, var, obj: events.V1Score > 0.1,'
+  has_vbs: 'lambda events, var, obj: ak.all(events.vbs_idx_max_Mjj > -1, axis=1),'
+MET_loose:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  Met_trigger: 'lambda events, var, obj: events.Pass_MetTriggers == 1,'
+  has2GoodFJ: 'lambda events, var, obj: events.nGoodAK8 >= 2,'
+  has_vbs: 'lambda events, var, obj: ak.all(events.vbs_idx_max_Mjj > -1, axis=1),'
+  MET_pt: 'lambda events, var, obj: events.Met_pt > 100,'
+  MET_significance: 'lambda events, var, obj: events.Met_significance > 100,'
+  ak4_bveto: 'lambda events, var, obj: events.Pass_NoAK4BJet == 1,'
+  loose_H: 'lambda events, var, obj: events.HiggsScore > 0.1,'
+  loose_V1: 'lambda events, var, obj: events.V1Score > 0.1,'
+MET_chan1:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  Met_trigger: 'lambda events, var, obj: events.Pass_MetTriggers == 1,'
+  has2GoodFJ: 'lambda events, var, obj: events.nGoodAK8 >= 2,'
+  has_vbs: 'lambda events, var, obj: ak.all(events.vbs_idx_max_Mjj > -1, axis=1),'
+  MET_pt: 'lambda events, var, obj: events.Met_pt > 100,'
+  MET_significance: 'lambda events, var, obj: events.Met_significance > 100,'
+  ak4_bveto: 'lambda events, var, obj: events.Pass_NoAK4BJet == 1,'
+  loose_H: 'lambda events, var, obj: events.HiggsScore > 0.1,'
+  loose_V1: 'lambda events, var, obj: events.V1Score > 0.1,'
+  medium_H: 'lambda events, var, obj: events.HiggsScore > 0.5,'
+  medium_V1: 'lambda events, var, obj: events.V1Score > 0.5,'
+MET_chan2:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  Met_trigger: 'lambda events, var, obj: events.Pass_MetTriggers == 1,'
+  has2GoodFJ: 'lambda events, var, obj: events.nGoodAK8 >= 2,'
+  has_vbs: 'lambda events, var, obj: ak.all(events.vbs_idx_max_Mjj > -1, axis=1),'
+  MET_pt: 'lambda events, var, obj: events.Met_pt > 500,'
+  MET_significance: 'lambda events, var, obj: events.Met_significance > 100,'
+  ak4_bveto: 'lambda events, var, obj: events.Pass_NoAK4BJet == 1,'
+  tight_H: 'lambda events, var, obj: events.HiggsScore > 0.9,'
+  tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,'
+  vbsj_deta_3: 'lambda events, var, obj: (var["vbsj_deta"]>3),'
+  vbsj_Mjj_1000: 'lambda events, var, obj: (var["vbsj_Mjj"]>1000),'
+  V1_m: 'lambda events, var, obj: ((events.V1_msoftdrop > 50) & (events.V1_msoftdrop<200)),'
+  H_m: 'lambda events, var, obj: ((events.Higgs_msoftdrop > 50) & (events.Higgs_msoftdrop<200)),'
+negative_check:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  Met_trigger: 'lambda events, var, obj: events.Pass_MetTriggers == 1,'
+  has2GoodFJ: 'lambda events, var, obj: events.nGoodAK8 >= 2,'
+  has_vbs: 'lambda events, var, obj: ak.all(events.vbs_idx_max_Mjj > -1, axis=1),'
+  MET_pt: 'lambda events, var, obj: events.Met_pt > 500,'
+  MET_significance: 'lambda events, var, obj: events.Met_significance > 100,'
+  ak4_bveto: 'lambda events, var, obj: events.Pass_NoAK4BJet == 1,'
+  tight_H: 'lambda events, var, obj: events.HiggsScore > 0.9,'
+  tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,'
+  vbsj_deta_3: 'lambda events, var, obj: (var["vbsj_deta"]>3),'
+  vbsj_Mjj_1000: 'lambda events, var, obj: (var["vbsj_Mjj"]>1000),'
+  V1_m: 'lambda events, var, obj: ((events.V1_msoftdrop > 50) & (events.V1_msoftdrop<200)),'
+  H_m: 'lambda events, var, obj: ((events.Higgs_msoftdrop > 50) & (events.Higgs_msoftdrop<200)),'
+  negative_weight: 'lambda events, var, obj: (events.weight<0),'
+MET_med:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  Met_trigger: 'lambda events, var, obj: events.Pass_MetTriggers == 1,'
+  MET_pt: 'lambda events, var, obj: events.Met_pt > 100,'
+  MET_significance: 'lambda events, var, obj: events.Met_significance > 20,'
+  ak4_bveto: 'lambda events, var, obj: events.Pass_NoAK4BJet == 1,'
+  loose_H: 'lambda events, var, obj: events.HiggsScore > 0.1,'
+  loose_V1: 'lambda events, var, obj: events.V1Score > 0.1,'
+  MET_pt_2: 'lambda events, var, obj: events.Met_pt > 600,'
+  MET_significance_2: 'lambda events, var, obj: events.Met_significance > 60,'
+  vbsj_deta: 'lambda events, var, obj: (var["vbsj_deta"]>6),'
+  vbsj_Mjj: 'lambda events, var, obj: (var["vbsj_Mjj"]>1000),'
+  H_pt: 'lambda events, var, obj: events.Higgs_pt > 200,'
+  V1_pt: 'lambda events, var, obj: events.V1_pt > 200,'
+  tight_H: 'lambda events, var, obj: events.HiggsScore > 0.9,'
+  tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,'
+  V1_m: 'lambda events, var, obj: ((events.V1_msoftdrop > 75)),'
+  H_m: 'lambda events, var, obj: ((events.Higgs_msoftdrop > 85)),'
+MET_max:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  Met_trigger: 'lambda events, var, obj: events.Pass_MetTriggers == 1,'
+  MET_pt: 'lambda events, var, obj: events.Met_pt > 100,'
+  MET_significance: 'lambda events, var, obj: events.Met_significance > 20,'
+  ak4_bveto: 'lambda events, var, obj: events.Pass_NoAK4BJet == 1,'
+  loose_H: 'lambda events, var, obj: events.HiggsScore > 0.1,'
+  loose_V1: 'lambda events, var, obj: events.V1Score > 0.1,'
+  MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 1000,'
+  MET_significance_tight: 'lambda events, var, obj: events.Met_significance > 60,'
+  H_pt: 'lambda events, var, obj: events.Higgs_pt > 600,'
+  V1_pt: 'lambda events, var, obj: events.V1_pt > 600,'
+  tight_H: 'lambda events, var, obj: events.HiggsScore > 0.9,'
+  tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,'
+  vbsj_deta: 'lambda events, var, obj: (var["vbsj_deta"]>6),'
+  vbsj_Mjj: 'lambda events, var, obj: (var["vbsj_Mjj"]>1000),'
+  V1_m: 'lambda events, var, obj: ((events.V1_msoftdrop > 75)),'
+  H_m: 'lambda events, var, obj: ((events.Higgs_msoftdrop > 85)),'
+MET_temp:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  Met_trigger: 'lambda events, var, obj: events.Pass_MetTriggers == 1,'
+  MET_pt: 'lambda events, var, obj: events.Met_pt > 100,'
+  MET_significance: 'lambda events, var, obj: events.Met_significance > 20,'
+  ak4_bveto: 'lambda events, var, obj: events.Pass_NoAK4BJet == 1,'
+  loose_H: 'lambda events, var, obj: events.HiggsScore > 0.1,'
+  loose_V1: 'lambda events, var, obj: events.V1Score > 0.1,'
+  MET_pt_2: 'lambda events, var, obj: events.Met_pt > 600,'
+  MET_significance_2: 'lambda events, var, obj: events.Met_significance > 60,'
+  vbsj_deta: 'lambda events, var, obj: (var["vbsj_deta"]>6),'
+  vbsj_Mjj: 'lambda events, var, obj: (var["vbsj_Mjj"]>1000),'
+  H_pt: 'lambda events, var, obj: events.Higgs_pt > 200,'
+  V1_pt: 'lambda events, var, obj: events.V1_pt > 200,'
+  tight_H: 'lambda events, var, obj: events.HiggsScore > 0.9,'
+  tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,'
+  V1_m: 'lambda events, var, obj: ((events.V1_msoftdrop > 75)),'
+  H_m: 'lambda events, var, obj: ((events.Higgs_msoftdrop > 85)),'
+  max_deta_cut: 'lambda events, var, obj: var["vbsj_deta"] > 8,'
+  HV_dR: 'lambda events, var, obj: var["HV1_dR"] > 8,'
+  H_MET_dphi: 'lambda events, var, obj: var["HMET_dphi"] > 8,'
+  V1_MET_dphi: 'lambda events, var, obj: var["V1MET_dphi"] > 8,'
+  MET_leadAK8_dphi: 'lambda events, var, obj: var["vbsj_deta"] > 8,'
+MET_opti:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 700,'
+MET_opti2:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 700,'
+  max_deta_cut: 'lambda events, var, obj: var["max_deta"] > 6,'
+MET_opti3:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 700,'
+  max_deta_cut: 'lambda events, var, obj: var["max_deta"] > 6,'
+  H_MET_dphi: 'lambda events, var, obj: var["HMET_dphi"] > 0.35,'
+MET_opti4:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 700,'
+  max_deta_cut: 'lambda events, var, obj: var["max_deta"] > 6,'
+  H_MET_dphi: 'lambda events, var, obj: var["HMET_dphi"] > 0.35,'
+  tight_V1: 'lambda events, var, obj: events.HiggsScore > 0.9,'
+MET_opti5:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 700,'
+  max_deta_cut: 'lambda events, var, obj: var["max_deta"] > 6,'
+  H_MET_dphi: 'lambda events, var, obj: var["HMET_dphi"] > 0.35,'
+  tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,'
+  tight_H: 'lambda events, var, obj: events.HiggsScore > 0.9,'
+MET_opti5a:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 700,'
+  max_deta_cut: 'lambda events, var, obj: var["max_deta"] > 6,'
+  H_MET_dphi: 'lambda events, var, obj: var["HMET_dphi"] > 0.35,'
+  tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,'
+  V1_MET_dphi: 'lambda events, var, obj: var["V1MET_dphi"] > 0.35,'
+MET_opti6:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 700,'
+  max_deta_cut: 'lambda events, var, obj: var["max_deta"] > 6,'
+  H_MET_dphi: 'lambda events, var, obj: var["HMET_dphi"] > 0.35,'
+  tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,'
+  V1_MET_dphi: 'lambda events, var, obj: var["V1MET_dphi"] > 0.35,'
+  tight_H: 'lambda events, var, obj: events.HiggsScore > 0.9,'
+MET_opti6a:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 700,'
+  max_deta_cut: 'lambda events, var, obj: var["max_deta"] > 6,'
+  H_MET_dphi: 'lambda events, var, obj: var["HMET_dphi"] > 0.35,'
+  tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,'
+  V1_MET_dphi: 'lambda events, var, obj: var["V1MET_dphi"] > 0.35,'
+  vbsj_Mjj: 'lambda events, var, obj: (var["vbsj_Mjj"]>1000),'
+MET_opti7:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 700,'
+  max_deta_cut: 'lambda events, var, obj: var["max_deta"] > 6,'
+  H_MET_dphi: 'lambda events, var, obj: var["HMET_dphi"] > 0.35,'
+  tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,'
+  V1_MET_dphi: 'lambda events, var, obj: var["V1MET_dphi"] > 0.35,'
+  vbsj_Mjj: 'lambda events, var, obj: (var["vbsj_Mjj"]>1000),'
+  tight_H: 'lambda events, var, obj: events.HiggsScore > 0.9,'
+MET_opti_f:
+  all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1),'
+  objsel: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1),'
+  MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 700,'
+  max_deta_cut: 'lambda events, var, obj: var["max_deta"] > 6,'
+  H_MET_dphi: 'lambda events, var, obj: var["HMET_dphi"] > 0.35,'
+  tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,'
+  V1_MET_dphi: 'lambda events, var, obj: var["V1MET_dphi"] > 0.35,'
+  vbsj_Mjj: 'lambda events, var, obj: (var["vbsj_Mjj"]>1000),'
+  tight_H: 'lambda events, var, obj: events.HiggsScore > 0.9,'
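Because all.yaml was written out by export_cutflow_to_yaml, its expression strings still carry the 'lambda events, var, obj:' prefix and a trailing comma from the source dict literals; make_lambda in cutflow_config_handling.py (above) strips both before eval'ing, so these entries load just like the hand-written YAMLs. A small sketch (import and file paths are assumptions):

import awkward as ak
from config.cutflow_config_handling import get_cutflow  # assumed import path

cf = get_cutflow("config/cutflows/all.yaml", "MET_objsel")
events = ak.Array({"Pass_MetTriggers": [1, 0]})
print(cf["Met_trigger"](events, {}, {}))  # [True, False]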
var["max_deta"] > 6,' + H_MET_dphi: 'lambda events, var, obj: var["HMET_dphi"] > 0.35,' + tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,' + V1_MET_dphi: 'lambda events, var, obj: var["V1MET_dphi"] > 0.35,' + vbsj_Mjj: 'lambda events, var, obj: (var["vbsj_Mjj"]>1000),' +MET_opti7: + all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers + == 1),' + objsel: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) & (events.Met_pt + > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore + > 0.1) & (events.V1Score > 0.1),' + MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 700,' + max_deta_cut: 'lambda events, var, obj: var["max_deta"] > 6,' + H_MET_dphi: 'lambda events, var, obj: var["HMET_dphi"] > 0.35,' + tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,' + V1_MET_dphi: 'lambda events, var, obj: var["V1MET_dphi"] > 0.35,' + vbsj_Mjj: 'lambda events, var, obj: (var["vbsj_Mjj"]>1000),' + tight_H: 'lambda events, var, obj: events.HiggsScore > 0.9,' +MET_opti_f: + all_events: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers + == 1),' + objsel: 'lambda events, var, obj: (events.Pass_MetTriggers == 1) & (events.Met_pt + > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore + > 0.1) & (events.V1Score > 0.1),' + MET_pt_tight: 'lambda events, var, obj: events.Met_pt > 700,' + max_deta_cut: 'lambda events, var, obj: var["max_deta"] > 6,' + H_MET_dphi: 'lambda events, var, obj: var["HMET_dphi"] > 0.35,' + tight_V1: 'lambda events, var, obj: events.V1Score > 0.9,' + V1_MET_dphi: 'lambda events, var, obj: var["V1MET_dphi"] > 0.35,' + vbsj_Mjj: 'lambda events, var, obj: (var["vbsj_Mjj"]>1000),' + tight_H: 'lambda events, var, obj: events.HiggsScore > 0.9,' diff --git a/analysis/vbs_vvh/config/cutflows/c2v1p5_leptonstudy.yaml b/analysis/vbs_vvh/config/cutflows/c2v1p5_leptonstudy.yaml new file mode 100644 index 00000000..d0286675 --- /dev/null +++ b/analysis/vbs_vvh/config/cutflows/c2v1p5_leptonstudy.yaml @@ -0,0 +1,26 @@ +#limiting met_pt cut to 300GeV, test at c2v=1.5 +objsel: + all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)' + MET_trigger: '(events.Pass_MetTriggers == 1)' + nGoodAK8geq2: '(events.nGoodAK8 >= 2)' + has_vbs_pair: 'ak.all(events.vbs_idx_max_Mjj > -1, axis=1)' + MET_pt_loose: '(events.Met_pt > 100)' + H_score_loose: '(events.HiggsScore > 0.1)' + V1_score_loose: '(events.V1Score > 0.1)' + AK4_Bveto: '(events.Pass_NoAK4BJet == 1)' + +MLinput: + trigger: '(events.Pass_MetTriggers == 1)' + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_300: '(events.Met_pt > 300)' + Higgs_Tscore: 'var["Higgs_particleNet_TvsQCD"] <0.5' + + +tight5_HiggsTscore: + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_300: '(events.Met_pt > 300)' + HMET_dphi_p65: '(var["HMET_dphi"] >0.65)' + V1MET_dphi_p65: '(var["V1MET_dphi"] >0.65)' + Hscore: '(events.HiggsScore > 0.6)' + V1score: '(events.V1Score > 0.5)' + Higgs_Tscore: 'var["Higgs_particleNet_TvsQCD"] <0.5' \ No newline at end of file diff --git a/analysis/vbs_vvh/config/cutflows/c2v1p5_med.yaml 
diff --git a/analysis/vbs_vvh/config/cutflows/c2v1p5_med.yaml b/analysis/vbs_vvh/config/cutflows/c2v1p5_med.yaml
new file mode 100644
index 00000000..59b2894c
--- /dev/null
+++ b/analysis/vbs_vvh/config/cutflows/c2v1p5_med.yaml
@@ -0,0 +1,19 @@
+#limiting met_pt cut to 300GeV, test at c2v=1.5
+objsel:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)'
+  MET_trigger: '(events.Pass_MetTriggers == 1)'
+  nGoodAK8geq2: '(events.nGoodAK8 >= 2)'
+  has_vbs_pair: 'ak.all(events.vbs_idx_max_Mjj > -1, axis=1)'
+  MET_pt_loose: '(events.Met_pt > 100)'
+  H_score_loose: '(events.HiggsScore > 0.1)'
+  V1_score_loose: '(events.V1Score > 0.1)'
+  AK4_Bveto: '(events.Pass_NoAK4BJet == 1)'
+
+med0_objsel:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+
+
+med1_metpt:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_150: '(events.Met_pt>150)'
+
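These med*/tight* blocks are ordered cutflows: the processor applies them cumulatively, so each row's mask is the AND of all rows up to and including it. A toy version of that loop, matching the non n-1 branch of process():

import numpy as np
from coffea.analysis_tools import PackedSelection

selections = PackedSelection(dtype="uint64")
selections.add("objsel",     np.array([True,  True,  True, False]))
selections.add("MET_pt_150", np.array([True,  False, True, True]))

cumulative_cuts, cut_mask_dict = [], {}
for sel in ["objsel", "MET_pt_150"]:
    cumulative_cuts.append(sel)
    cut_mask_dict[sel] = selections.all(*cumulative_cuts)
print(cut_mask_dict["MET_pt_150"])  # [ True False  True False]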
diff --git a/analysis/vbs_vvh/config/cutflows/c2v1p5_pt250cut.yaml b/analysis/vbs_vvh/config/cutflows/c2v1p5_pt250cut.yaml
new file mode 100644
index 00000000..a94e89a0
--- /dev/null
+++ b/analysis/vbs_vvh/config/cutflows/c2v1p5_pt250cut.yaml
@@ -0,0 +1,67 @@
+#limiting met_pt cut to 300GeV, test at c2v=1.5
+objsel:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)'
+  MET_trigger: '(events.Pass_MetTriggers == 1)'
+  nGoodAK8geq2: '(events.nGoodAK8 >= 2)'
+  has_vbs_pair: 'ak.all(events.vbs_idx_max_Mjj > -1, axis=1)'
+  MET_pt_loose: '(events.Met_pt > 100)'
+  H_score_loose: '(events.HiggsScore > 0.1)'
+  V1_score_loose: '(events.V1Score > 0.1)'
+  AK4_Bveto: '(events.Pass_NoAK4BJet == 1)'
+
+
+
+tight1_met300:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+
+tight2_dphi_diff:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  dphi_diff_2p25: '(var["dphi_diff"] <2.25)'
+
+tight2_HMET_dphi:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  HMET_dphi_p65: '(var["HMET_dphi"] >0.65)'
+
+
+tight3_V1MET_dphi:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  HMET_dphi_p65: '(var["HMET_dphi"] >0.65)'
+  V1MET_dphi_p65: '(var["V1MET_dphi"] >0.65)'
+
+
+tight4_PNscores:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  HMET_dphi_p65: '(var["HMET_dphi"] >0.65)'
+  V1MET_dphi_p65: '(var["V1MET_dphi"] >0.65)'
+  Hscore: '(events.HiggsScore > 0.6)'
+  V1score: '(events.V1Score > 0.5)'
+
+tight5_HiggsTscore:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  HMET_dphi_p65: '(var["HMET_dphi"] >0.65)'
+  V1MET_dphi_p65: '(var["V1MET_dphi"] >0.65)'
+  Hscore: '(events.HiggsScore > 0.6)'
+  V1score: '(events.V1Score > 0.5)'
+  Higgs_Tscore: 'var["Higgs_particleNet_TvsQCD"] <0.5'
+
+
+tight4v2_HiggsTscore:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  dphi_diff_2p2: 'var["dphi_diff"] <2.2'
+  Hscore: '(events.HiggsScore > 0.6)'
+  V1score: '(events.V1Score > 0.5)'
+  Higgs_Tscore: 'var["Higgs_particleNet_TvsQCD"] <0.5'
+
+
+tight_MLinput:
+  trigger: '(events.Pass_MetTriggers == 1)'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  Higgs_Tscore: 'var["Higgs_particleNet_TvsQCD"] <0.5'
\ No newline at end of file
diff --git a/analysis/vbs_vvh/config/cutflows/c2v1p5_tight.yaml b/analysis/vbs_vvh/config/cutflows/c2v1p5_tight.yaml
new file mode 100644
index 00000000..1f70e663
--- /dev/null
+++ b/analysis/vbs_vvh/config/cutflows/c2v1p5_tight.yaml
@@ -0,0 +1,63 @@
+#limiting met_pt cut to 300GeV, test at c2v=1.5
+objsel:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)'
+  MET_trigger: '(events.Pass_MetTriggers == 1)'
+  nGoodAK8geq2: '(events.nGoodAK8 >= 2)'
+  has_vbs_pair: 'ak.all(events.vbs_idx_max_Mjj > -1, axis=1)'
+  MET_pt_loose: '(events.Met_pt > 100)'
+  H_score_loose: '(events.HiggsScore > 0.1)'
+  V1_score_loose: '(events.V1Score > 0.1)'
+  AK4_Bveto: '(events.Pass_NoAK4BJet == 1)'
+
+tight1_met300:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+
+tight2_dphi_diff:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  dphi_diff_2p25: '(var["dphi_diff"] <2.25)'
+
+tight2_HMET_dphi:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  HMET_dphi_p65: '(var["HMET_dphi"] >0.65)'
+
+
+tight3_V1MET_dphi:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  HMET_dphi_p65: '(var["HMET_dphi"] >0.65)'
+  V1MET_dphi_p65: '(var["V1MET_dphi"] >0.65)'
+
+
+tight4_PNscores:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  HMET_dphi_p65: '(var["HMET_dphi"] >0.65)'
+  V1MET_dphi_p65: '(var["V1MET_dphi"] >0.65)'
+  Hscore: '(events.HiggsScore > 0.6)'
+  V1score: '(events.V1Score > 0.5)'
+
+tight5_HiggsTscore:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  HMET_dphi_p65: '(var["HMET_dphi"] >0.65)'
+  V1MET_dphi_p65: '(var["V1MET_dphi"] >0.65)'
+  Hscore: '(events.HiggsScore > 0.6)'
+  V1score: '(events.V1Score > 0.5)'
+  Higgs_Tscore: 'var["Higgs_particleNet_TvsQCD"] <0.5'
+
+
+tight4v2_HiggsTscore:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  dphi_diff_2p2: 'var["dphi_diff"] <2.2'
+  Hscore: '(events.HiggsScore > 0.6)'
+  V1score: '(events.V1Score > 0.5)'
+  Higgs_Tscore: 'var["Higgs_particleNet_TvsQCD"] <0.5'
+
+tight_MLinput:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  Higgs_Tscore: 'var["Higgs_particleNet_TvsQCD"] <0.5'
\ No newline at end of file
diff --git a/analysis/vbs_vvh/config/cutflows/c2v1p5_tight_test.yaml b/analysis/vbs_vvh/config/cutflows/c2v1p5_tight_test.yaml
new file mode 100644
index 00000000..1f70e663
--- /dev/null
+++ b/analysis/vbs_vvh/config/cutflows/c2v1p5_tight_test.yaml
@@ -0,0 +1,63 @@
+#limiting met_pt cut to 300GeV, test at c2v=1.5
+objsel:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)'
+  MET_trigger: '(events.Pass_MetTriggers == 1)'
+  nGoodAK8geq2: '(events.nGoodAK8 >= 2)'
+  has_vbs_pair: 'ak.all(events.vbs_idx_max_Mjj > -1, axis=1)'
+  MET_pt_loose: '(events.Met_pt > 100)'
+  H_score_loose: '(events.HiggsScore > 0.1)'
+  V1_score_loose: '(events.V1Score > 0.1)'
+  AK4_Bveto: '(events.Pass_NoAK4BJet == 1)'
+
+tight1_met300:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+
+tight2_dphi_diff:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  dphi_diff_2p25: '(var["dphi_diff"] <2.25)'
+
+tight2_HMET_dphi:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)'
+  MET_pt_300: '(events.Met_pt > 300)'
+  HMET_dphi_p65: '(var["HMET_dphi"] >0.65)'
+
+
+tight3_V1MET_dphi:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) &
(events.nGoodAK8 >= 2)' + MET_pt_300: '(events.Met_pt > 300)' + HMET_dphi_p65: '(var["HMET_dphi"] >0.65)' + V1MET_dphi_p65: '(var["V1MET_dphi"] >0.65)' + + +tight4_PNscores: + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_300: '(events.Met_pt > 300)' + HMET_dphi_p65: '(var["HMET_dphi"] >0.65)' + V1MET_dphi_p65: '(var["V1MET_dphi"] >0.65)' + Hscore: '(events.HiggsScore > 0.6)' + V1score: '(events.V1Score > 0.5)' + +tight5_HiggsTscore: + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_300: '(events.Met_pt > 300)' + HMET_dphi_p65: '(var["HMET_dphi"] >0.65)' + V1MET_dphi_p65: '(var["V1MET_dphi"] >0.65)' + Hscore: '(events.HiggsScore > 0.6)' + V1score: '(events.V1Score > 0.5)' + Higgs_Tscore: 'var["Higgs_particleNet_TvsQCD"] <0.5' + + +tight4v2_HiggsTscore: + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_300: '(events.Met_pt > 300)' + dphi_diff_2p2: 'var["dphi_diff"] <2.2' + Hscore: '(events.HiggsScore > 0.6)' + V1score: '(events.V1Score > 0.5)' + Higgs_Tscore: 'var["Higgs_particleNet_TvsQCD"] <0.5' + +tight_MLinput: + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_300: '(events.Met_pt > 300)' + Higgs_Tscore: 'var["Higgs_particleNet_TvsQCD"] <0.5' \ No newline at end of file diff --git a/analysis/vbs_vvh/config/cutflows/c2v2_pt250.yaml b/analysis/vbs_vvh/config/cutflows/c2v2_pt250.yaml new file mode 100644 index 00000000..aa61650f --- /dev/null +++ b/analysis/vbs_vvh/config/cutflows/c2v2_pt250.yaml @@ -0,0 +1,26 @@ +preselection: + all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)' + MET_trigger: '(events.Pass_MetTriggers == 1)' + nGoodAK8geq2: '(events.nGoodAK8 >= 2)' + has_vbs_pair: 'ak.all(events.vbs_idx_max_Mjj > -1, axis=1)' + MET_pt_presel: '(events.Met_pt > 100)' + MET_signif_presel: '(events.Met_significance > 20)' + loose_HScore: '(events.HiggsScore > 0.1)' + loose_VScore: '(events.V1Score > 0.1)' + ak4_bveto: '(events.Pass_NoAK4BJet == 1)' + +tight: + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_250: '(events.Met_pt > 250)' + HMET_dphi_p6: '(var["HMET_dphi"] >0.6)' + V1MET_dphi_p6: '(var["V1MET_dphi"] >0.6)' + Hscore: '(events.HiggsScore > 0.6)' + V1score: '(events.V1Score > 0.6)' + +no_metsignif: + objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt > 100) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1) & (events.nGoodAK8 >= 2)' + MET_pt_250: '(events.Met_pt > 250)' + HMET_dphi_p6: '(var["HMET_dphi"] >0.6)' + V1MET_dphi_p6: '(var["V1MET_dphi"] >0.6)' + Hscore: '(events.HiggsScore > 0.6)' + V1score: '(events.V1Score > 0.6)' \ No newline at end 
of file
diff --git a/analysis/vbs_vvh/config/cutflows/test.yaml b/analysis/vbs_vvh/config/cutflows/test.yaml
new file mode 100644
index 00000000..ff548b3d
--- /dev/null
+++ b/analysis/vbs_vvh/config/cutflows/test.yaml
@@ -0,0 +1,13 @@
+test1:
+  all_events: '(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers
+    == 1)'
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt
+    > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore
+    > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1)'
+  tight_cut: '(events.Met_pt > 500) & (var["vbsj_deta"]>2.5) & (events.HiggsScore > 0.6) & (events.V1Score > 0.6)'
+
+test2:
+  objsel: '(events.Pass_MetTriggers == 1) & (events.Met_pt
+    > 100) & (events.Met_significance > 20) & (events.Pass_NoAK4BJet == 1) & (events.HiggsScore
+    > 0.1) & (events.V1Score > 0.1) & ak.all(events.vbs_idx_max_Mjj > -1, axis=1)'
+  tight_cut: '(events.Met_pt > 500) & (var["vbsj_deta"]>2.5) & (events.HiggsScore > 0.6) & (events.V1Score > 0.6)'
\ No newline at end of file
diff --git a/analysis/vbs_vvh/config/paths.py b/analysis/vbs_vvh/config/paths.py
new file mode 100644
index 00000000..bd483242
--- /dev/null
+++ b/analysis/vbs_vvh/config/paths.py
@@ -0,0 +1,14 @@
+
+import os
+from config.cutflow_config_handling import get_cutflow
+script_dir = os.path.dirname(os.path.abspath(__file__))
+base_dir = os.path.dirname(script_dir)
+
+#scripts
+cutflow_yamls_dir = os.path.join(base_dir,'config','cutflows')
+
+#default cutflow configuration
+basic_cutflow = f'{cutflow_yamls_dir}/test.yaml'
+objsel_cf = get_cutflow(basic_cutflow,'test1')
+default_cutflow_yaml = f'{cutflow_yamls_dir}/all.yaml'
+default_output_dir = './'
diff --git a/analysis/vbs_vvh/config/plotting_config.py b/analysis/vbs_vvh/config/plotting_config.py
new file mode 100644
index 00000000..ad2361cd
--- /dev/null
+++ b/analysis/vbs_vvh/config/plotting_config.py
@@ -0,0 +1,25 @@
+# config/plotting_config.py
+
+PROC_MAP_CSV = "config/sample_names.csv"
+REF_VAR = 'nGoodAK8' # reference variable used when computing yields
+
+SIG_COLOR = "red"
+DATA_COLOR = "blue"
+
+DEFAULT_OUTPUT_DIRNAME = "plots"
+
+LOG_LEVEL = "INFO"
+LOG_FORMAT = "%(message)s"
+
+FIGURE_STYLE = {
+    "figsize": (6, 5),
+    "dpi": 120,
+}
+
+SUBPLOTS = ("dataMC", "significance")
+
+FIG_RATIO = {
+    "main": 4,
+    "dataMC": 1,
+    "significance": 1,
+}
diff --git a/analysis/vbs_vvh/config/produce_sample_name_csv.py b/analysis/vbs_vvh/config/produce_sample_name_csv.py
new file mode 100644
index 00000000..bc2acb42
--- /dev/null
+++ b/analysis/vbs_vvh/config/produce_sample_name_csv.py
@@ -0,0 +1,168 @@
+# Produces the process map (sample_names.csv) that links each process to its background type;
+# used for plotting.
+
+import os
+import json
+import csv
+import argparse
+import subprocess
+from collections import defaultdict
+
+# Constants
+years = ['2016preVFP', '2016postVFP', '2017', '2018']
+categories = ['sig', 'bkg', 'data']
+input_json_template = "input_{}_{}.json"
+output_csv = "sample_names.csv"
+
+sample_colour_mpl = {
+    "data": "blue",
+    "sig": "red",
+    "WWH_OS": "crimson",
+    "WWH_SS": "lightcoral",
+    "WZH": "orangered",
+    "ZZH": "tomato",
+    "bkg_total": "dimgray",
+    "DY": "palegreen",
+    "ttbar": "mediumturquoise",
+    "ttbar_had": "powderblue",
+    "ttbar_SL": "deepskyblue",
+    "ttx": "cadetblue",
+    "ttX": "cadetblue",
+    "ST": "lightsteelblue",
+    "WJets": "darkorange",
+    "WJet": "darkorange",
+    "ZJets": "lightyellow",
+    "ZJet": "lightyellow",
+    "EWKV": "skyblue",
+    "EWK": "skyblue",
+    "QCD": "lightcyan",
+    "bosons": "sandybrown",
+    "Other":
"orchid", +} + + +def run_makeConfig(): + for cat in categories: + for year in years: + print(f"Running: python3 etc/makeConfig.py --categories {cat} --sample_year {year}") + subprocess.run(["python3", "etc/makeConfig.py", "--categories", cat, "--sample_year", year], check=True) + + +def extract_samples(): + signal_tags = ['WWH_OS', 'WWH_SS', 'WZH', 'ZZH'] + sample_list = [] + + for cat in categories: + for year in years: + json_file = input_json_template.format(year, cat) + if not os.path.isfile(json_file): + print(f"Missing {json_file}, skipping.") + continue + + with open(json_file) as f: + json_data = json.load(f) + + samples = json_data.get("samples", {}) + for sample_key, sample_info in samples.items(): + metadata = sample_info.get("metadata", {}) + + sample_year = metadata.get("sample_year", year) + sample_category = metadata.get("sample_category", cat) + sample_name = metadata.get("sample_name", sample_key) + + # Determine sample_type + if sample_category == 'sig': + matched_type = next((tag for tag in signal_tags if tag in sample_key), "unknown_sig") + sample_type = matched_type + else: + sample_type = metadata.get("sample_type", "unknown") + + sample_list.append({ + "sample_year": sample_year, + "sample_category": sample_category, + "sample_type": sample_type, + "sample_name": sample_name, + "xsec": metadata.get("xsec", -1), + "lumi": metadata.get("lumi", -1), + "sumws": metadata.get("sumws", -1), + }) + return sample_list + +def assign_codes_and_colours(sample_list): + # Assign codes + code_prefix = {'sig': 1, 'bkg': 2, 'data': 3} + sample_code_counter = defaultdict(dict) + final_sample_list = [] + + current_counter = defaultdict(int) + # sort by sample_category, then sample_type + sample_list.sort(key=lambda x: (code_prefix[x['sample_category']], x['sample_type'], x['sample_name'])) + + for sample in sample_list: + cat = sample['sample_category'] + name = sample['sample_name'] + if name not in sample_code_counter[cat]: + sample_code_counter[cat][name] = current_counter[cat] + current_counter[cat] += 1 + sample_code = f"{code_prefix[cat]}{sample_code_counter[cat][name]}" + sample['sample_code'] = sample_code + + # Determine plotting color + sample_type = sample["sample_type"] + sample["plotting_colour"] = sample_colour_mpl.get(sample_type, "black") + final_sample_list.append(sample) + + return final_sample_list + + +def write_csv(sample_list): + header = ["sample_year", "sample_category", "sample_type", "sample_name", + "xsec", "lumi", "sumws", "sample_code", "plotting_colour"] + with open(output_csv, "w", newline="") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=header) + writer.writeheader() + for sample in sample_list: + writer.writerow(sample) + + +def write_sample_code_map(sample_list, output_path="sample_code.csv"): + # Use a set to avoid duplicates + seen = set() + rows = [] + + for sample in sample_list: + key = (sample["sample_category"], sample["sample_type"], sample["sample_name"], sample["sample_code"], ) + if key not in seen: + seen.add(key) + rows.append({"sample_category": sample["sample_category"], "sample_type": sample["sample_type"], "sample_name": sample["sample_name"], "sample_code": sample["sample_code"]}) + + # Sort by code + rows.sort(key=lambda x: int(x["sample_code"])) + + with open(output_path, "w", newline="") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=["sample_code","sample_category","sample_type","sample_name"]) + writer.writeheader() + writer.writerows(rows) + +def main(): + + #run_makeConfig() + + print("Extracting 
samples from JSON...") + raw_sample_list = extract_samples() + + print("Assigning codes and plotting colors...") + processed_sample_list = assign_codes_and_colours(raw_sample_list) + + print(f"Writing to {output_csv}...") + write_csv(processed_sample_list) + + + print("Writing sample_code.csv...") + write_sample_code_map(processed_sample_list) + + print("Done.") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/analysis/vbs_vvh/config/sample_names.csv b/analysis/vbs_vvh/config/sample_names.csv new file mode 100644 index 00000000..0cb90a5f --- /dev/null +++ b/analysis/vbs_vvh/config/sample_names.csv @@ -0,0 +1,235 @@ +sample_year,sample_category,sample_type,sample_name,xsec,lumi,sumws,sample_code,plotting_colour +2016preVFP,sig,WWH_OS,VBSWWH_OS_VBSCuts,0.002828,19.52,98500.0,00,crimson +2016postVFP,sig,WWH_OS,VBSWWH_OS_VBSCuts,0.002828,16.81,99200.0,00,crimson +2017,sig,WWH_OS,VBSWWH_OS_VBSCuts,0.002828,41.529,98300.0,00,crimson +2018,sig,WWH_OS,VBSWWH_OS_VBSCuts,0.002828,59.7,98700.0,00,crimson +2016preVFP,sig,WWH_SS,VBSWWH_SS_VBSCuts,0.001526,19.52,99800.0,01,lightcoral +2016postVFP,sig,WWH_SS,VBSWWH_SS_VBSCuts,0.001526,16.81,99900.0,01,lightcoral +2017,sig,WWH_SS,VBSWWH_SS_VBSCuts,0.001526,41.529,99800.0,01,lightcoral +2018,sig,WWH_SS,VBSWWH_SS_VBSCuts,0.001526,59.7,99800.0,01,lightcoral +2016preVFP,sig,WZH,VBSWZH_VBSCuts,0.001637,19.52,99000.0,02,orangered +2016postVFP,sig,WZH,VBSWZH_VBSCuts,0.001637,16.81,99700.0,02,orangered +2017,sig,WZH,VBSWZH_VBSCuts,0.001637,41.529,99800.0,02,orangered +2018,sig,WZH,VBSWZH_VBSCuts,0.001637,59.7,99600.0,02,orangered +2016preVFP,sig,ZZH,VBSZZH_VBSCuts,0.001132,19.52,99700.0,03,tomato +2016postVFP,sig,ZZH,VBSZZH_VBSCuts,0.001132,16.81,98400.0,03,tomato +2017,sig,ZZH,VBSZZH_VBSCuts,0.001132,41.529,99800.0,03,tomato +2018,sig,ZZH,VBSZZH_VBSCuts,0.001132,59.7,99400.0,03,tomato +2016preVFP,bkg,EWK,EWKWminus2Jets_WToQQ_dipoleRecoilOn,10.67,19.52,4999000.0,10,"#ff8800" +2016postVFP,bkg,EWK,EWKWminus2Jets_WToQQ_dipoleRecoilOn,10.67,16.81,4998130.0,10,"#ff8800" +2017,bkg,EWK,EWKWminus2Jets_WToQQ_dipoleRecoilOn,10.67,41.529,7874000.0,10,"#ff8800" +2018,bkg,EWK,EWKWminus2Jets_WToQQ_dipoleRecoilOn,10.67,59.7,9655000.0,10,"#ff8800" +2016preVFP,bkg,EWK,EWKWplus2Jets_WToQQ_dipoleRecoilOn,10.67,19.52,4943000.0,11,"#ff8800" +2016postVFP,bkg,EWK,EWKWplus2Jets_WToQQ_dipoleRecoilOn,10.67,16.81,4998122.0,11,"#ff8800" +2017,bkg,EWK,EWKWplus2Jets_WToQQ_dipoleRecoilOn,10.67,41.529,7994000.0,11,"#ff8800" +2018,bkg,EWK,EWKWplus2Jets_WToQQ_dipoleRecoilOn,10.67,59.7,9991000.0,11,"#ff8800" +2016preVFP,bkg,EWK,EWKZ2Jets_ZToLL_M-50,6.22,19.52,500000.0,12,"#ff8800" +2016postVFP,bkg,EWK,EWKZ2Jets_ZToLL_M-50,6.22,16.81,453000.0,12,"#ff8800" +2017,bkg,EWK,EWKZ2Jets_ZToLL_M-50,6.22,41.529,579000.0,12,"#ff8800" +2018,bkg,EWK,EWKZ2Jets_ZToLL_M-50,6.22,59.7,1000000.0,12,"#ff8800" +2016preVFP,bkg,EWK,EWKZ2Jets_ZToNuNu_M-50,10.72,19.52,1450000.0,13,"#ff8800" +2016postVFP,bkg,EWK,EWKZ2Jets_ZToNuNu_M-50,10.72,16.81,1500000.0,13,"#ff8800" +2017,bkg,EWK,EWKZ2Jets_ZToNuNu_M-50,10.72,41.529,2976000.0,13,"#ff8800" +2018,bkg,EWK,EWKZ2Jets_ZToNuNu_M-50,10.72,59.7,2645000.0,13,"#ff8800" +2016preVFP,bkg,EWK,EWKZ2Jets_ZToQQ_dipoleRecoilOn,10.67,19.52,4979000.0,14,"#ff8800" +2016postVFP,bkg,EWK,EWKZ2Jets_ZToQQ_dipoleRecoilOn,10.67,16.81,4928082.0,14,"#ff8800" +2017,bkg,EWK,EWKZ2Jets_ZToQQ_dipoleRecoilOn,10.67,41.529,8000000.0,14,"#ff8800" +2018,bkg,EWK,EWKZ2Jets_ZToQQ_dipoleRecoilOn,10.67,59.7,9985000.0,14,"#ff8800" 
+2016preVFP,bkg,EWK,WZJJ_EWK_InclusivePolarization,0.01701,19.52,30747.70926882,15,"#ff8800" +2016postVFP,bkg,EWK,WZJJ_EWK_InclusivePolarization,0.01701,16.81,28529.296494900005,15,"#ff8800" +2017,bkg,EWK,WZJJ_EWK_InclusivePolarization,0.01701,41.529,31195.062866480002,15,"#ff8800" +2018,bkg,EWK,WZJJ_EWK_InclusivePolarization,0.01701,59.7,29237.15565968,15,"#ff8800" +2016preVFP,bkg,Other,VHToNonbb_M125,2.207,19.52,2561276.2952920003,16,"#ffee00" +2016postVFP,bkg,Other,VHToNonbb_M125,2.207,16.81,1917613.6729720004,16,"#ffee00" +2017,bkg,Other,VHToNonbb_M125,2.207,41.529,5066302.999084999,16,"#ffee00" +2018,bkg,Other,VHToNonbb_M125,2.207,59.7,7023767.516864001,16,"#ffee00" +2016preVFP,bkg,Other,WWTo1L1Nu2Q_4f,51.65,19.52,1691013737.80942,17,"#ffee00" +2016postVFP,bkg,Other,WWTo1L1Nu2Q_4f,51.65,16.81,1684137687.5030298,17,"#ffee00" +2017,bkg,Other,WWTo1L1Nu2Q_4f,51.65,41.529,3364342782.3126493,17,"#ffee00" +2018,bkg,Other,WWTo1L1Nu2Q_4f,51.65,59.7,3393645436.41983,17,"#ffee00" +2016preVFP,bkg,Other,WWTo4Q_4f,51.79,19.52,1672345627.9435396,18,"#ffee00" +2016postVFP,bkg,Other,WWTo4Q_4f,51.79,16.81,1691051698.51794,18,"#ffee00" +2017,bkg,Other,WWTo4Q_4f,51.79,41.529,3366860424.1632786,18,"#ffee00" +2018,bkg,Other,WWTo4Q_4f,51.79,59.7,3375431134.00944,18,"#ffee00" +2016preVFP,bkg,Other,WWW_4F,0.2086,19.52,1120296.2527460596,19,"#ffee00" +2016postVFP,bkg,Other,WWW_4F,0.2086,16.81,897983.0426318999,19,"#ffee00" +2017,bkg,Other,WWW_4F,0.2086,41.529,2127100.3534672004,19,"#ffee00" +2018,bkg,Other,WWW_4F,0.2086,59.7,2135751.021314641,19,"#ffee00" +2016preVFP,bkg,Other,WWZ_4F,0.1651,19.52,866227.8943972798,110,"#ffee00" +2016postVFP,bkg,Other,WWZ_4F,0.1651,16.81,784713.6799941598,110,"#ffee00" +2017,bkg,Other,WWZ_4F,0.1651,41.529,30386.291709359997,110,"#ffee00" +2018,bkg,Other,WWZ_4F,0.1651,59.7,1701326.7746178,110,"#ffee00" +2016preVFP,bkg,Other,WZTo1L1Nu2Q_4f,49.997,19.52,55837849.198033005,111,"#ffee00" +2016postVFP,bkg,Other,WZTo1L1Nu2Q_4f,49.997,16.81,55731090.928037,111,"#ffee00" +2017,bkg,Other,WZTo1L1Nu2Q_4f,49.997,41.529,110858039.00628796,111,"#ffee00" +2018,bkg,Other,WZTo1L1Nu2Q_4f,49.997,59.7,111635546.18117303,111,"#ffee00" +2016preVFP,bkg,Other,WZTo2Q2L_mllmin4p0,5.6,19.52,150461589.64956802,112,"#ffee00" +2016postVFP,bkg,Other,WZTo2Q2L_mllmin4p0,5.6,16.81,129756627.88206398,112,"#ffee00" +2017,bkg,Other,WZTo2Q2L_mllmin4p0,5.6,41.529,279011957.77799195,112,"#ffee00" +2018,bkg,Other,WZTo2Q2L_mllmin4p0,5.6,59.7,274146243.456856,112,"#ffee00" +2016preVFP,bkg,Other,WZZ,0.05565,19.52,308416.900468794,113,"#ffee00" +2016postVFP,bkg,Other,WZZ,0.05565,16.81,239577.66254908202,113,"#ffee00" +2017,bkg,Other,WZZ,0.05565,41.529,565804.4159150762,113,"#ffee00" +2018,bkg,Other,WZZ,0.05565,59.7,571468.6796678341,113,"#ffee00" +2016preVFP,bkg,Other,WminusH_HToBB_WToLNu_M-125,0.04901236122,19.52,456328.808984,114,"#ffee00" +2016postVFP,bkg,Other,WminusH_HToBB_WToLNu_M-125,0.04901236122,16.81,396666.03461200005,114,"#ffee00" +2017,bkg,Other,WminusH_HToBB_WToLNu_M-125,0.04901236122,41.529,838429.3056330001,114,"#ffee00" +2018,bkg,Other,WminusH_HToBB_WToLNu_M-125,0.04901236122,59.7,862915.6701140001,114,"#ffee00" +2016preVFP,bkg,Other,WplusH_HToBB_WToLNu_M-125,0.08487599411,19.52,745714.0028800002,115,"#ffee00" +2016postVFP,bkg,Other,WplusH_HToBB_WToLNu_M-125,0.08487599411,16.81,630589.0213000001,115,"#ffee00" +2017,bkg,Other,WplusH_HToBB_WToLNu_M-125,0.08487599411,41.529,1379069.7327400006,115,"#ffee00" +2018,bkg,Other,WplusH_HToBB_WToLNu_M-125,0.08487599411,59.7,1377077.4378200006,115,"#ffee00" 
+2016preVFP,bkg,Other,ZH_HToBB_ZToQQ_M-125,0.1713336282,19.52,3031835.56525,116,"#ffee00" +2016postVFP,bkg,Other,ZH_HToBB_ZToQQ_M-125,0.1713336282,16.81,2574972.266824,116,"#ffee00" +2017,bkg,Other,ZH_HToBB_ZToQQ_M-125,0.1713336282,41.529,5523315.584057998,116,"#ffee00" +2018,bkg,Other,ZH_HToBB_ZToQQ_M-125,0.1713336282,59.7,5611322.638808001,116,"#ffee00" +2016preVFP,bkg,Other,ZZTo2Nu2Q_5f,4.58725,19.52,18741128.384986002,117,"#ffee00" +2016postVFP,bkg,Other,ZZTo2Nu2Q_5f,4.58725,16.81,18744054.077794,117,"#ffee00" +2017,bkg,Other,ZZTo2Nu2Q_5f,4.58725,41.529,37461109.65704401,117,"#ffee00" +2018,bkg,Other,ZZTo2Nu2Q_5f,4.58725,59.7,37590250.764148004,117,"#ffee00" +2016preVFP,bkg,Other,ZZTo2Q2L_mllmin4p0,3.28,19.52,88594639.63439178,118,"#ffee00" +2016postVFP,bkg,Other,ZZTo2Q2L_mllmin4p0,3.28,16.81,75775135.8073346,118,"#ffee00" +2017,bkg,Other,ZZTo2Q2L_mllmin4p0,3.28,41.529,162895522.45623606,118,"#ffee00" +2018,bkg,Other,ZZTo2Q2L_mllmin4p0,3.28,59.7,161924457.81939876,118,"#ffee00" +2016preVFP,bkg,Other,ZZTo4Q_5f,3.451,19.52,7087208.439165003,119,"#ffee00" +2016postVFP,bkg,Other,ZZTo4Q_5f,3.451,16.81,7271593.462734001,119,"#ffee00" +2017,bkg,Other,ZZTo4Q_5f,3.451,41.529,14363539.312917003,119,"#ffee00" +2018,bkg,Other,ZZTo4Q_5f,3.451,59.7,14508182.721835006,119,"#ffee00" +2016preVFP,bkg,Other,ZZZ,0.01398,19.52,78317.0634226,120,"#ffee00" +2016postVFP,bkg,Other,ZZZ,0.01398,16.81,66938.69670588801,120,"#ffee00" +2017,bkg,Other,ZZZ,0.01398,41.529,140629.403262712,120,"#ffee00" +2018,bkg,Other,ZZZ,0.01398,59.7,146027.50457004798,120,"#ffee00" +2016preVFP,bkg,QCD,QCD_HT1000to1500,1206.0,19.52,4773503.0,121,"#a3d42f" +2016postVFP,bkg,QCD,QCD_HT1000to1500,1206.0,16.81,4365993.0,121,"#a3d42f" +2017,bkg,QCD,QCD_HT1000to1500,1206.0,41.529,10186734.0,121,"#a3d42f" +2018,bkg,QCD,QCD_HT1000to1500,1206.0,59.7,14394786.0,121,"#a3d42f" +2016preVFP,bkg,QCD,QCD_HT100to200,27849880.0,19.52,26312661.0,122,"#a3d42f" +2016postVFP,bkg,QCD,QCD_HT100to200,27849880.0,16.81,23717410.0,122,"#a3d42f" +2017,bkg,QCD,QCD_HT100to200,27849880.0,41.529,54381393.0,122,"#a3d42f" +2018,bkg,QCD,QCD_HT100to200,27849880.0,59.7,84434559.0,122,"#a3d42f" +2016preVFP,bkg,QCD,QCD_HT1500to2000,98.71,19.52,3503675.0,123,"#a3d42f" +2016postVFP,bkg,QCD,QCD_HT1500to2000,98.71,16.81,3003707.0,123,"#a3d42f" +2017,bkg,QCD,QCD_HT1500to2000,98.71,41.529,7701876.0,123,"#a3d42f" +2018,bkg,QCD,QCD_HT1500to2000,98.71,59.7,10411831.0,123,"#a3d42f" +2016preVFP,bkg,QCD,QCD_HT2000toInf,20.2,19.52,1629000.0,124,"#a3d42f" +2016postVFP,bkg,QCD,QCD_HT2000toInf,20.2,16.81,1847781.0,124,"#a3d42f" +2017,bkg,QCD,QCD_HT2000toInf,20.2,41.529,4112573.0,124,"#a3d42f" +2018,bkg,QCD,QCD_HT2000toInf,20.2,59.7,5374711.0,124,"#a3d42f" +2016preVFP,bkg,QCD,QCD_HT200to300,1716997.0,19.52,16524587.0,125,"#a3d42f" +2016postVFP,bkg,QCD,QCD_HT200to300,1716997.0,16.81,17569141.0,125,"#a3d42f" +2017,bkg,QCD,QCD_HT200to300,1716997.0,41.529,42714435.0,125,"#a3d42f" +2018,bkg,QCD,QCD_HT200to300,1716997.0,59.7,57336623.0,125,"#a3d42f" +2016preVFP,bkg,QCD,QCD_HT300to500,351302.0,19.52,15341307.0,126,"#a3d42f" +2016postVFP,bkg,QCD,QCD_HT300to500,351302.0,16.81,16747056.0,126,"#a3d42f" +2017,bkg,QCD,QCD_HT300to500,351302.0,41.529,43429979.0,126,"#a3d42f" +2018,bkg,QCD,QCD_HT300to500,351302.0,59.7,61609663.0,126,"#a3d42f" +2016preVFP,bkg,QCD,QCD_HT500to700,31630.0,19.52,15775001.0,127,"#a3d42f" +2016postVFP,bkg,QCD,QCD_HT500to700,31630.0,16.81,14212819.0,127,"#a3d42f" +2017,bkg,QCD,QCD_HT500to700,31630.0,41.529,36194860.0,127,"#a3d42f" 
+2018,bkg,QCD,QCD_HT500to700,31630.0,59.7,49184771.0,127,"#a3d42f" +2016preVFP,bkg,QCD,QCD_HT50to100,185300000.0,19.52,12233035.0,128,"#a3d42f" +2016postVFP,bkg,QCD,QCD_HT50to100,185300000.0,16.81,11197186.0,128,"#a3d42f" +2017,bkg,QCD,QCD_HT50to100,185300000.0,41.529,26243010.0,128,"#a3d42f" +2018,bkg,QCD,QCD_HT50to100,185300000.0,59.7,38599389.0,128,"#a3d42f" +2016preVFP,bkg,QCD,QCD_HT700to1000,6802.0,19.52,15808790.0,129,"#a3d42f" +2016postVFP,bkg,QCD,QCD_HT700to1000,6802.0,16.81,13750537.0,129,"#a3d42f" +2017,bkg,QCD,QCD_HT700to1000,6802.0,41.529,32934816.0,129,"#a3d42f" +2018,bkg,QCD,QCD_HT700to1000,6802.0,59.7,48506751.0,129,"#a3d42f" +2016preVFP,bkg,ST,ST_t-channel_antitop_4f_InclusiveDecays,80.95,19.52,1983864521.5964818,130,"#00dbbd" +2016postVFP,bkg,ST,ST_t-channel_antitop_4f_InclusiveDecays,80.95,16.81,1957283270.7582045,130,"#00dbbd" +2017,bkg,ST,ST_t-channel_antitop_4f_InclusiveDecays,80.95,41.529,4462869081.826046,130,"#00dbbd" +2018,bkg,ST,ST_t-channel_antitop_4f_InclusiveDecays,80.95,59.7,6114949908.582957,130,"#00dbbd" +2016preVFP,bkg,ST,ST_t-channel_top_4f_InclusiveDecays,136.02,19.52,5948135224.056,131,"#00dbbd" +2016postVFP,bkg,ST,ST_t-channel_top_4f_InclusiveDecays,136.02,16.81,6703802049.126,131,"#00dbbd" +2017,bkg,ST,ST_t-channel_top_4f_InclusiveDecays,136.02,41.529,13808000809.116,131,"#00dbbd" +2018,bkg,ST,ST_t-channel_top_4f_InclusiveDecays,136.02,59.7,18955983507.893997,131,"#00dbbd" +2016preVFP,bkg,ST,ST_tW_antitop_5f_inclusiveDecays,19.559,19.52,74766343.79400003,132,"#00dbbd" +2016postVFP,bkg,ST,ST_tW_antitop_5f_inclusiveDecays,19.559,16.81,83024149.93800001,132,"#00dbbd" +2017,bkg,ST,ST_tW_antitop_5f_inclusiveDecays,19.559,41.529,184446313.3,132,"#00dbbd" +2018,bkg,ST,ST_tW_antitop_5f_inclusiveDecays,19.559,59.7,251902163.21,132,"#00dbbd" +2016preVFP,bkg,ST,ST_tW_top_5f_inclusiveDecays,19.559,19.52,74624666.33600001,133,"#00dbbd" +2016postVFP,bkg,ST,ST_tW_top_5f_inclusiveDecays,19.559,16.81,80821432.59200002,133,"#00dbbd" +2017,bkg,ST,ST_tW_top_5f_inclusiveDecays,19.559,41.529,183284888.00640002,133,"#00dbbd" +2018,bkg,ST,ST_tW_top_5f_inclusiveDecays,19.559,59.7,258137398.5808,133,"#00dbbd" +2016preVFP,bkg,WJets,WJetsToQQ_HT-200to400,2549.0,19.52,8000572.0,134,"#5476a8" +2016postVFP,bkg,WJets,WJetsToQQ_HT-200to400,2549.0,16.81,7065076.0,134,"#5476a8" +2017,bkg,WJets,WJetsToQQ_HT-200to400,2549.0,41.529,15968057.0,134,"#5476a8" +2018,bkg,WJets,WJetsToQQ_HT-200to400,2549.0,59.7,14494966.0,134,"#5476a8" +2016preVFP,bkg,WJets,WJetsToQQ_HT-400to600,276.5,19.52,5153588.0,135,"#5476a8" +2016postVFP,bkg,WJets,WJetsToQQ_HT-400to600,276.5,16.81,4455853.0,135,"#5476a8" +2017,bkg,WJets,WJetsToQQ_HT-400to600,276.5,41.529,9927793.0,135,"#5476a8" +2018,bkg,WJets,WJetsToQQ_HT-400to600,276.5,59.7,9335298.0,135,"#5476a8" +2016preVFP,bkg,WJets,WJetsToQQ_HT-600to800,59.25,19.52,7673043.0,136,"#5476a8" +2016postVFP,bkg,WJets,WJetsToQQ_HT-600to800,59.25,16.81,6804490.0,136,"#5476a8" +2017,bkg,WJets,WJetsToQQ_HT-600to800,59.25,41.529,14510253.0,136,"#5476a8" +2018,bkg,WJets,WJetsToQQ_HT-600to800,59.25,59.7,13633226.0,136,"#5476a8" +2016preVFP,bkg,WJets,WJetsToQQ_HT-800toInf,28.75,19.52,7758476.0,137,"#5476a8" +2016postVFP,bkg,WJets,WJetsToQQ_HT-800toInf,28.75,16.81,6769101.0,137,"#5476a8" +2017,bkg,WJets,WJetsToQQ_HT-800toInf,28.75,41.529,14722417.0,137,"#5476a8" +2018,bkg,WJets,WJetsToQQ_HT-800toInf,28.75,59.7,13581343.0,137,"#5476a8" +2016preVFP,bkg,ZJets,ZJetsToQQ_HT-200to400,1012.0,19.52,8750744.0,138,"#8C00FF" 
+2016postVFP,bkg,ZJets,ZJetsToQQ_HT-200to400,1012.0,16.81,7285673.0,138,"#8C00FF" +2017,bkg,ZJets,ZJetsToQQ_HT-200to400,1012.0,41.529,15256299.0,138,"#8C00FF" +2018,bkg,ZJets,ZJetsToQQ_HT-200to400,1012.0,59.7,15002757.0,138,"#8C00FF" +2016preVFP,bkg,ZJets,ZJetsToQQ_HT-400to600,114.2,19.52,7709128.0,139,"#8C00FF" +2016postVFP,bkg,ZJets,ZJetsToQQ_HT-400to600,114.2,16.81,6942718.0,139,"#8C00FF" +2017,bkg,ZJets,ZJetsToQQ_HT-400to600,114.2,41.529,14884962.0,139,"#8C00FF" +2018,bkg,ZJets,ZJetsToQQ_HT-400to600,114.2,59.7,13930474.0,139,"#8C00FF" +2016preVFP,bkg,ZJets,ZJetsToQQ_HT-600to800,25.34,19.52,6093849.0,140,"#8C00FF" +2016postVFP,bkg,ZJets,ZJetsToQQ_HT-600to800,25.34,16.81,5529896.0,140,"#8C00FF" +2017,bkg,ZJets,ZJetsToQQ_HT-600to800,25.34,41.529,11702567.0,140,"#8C00FF" +2018,bkg,ZJets,ZJetsToQQ_HT-600to800,25.34,59.7,12029507.0,140,"#8C00FF" +2016preVFP,bkg,ZJets,ZJetsToQQ_HT-800toInf,12.99,19.52,3726992.0,141,"#8C00FF" +2016postVFP,bkg,ZJets,ZJetsToQQ_HT-800toInf,12.99,16.81,4388402.0,141,"#8C00FF" +2017,bkg,ZJets,ZJetsToQQ_HT-800toInf,12.99,41.529,9400574.0,141,"#8C00FF" +2018,bkg,ZJets,ZJetsToQQ_HT-800toInf,12.99,59.7,9681521.0,141,"#8C00FF" +2016preVFP,bkg,ttbar,TTToHadronic,377.96,19.52,30530922535.48401,142,"#ff82ff" +2016postVFP,bkg,ttbar,TTToHadronic,377.96,16.81,33608820216.924007,142,"#ff82ff" +2017,bkg,ttbar,TTToHadronic,377.96,41.529,73140765879.47005,142,"#ff82ff" +2018,bkg,ttbar,TTToHadronic,377.96,59.7,104910439190.40399,142,"#ff82ff" +2016preVFP,bkg,ttbar,TTToSemiLeptonic,365.34,19.52,39772305735.140015,143,"#ff82ff" +2016postVFP,bkg,ttbar,TTToSemiLeptonic,365.34,16.81,43548253725.28402,143,"#ff82ff" +2017,bkg,ttbar,TTToSemiLeptonic,365.34,41.529,104129959042.42807,143,"#ff82ff" +2018,bkg,ttbar,TTToSemiLeptonic,365.34,59.7,143354137521.54007,143,"#ff82ff" +2016preVFP,bkg,ttx,TTWJetsToQQ,0.4377,19.52,184218.52841280002,144,"#00db6e" +2016postVFP,bkg,ttx,TTWJetsToQQ,0.4377,16.81,209107.00335840002,144,"#00db6e" +2017,bkg,ttx,TTWJetsToQQ,0.4377,41.529,444333.9716704001,144,"#00db6e" +2018,bkg,ttx,TTWJetsToQQ,0.4377,59.7,656374.2728368,144,"#00db6e" +2016preVFP,bkg,ttx,TTWW,0.0115,19.52,278000.0,145,"#00db6e" +2016postVFP,bkg,ttx,TTWW,0.0115,16.81,309000.0,145,"#00db6e" +2017,bkg,ttx,TTWW,0.0115,41.529,698000.0,145,"#00db6e" +2018,bkg,ttx,TTWW,0.0115,59.7,944000.0,145,"#00db6e" +2016preVFP,bkg,ttx,TTWZ,0.003884,19.52,140000.0,146,"#00db6e" +2016postVFP,bkg,ttx,TTWZ,0.003884,16.81,159000.0,146,"#00db6e" +2017,bkg,ttx,TTWZ,0.003884,41.529,350000.0,146,"#00db6e" +2018,bkg,ttx,TTWZ,0.003884,59.7,498000.0,146,"#00db6e" +2016preVFP,bkg,ttx,TTbb_4f_TTToHadronic,377.96,19.52,46015105.035600014,147,"#00db6e" +2016postVFP,bkg,ttx,TTbb_4f_TTToHadronic,377.96,16.81,51913709.81100002,147,"#00db6e" +2017,bkg,ttx,TTbb_4f_TTToHadronic,377.96,41.529,113736166.15320003,147,"#00db6e" +2018,bkg,ttx,TTbb_4f_TTToHadronic,377.96,59.7,160749265.96440002,147,"#00db6e" +2016preVFP,bkg,ttx,ttHToNonbb_M125,0.215,19.52,990602.6741519999,148,"#00db6e" +2016postVFP,bkg,ttx,ttHToNonbb_M125,0.215,16.81,1122813.932604,148,"#00db6e" +2017,bkg,ttx,ttHToNonbb_M125,0.215,41.529,2540302.943178,148,"#00db6e" +2018,bkg,ttx,ttHToNonbb_M125,0.215,59.7,3671562.4015979995,148,"#00db6e" +2016preVFP,bkg,ttx,ttHTobb_M125,0.1279,19.52,2315362.28742,149,"#00db6e" +2016postVFP,bkg,ttx,ttHTobb_M125,0.1279,16.81,2473448.328624,149,"#00db6e" +2017,bkg,ttx,ttHTobb_M125,0.1279,41.529,3919780.948356,149,"#00db6e" +2018,bkg,ttx,ttHTobb_M125,0.1279,59.7,4843451.7560519995,149,"#00db6e" +2016preVFP,data,MET,MET,1.0,1.0,1.0,20,black 
+2016preVFP,data,MET,MET,1.0,1.0,1.0,20,black
+2016preVFP,data,MET,MET,1.0,1.0,1.0,20,black
+2016preVFP,data,MET,MET,1.0,1.0,1.0,20,black
+2016preVFP,data,MET,MET,1.0,1.0,1.0,20,black
+2016preVFP,data,MET,MET,1.0,1.0,1.0,20,black
+2016postVFP,data,MET,MET,1.0,1.0,1.0,20,black
+2016postVFP,data,MET,MET,1.0,1.0,1.0,20,black
+2016postVFP,data,MET,MET,1.0,1.0,1.0,20,black
+2017,data,MET,MET,1.0,1.0,1.0,20,black
+2017,data,MET,MET,1.0,1.0,1.0,20,black
+2017,data,MET,MET,1.0,1.0,1.0,20,black
+2017,data,MET,MET,1.0,1.0,1.0,20,black
+2017,data,MET,MET,1.0,1.0,1.0,20,black
+2018,data,MET,MET,1.0,1.0,1.0,20,black
+2018,data,MET,MET,1.0,1.0,1.0,20,black
+2018,data,MET,MET,1.0,1.0,1.0,20,black
+2018,data,MET,MET,1.0,1.0,1.0,20,black
diff --git a/analysis/vbs_vvh/config/variables/extra_definitions.py b/analysis/vbs_vvh/config/variables/extra_definitions.py
new file mode 100644
index 00000000..e3d66d5e
--- /dev/null
+++ b/analysis/vbs_vvh/config/variables/extra_definitions.py
@@ -0,0 +1,90 @@
+
+import numpy as np
+import awkward as ak
+
+def VV_final_state(events):
+    """Define per-event final state categories from daughter PDG IDs (e.g. daughter IDs 13, -14, 1, -2 count as one lepton, one neutrino and two quarks, giving had_1lep)."""
+
+    # Helper functions
+    def is_plep(pid): return (pid == 11) | (pid == 13) | (pid == 15) | (pid == 17)
+    def is_nlep(pid): return (pid == -11) | (pid == -13) | (pid == -15) | (pid == -17)
+    def is_had(pid): return (abs(pid) <= 9) | (pid == 21)
+    def is_MET(pid): return (abs(pid) == 12) | (abs(pid) == 14) | (abs(pid) == 16)
+
+    # Build all daughter-level flags
+    daughters = ["tV1d1_id", "tV1d2_id", "tV2d1_id", "tV2d2_id"]
+    plep = [is_plep(events[name]) for name in daughters]
+    nlep = [is_nlep(events[name]) for name in daughters]
+    had = [is_had(events[name]) for name in daughters]
+    met = [is_MET(events[name]) for name in daughters]
+
+    # Count per-event
+    n_plep = sum(plep)
+    n_nlep = sum(nlep)
+    n_had = sum(had)
+    n_MET = sum(met)
+    n_lep = n_plep + n_nlep
+
+    # Define categories
+    had_2lep = (n_had == 2) & (n_lep == 2) & (n_MET == 0)
+    had_1lep = (n_had == 2) & (n_lep == 1) & (n_MET == 1)
+    had_MET = (n_had == 2) & (n_lep == 0) & (n_MET == 2)
+    total_MET = (n_MET == 4)
+    lep_3 = (n_lep == 3) & (n_had == 0) & (n_MET == 1)
+    lep_4 = (n_lep == 4) & (n_had == 0) & (n_MET == 0)
+    nohad_oslep = (n_plep == 1) & (n_nlep == 1) & (n_MET == 2)
+    ZZ_oslep = nohad_oslep & (abs(events.tV1_id) == 23)
+    WW_oslep = nohad_oslep & (abs(events.tV1_id) == 24)
+    WW_sslep = ((n_plep == 2) | (n_nlep == 2)) & (n_MET == 2)
+    MET_1lep = (n_MET == 3) & (n_lep == 1)
+
+    return {
+        "had_2lep": had_2lep,
+        "had_1lep": had_1lep,
+        "had_MET": had_MET,
+        "total_MET": total_MET,
+        "lep_3": lep_3,
+        "lep_4": lep_4,
+        "nohad_oslep": nohad_oslep,
+        "ZZ_oslep": ZZ_oslep,
+        "WW_oslep": WW_oslep,
+        "WW_sslep": WW_sslep,
+        "MET_1lep": MET_1lep,
+        "n_had": n_had,
+        "n_lep": n_lep,
+        "n_MET": n_MET,
+    }
+
+def Higgs_final_state(events):
+    return 0  # stub: Higgs final-state categorisation is not implemented yet
+
+
+def VV_ndaughters(events):
+    """Count daughters per event by type (positive/negative leptons, hadrons, neutrinos) from daughter PDG IDs."""
+
+    # Helper functions
+    def is_plep(pid): return (pid == 11) | (pid == 13) | (pid == 15) | (pid == 17)
+    def is_nlep(pid): return (pid == -11) | (pid == -13) | (pid == -15) | (pid == -17)
+    def is_had(pid): return (abs(pid) <= 9) | (pid == 21)
+    def is_MET(pid): return (abs(pid) == 12) | (abs(pid) == 14) | (abs(pid) == 16)
+
+    # Build all daughter-level flags
+    daughters = ["tV1d1_id", "tV1d2_id", "tV2d1_id", "tV2d2_id"]
+    plep = [is_plep(events[name]) for name in daughters]
+    nlep = [is_nlep(events[name]) for name in daughters]
+    had = [is_had(events[name]) for name
in daughters] + met = [is_MET(events[name]) for name in daughters] + + # Count per-event + n_plep = sum(plep) + n_nlep = sum(nlep) + n_had = sum(had) + n_MET = sum(met) + n_lep = n_plep + n_nlep + + return { + "n_plep" :n_plep, + "n_nlep" :n_nlep, + "n_had" :n_had, + "n_MET" :n_MET + } diff --git a/analysis/vbs_vvh/config/variables/var_config.py b/analysis/vbs_vvh/config/variables/var_config.py new file mode 100644 index 00000000..2740a0d2 --- /dev/null +++ b/analysis/vbs_vvh/config/variables/var_config.py @@ -0,0 +1,597 @@ +from hist import axis +from coffea.nanoevents.methods import vector +import awkward as ak +ak.behavior.update(vector.behavior) +import numpy as np + +obj = { #reconstruction of H, V1, V2, vbsjs and MET + "Higgs": lambda events: ak.zip( + { + "pt": events.Higgs_pt, + "mass": events.Higgs_msoftdrop, + "eta": events.Higgs_eta, + "phi": events.Higgs_phi, + "score": events.HiggsScore, + # "area": events.Higgs_area, + # "deepTagMD_TvsQCD": events.Higgs_deepTagMD_TvsQCD, + # "deepTagMD_WvsQCD": events.Higgs_deepTagMD_WvsQCD, + # "particleNetMD_QCD": events.Higgs_particleNetMD_QCD, + # "particleNet_TvsQCD": events.Higgs_particleNet_TvsQCD, + # "particleNet_WvsQCD": events.Higgs_particleNet_WvsQCD, + # "particleNet_mass": events.Higgs_particleNet_mass, + }, + with_name="PtEtaPhiMLorentzVector", + behavior=vector.behavior + ), + "V1": lambda events: ak.zip( + { + "pt": events.V1_pt, + "mass": events.V1_msoftdrop, + "eta": events.V1_eta, + "phi": events.V1_phi, + "score": events.V1Score, + # "area": events.V1_area, + # "deepTagMD_TvsQCD": events.V1_deepTagMD_TvsQCD, + # "deepTagMD_WvsQCD": events.V1_deepTagMD_WvsQCD, + # "particleNetMD_QCD": events.V1_particleNetMD_QCD, + # "particleNet_TvsQCD": events.V1_particleNet_TvsQCD, + # "particleNet_WvsQCD": events.V1_particleNet_WvsQCD, + }, + with_name="PtEtaPhiMLorentzVector", + behavior=vector.behavior + ), + "vbsj1": lambda events: ak.zip( + { + "pt": events.vbsj1_pt, + "mass": events.vbsj1_m, + "eta": events.vbsj1_eta, + "phi": events.vbsj1_phi, + # "area": ak.firsts(events.goodVBSJets_area[events.vbs_idx_max_Mjj[:,0]]), + # "nConstituents": ak.firsts(events.goodVBSJets_nConstituents[events.vbs_idx_max_Mjj[:,0]]), + }, + with_name="PtEtaPhiMLorentzVector", + behavior=vector.behavior + ), + "vbsj2": lambda events: ak.zip( + { + "pt": events.vbsj2_pt, + "mass": events.vbsj2_m, + "eta": events.vbsj2_eta, + "phi": events.vbsj2_phi, + # "area": ak.firsts(events.goodVBSJets_area[events.vbs_idx_max_Mjj[:,1]]), + # "nConstituents": ak.firsts(events.goodVBSJets_nConstituents[events.vbs_idx_max_Mjj[:,1]]), + }, + with_name="PtEtaPhiMLorentzVector", + behavior=vector.behavior + ), + "MET": lambda events: ak.zip( + { + "pt": events.Met_pt, + "phi": events.Met_phi, + "eta": ak.zeros_like(events.Met_pt), # Set to 0 to ensure valid LorentzVector + "mass": ak.zeros_like(events.Met_pt), # Same for mass + "significance": events.Met_significance, + 'sumEt': events.Met_sumEt, + 'sumPtUnclustered': events.Met_sumPtUnclustered, + }, + with_name="PtEtaPhiMLorentzVector", + behavior=vector.behavior + ), + "leadAK8": lambda events: ak.zip( + { + "pt": events.leadAK8_pt, + "mass": events.leadAK8_msoftdrop, + "eta": events.leadAK8_eta, + "phi": events.leadAK8_phi, + }, + with_name="PtEtaPhiMLorentzVector", + behavior=vector.behavior + ), + "goodAK4Jets": lambda events: ak.zip( #for getting lead ak4 dphi met + { + "pt": events.goodAK4Jets_pt, + "mass": events.goodAK4Jets_mass, + "eta": events.goodAK4Jets_eta, + "phi": events.goodAK4Jets_phi, + }, + 
with_name="PtEtaPhiMLorentzVector", + behavior=vector.behavior + ), + "goodAK8Jets": lambda events: ak.zip( + { + "pt": events.goodAK8Jets_pt, + "mass": events.goodAK8Jets_msoftdrop, + "eta": events.goodAK8Jets_eta, + "phi": events.goodAK8Jets_phi, + # "HbbScore":events.goodAK8Jets_HbbScore, + # "WqqScore":events.goodAK8Jets_WqqScore, + # "area": events.goodAK8Jets_area, + # "deepTagMD_TvsQCD": events.goodAK8Jets_deepTagMD_TvsQCD, + # "deepTagMD_WvsQCD": events.goodAK8Jets_deepTagMD_WvsQCD, + # "particleNetMD_QCD": events.goodAK8Jets_particleNetMD_QCD, + # "particleNet_TvsQCD": events.goodAK8Jets_particleNet_TvsQCD, + # "particleNet_WvsQCD": events.goodAK8Jets_particleNet_WvsQCD, + }, + with_name="PtEtaPhiMLorentzVector", + behavior=vector.behavior + ), + # "electrons": lambda events: ak.zip( + # { + # "pt": events.Electron_pt, + # "mass": events.Electron_mass, + # "eta": events.Electron_eta, + # "phi": events.Electron_phi, + # }, + # with_name="PtEtaPhiMLorentzVector", + # behavior=vector.behavior + # ), + # "muons": lambda events: ak.zip( + # { + # "pt": events.Muon_pt, + # "mass": events.Muon_mass, + # "eta": events.Muon_eta, + # "phi": events.Muon_phi, + # }, + # with_name="PtEtaPhiMLorentzVector", + # behavior=vector.behavior + # ), + # "taus": lambda events: ak.zip( + # { + # "pt": events.Tau_pt, + # "mass": events.Tau_mass, + # "eta": events.Tau_eta, + # "phi": events.Tau_phi, + # }, + # with_name="PtEtaPhiMLorentzVector", + # behavior=vector.behavior + # ), +} + +other_objs = { #reconstruction of all (good) jets and stuffs if needed + "goodVBSJets": lambda events,objects: ak.zip( + { + "pt": events.goodAK4Jets_pt, + "mass": events.goodAK4Jets_mass, + "eta": events.goodAK4Jets_eta, + "phi": events.goodAK4Jets_phi, + }, + with_name="PtEtaPhiMLorentzVector", + behavior=vector.behavior + ), + "V2": lambda events,objects: ak.zip( #for met2FJ channel, V2 should not exist + { + "pt": events.V2_pt, + "mass": events.V2_msoftdrop, + "eta": events.V2_eta, + "phi": events.V2_phi, + "score": events.V2Score, + }, + with_name="PtEtaPhiMLorentzVector", + behavior=vector.behavior + ), + "leadAK4": lambda events,objects: get_leading_jet(objects['goodAK4Jets']), +} + +def get_leading_jet(jets): + jets_sorted = jets[ak.argsort(jets.pt, axis=1, ascending=False)] + return ak.firsts(jets_sorted) + + +dense_variables_config = { #name of axis must be same as key + "nGoodAK4": { + "axis": axis.Regular(25, 0, 25, name="nGoodAK4", label="nGoodAK4"), + "expr": lambda events, objects: events.nGoodAK4, + }, + "nGoodAK8": { + "axis": axis.Regular(6, 0, 6, name="nGoodAK8", label="nGoodAK8"), + "expr": lambda events, objects: events.nGoodAK8, + }, + "nAK4": { + "axis": axis.Regular(25, 0, 25, name="nAK4", label="nAK4"), + "expr": lambda events, objects: events.nAK4 + }, + "nAK8": { + "axis": axis.Regular(6, 0, 6, name="nAK8", label="nAK8"), + "expr": lambda events, objects: events.nAK8, + }, + "Higgs_pt": { + "axis": axis.Regular(50, 0, 2000, name="Higgs_pt", label="Higgs pt (GeV)"), + "expr": lambda events, objects: objects["Higgs"].pt, + }, + "Higgs_phi": { + "axis": axis.Regular(50, -3.5, 3.5, name="Higgs_phi", label="Higgs phi"), + "expr": lambda events, objects: objects["Higgs"].phi, + }, + "Higgs_eta": { + "axis": axis.Regular(50, 0,3, name="Higgs_eta", label="Higgs eta"), + "expr": lambda events, objects: objects["Higgs"].eta, + }, + "Higgs_mass": { + "axis": axis.Regular(50, 0, 400, name="Higgs_mass", label="Higgs mass (GeV)"), + "expr": lambda events, objects: objects["Higgs"].mass, + }, + "Higgs_score": { 
+ "axis": axis.Regular(50, 0, 1, name="Higgs_score", label="Higgs score"), + "expr": lambda events, objects: objects["Higgs"].score, + }, + "V1_pt": { + "axis": axis.Regular(50, 0, 2000, name="V1_pt", label="V1 pt (GeV)"), + "expr": lambda events, objects: objects["V1"].pt, + }, + "V1_phi": { + "axis": axis.Regular(50, -3.5, 3.5, name="V1_phi", label="V1 phi"), + "expr": lambda events, objects: objects["V1"].phi, + }, + "V1_eta": { + "axis": axis.Regular(50, 0,3, name="V1_eta", label="V1 eta"), + "expr": lambda events, objects: objects["V1"].eta, + }, + "V1_mass": { + "axis": axis.Regular(50, 0, 400, name="V1_mass", label="V1 mass (GeV)"), + "expr": lambda events, objects: objects["V1"].mass, + }, + "V1_score": { + "axis": axis.Regular(50, 0, 1, name="V1_score", label="V1 score"), + "expr": lambda events, objects: objects["V1"].score, + }, + "Met_pt":{ + "axis": axis.Regular(50, 0, 2000, name="Met_pt", label="MET pt (GeV)"), + "expr": lambda events, objects: objects["MET"].pt, + }, + "Met_pt_low":{ + "axis": axis.Regular(80, 0, 800, name="Met_pt_low", label="MET pt (GeV)"), + "expr": lambda events, objects: objects["MET"].pt, + }, + "Met_significance":{ + "axis": axis.Regular(50, 0, 2000, name="Met_significance", label="MET significance"), + "expr": lambda events, objects: objects["MET"].significance, + }, + "Met_significance_fine":{ + "axis": axis.Regular(50, 0, 200, name="Met_significance_fine", label="MET significance zoomed in"), + "expr": lambda events, objects: objects["MET"].significance, + }, + "Met_significance_fine10":{ + "axis": axis.Regular(20, 0, 200, name="Met_significance_fine10", label="MET significance zoomed in"), + "expr": lambda events, objects: objects["MET"].significance, + }, + "Met_significance_lowrange":{ + "axis": axis.Regular(20, 0, 50, name="Met_significance_lowrange", label="MET significance in <50 reg."), + "expr": lambda events, objects: objects["MET"].significance, + }, + "Met_phi":{ + "axis": axis.Regular(50, -3.5, 3.5, name="Met_phi", label="MET phi"), + "expr": lambda events, objects: objects["MET"].phi, + }, + "HV1_dR": { + "axis": axis.Regular(50, 0, 6, name="HV1_dR", label="dR(Higgs, V1)"), + "expr": lambda events, objects: objects["Higgs"].delta_r(objects["V1"]), + }, + "HMET_dphi": { + "axis": axis.Regular(50, 0, 3.5, name="HMET_dphi", label="dphi(Higgs, MET)"), + "expr": lambda events, objects: deltaPhi(objects["Higgs"],objects["MET"]), + }, + "V1MET_dphi": { + "axis": axis.Regular(50, 0, 3.5, name="V1MET_dphi", label="dphi(V1, MET)"), + "expr": lambda events, objects: deltaPhi(objects["V1"],objects["MET"]), + }, + "HVMET_sum_pt": { + "axis": axis.Regular(50, 0, 2000, name="HVMET_sum_pt", label="pt(H + V1) (GeV)"), + "expr": lambda events, objects: (objects["Higgs"] + objects["V1"] + objects["MET"]).pt, + }, + "sum_bosonHT":{ + "axis": axis.Regular(50, 0, 5000, name="sum_bosonHT", label="(Higgs_pt+V1_pt+MET_pt)"), + "expr": lambda events, objects: objects["Higgs"].pt + objects["V1"].pt +objects["MET"].pt, + }, + "vbsj_deta":{ + "axis": axis.Regular(50, 0, 10, name="vbsj_deta", label="vbsj_deta"), + "expr": lambda events, objects: np.abs(objects["vbsj1"].eta - objects["vbsj2"].eta), + }, + "vbsj_Mjj":{ + "axis": axis.Regular(50, 0, 5000, name="vbsj_Mjj", label="vbsj_Mjj (GeV)"), + "expr": lambda events, objects: (objects["vbsj1"] + objects["vbsj2"]).mass, + }, + + "goodAK4Jets_maxAbsDeltaEta":{ + "axis": axis.Regular(50, 0, 10, name="goodAK4Jets_maxAbsDeltaEta", label="goodAK4Jets_maxAbsDeltaEta"), + "expr": lambda events, objects: 
events.goodAK4Jets_maxAbsDeltaEta, + }, + "leadAK8_MET_dphi":{ + "axis": axis.Regular(50, 0, 3.5, name="leadAK8_MET_dphi", label="dphi(leadAK8, MET)"), + "expr": lambda events, objects: deltaPhi(objects["leadAK8"],objects["MET"]), + }, + + "leadAK4_MET_dphi":{ #objects['goodAK4Jets'] + "axis": axis.Regular(50, 0, 3.5, name="leadAK4_MET_dphi", label="dphi(leadAK4, MET)"), + "expr": lambda events, objects: + deltaPhi(objects['leadAK4'],objects["MET"]), + }, + + "vbsj1_pt": { + "axis": axis.Regular(50, 0, 500, name="vbsj1_pt", label="vbsj1 pt (GeV)"), + "expr": lambda events, objects: objects["vbsj1"].pt, + }, + "vbsj1_phi": { + "axis": axis.Regular(50, -3.5, 3.5, name="vbsj1_phi", label="vbsj1 phi"), + "expr": lambda events, objects: objects["vbsj1"].phi, + }, + "vbsj1_eta": { + "axis": axis.Regular(50, 0,5, name="vbsj1_eta", label="vbsj1 eta"), + "expr": lambda events, objects: objects["vbsj1"].eta, + }, + "vbsj1_mass": { + "axis": axis.Regular(50, 0, 100, name="vbsj1_mass", label="vbsj1 mass (GeV)"), + "expr": lambda events, objects: objects["vbsj1"].mass, + }, + "vbsj2_pt": { + "axis": axis.Regular(50, 0, 500, name="vbsj2_pt", label="vbsj2 pt (GeV)"), + "expr": lambda events, objects: objects["vbsj2"].pt, + }, + "vbsj2_phi": { + "axis": axis.Regular(50, -3.5, 3.5, name="vbsj2_phi", label="vbsj2 phi"), + "expr": lambda events, objects: objects["vbsj2"].phi, + }, + "vbsj2_eta": { + "axis": axis.Regular(50, 0,5, name="vbsj2_eta", label="vbsj2 eta"), + "expr": lambda events, objects: objects["vbsj2"].eta, + }, + "vbsj2_mass": { + "axis": axis.Regular(50, 0, 100, name="vbsj2_mass", label="vbsj2 mass (GeV)"), + "expr": lambda events, objects: objects["vbsj2"].mass, + }, + "met_goodak4_min_dphi": { + "axis": axis.Regular(50, 0, 3.5, name="met_goodak4_min_dphi", label="met_goodak4_min_dphi"), + "expr": lambda events, objects: get_min(deltaPhi(objects['goodAK4Jets'],objects["MET"])), + }, + "met_goodak8_min_dphi": { + "axis": axis.Regular(50, 0, 3.5, name="met_goodak8_min_dphi", label="met_goodak8_min_dphi"), + "expr": lambda events, objects: get_min(deltaPhi(objects['goodAK8Jets'],objects["MET"])), + }, + + # "nlep": { + # "axis": axis.Regular(15, 0, 15, name="nlep", label="n(electron+muon)"), + # "expr": lambda events, objects: events.nlep, + # }, + # "nTau": { + # "axis": axis.Regular(5, 0, 5, name="nTau", label="nTau"), + # "expr": lambda events, objects: events.nTau, + # }, + # "subjet_ratio": { + # "axis": axis.Regular(20, 0, 4, name="subjet_ratio", label="nSubjet/nFatJet"), + # "expr": lambda events, objects: events.subjet_ratio, + # }, + # "nGoodAK4FromMediumBJet": { + # "axis": axis.Regular(5, 0, 5, name="nGoodAK4FromMediumBJet", label="nGoodAK4FromMediumBJet"), + # "expr": lambda events, objects: events.ngoodAK4FromMediumBJet, #ak.sum(events.goodAK4FromMediumBJet, axis=1),# + # }, + # "nGoodAK4FromLooseBJet": { + # "axis": axis.Regular(5, 0, 5, name="nGoodAK4FromLooseBJet", label="nGoodAK4FromLooseBJet"), + # "expr": lambda events, objects: events.ngoodAK4FromLooseBJet, #ak.sum(events.goodAK4FromLooseBJet, axis=1),# + # }, + # "Higgs_area": { + # "axis": axis.Regular(20, 1.5,2.5, name="Higgs_area", label="Higgs area"), + # "expr": lambda events, objects: objects["Higgs"].area, + # }, + # "Higgs_deepTagMD_TvsQCD": { + # "axis": axis.Regular(50, 0, 1, name="Higgs_deepTagMD_TvsQCD", label="Higgs score deepTagMD_TvsQCD"), + # "expr": lambda events, objects: objects["Higgs"].deepTagMD_TvsQCD, + # }, + # "Higgs_deepTagMD_WvsQCD": { + # "axis": axis.Regular(50, 0, 1, 
name="Higgs_deepTagMD_WvsQCD", label="Higgs deepTagMD_WvsQCD"), + # "expr": lambda events, objects: objects["Higgs"].deepTagMD_WvsQCD, + # }, + # "Higgs_particleNetMD_QCD": { + # "axis": axis.Regular(50, 0, 1, name="Higgs_particleNetMD_QCD", label="Higgs particleNetMD_QCD"), + # "expr": lambda events, objects: objects["Higgs"].particleNetMD_QCD, + # }, + # "Higgs_particleNet_TvsQCD": { + # "axis": axis.Regular(50, 0, 1, name="Higgs_particleNet_TvsQCD", label="Higgs particleNet_TvsQCD"), + # "expr": lambda events, objects: objects["Higgs"].particleNet_TvsQCD, + # }, + # "Higgs_particleNet_WvsQCD": { + # "axis": axis.Regular(50, 0, 1, name="Higgs_particleNet_WvsQCD", label="Higgs particleNet_WvsQCD"), + # "expr": lambda events, objects: objects["Higgs"].particleNet_WvsQCD, + # }, + # "V1_area": { + # "axis": axis.Regular(50, 1.5,2.5, name="V1_area", label="V1 area"), + # "expr": lambda events, objects: objects["V1"].area, + # }, + # "V1_deepTagMD_TvsQCD": { + # "axis": axis.Regular(50, 0, 1, name="V1_deepTagMD_TvsQCD", label="V1 score deepTagMD_TvsQCD"), + # "expr": lambda events, objects: objects["V1"].deepTagMD_TvsQCD, + # }, + # "V1_deepTagMD_WvsQCD": { + # "axis": axis.Regular(50, 0, 1, name="V1_deepTagMD_WvsQCD", label="V1 deepTagMD_WvsQCD"), + # "expr": lambda events, objects: objects["V1"].deepTagMD_WvsQCD, + # }, + # "V1_particleNetMD_QCD": { + # "axis": axis.Regular(50, 0, 1, name="V1_particleNetMD_QCD", label="V1 particleNetMD_QCD"), + # "expr": lambda events, objects: objects["V1"].particleNetMD_QCD, + # }, + # "V1_particleNet_TvsQCD": { + # "axis": axis.Regular(50, 0, 1, name="V1_particleNet_TvsQCD", label="V1 particleNet_TvsQCD"), + # "expr": lambda events, objects: objects["V1"].particleNet_TvsQCD, + # }, + # "V1_particleNet_WvsQCD": { + # "axis": axis.Regular(50, 0, 1, name="V1_particleNet_WvsQCD", label="V1 particleNet_WvsQCD"), + # "expr": lambda events, objects: objects["V1"].particleNet_WvsQCD, + # }, + + # "vbsj1_area": { + # "axis": axis.Regular(50, 0.3, 0.7, name="vbsj1_area", label="vbsj2 area"), + # "expr": lambda events, objects: objects["vbsj1"].area, + # }, + # "vbsj1_nConstituents": { + # "axis": axis.Regular(30, 0, 60, name="vbsj1_nConstituents", label="vbsj2 nConstituents"), + # "expr": lambda events, objects: objects["vbsj1"].nConstituents, + # }, + # "vbsj2_area": { + # "axis": axis.Regular(50, 0.3, 0.7, name="vbsj2_area", label="vbsj2 area"), + # "expr": lambda events, objects: objects["vbsj2"].area, + # }, + # "vbsj2_nConstituents": { + # "axis": axis.Regular(30, 0, 60, name="vbsj2_nConstituents", label="vbsj2 nConstituents"), + # "expr": lambda events, objects: objects["vbsj2"].nConstituents, + # }, + # 'dphi_diff': { + # "axis": axis.Regular(50, 0, 3, name="dphi_diff", label="max - min of dphi(MET,H,V1)"), + # "expr": lambda events, objects: ( + # ak.max( + # ak.concatenate( + # [ + # ak.singletons(deltaPhi(objects["Higgs"], objects["MET"])), + # ak.singletons(deltaPhi(objects["V1"], objects["MET"])), + # ak.singletons(deltaPhi(objects["Higgs"], objects["V1"])), + # ], + # axis=1, + # ), + # axis=1, + # ) + # - ak.min( + # ak.concatenate( + # [ + # ak.singletons(deltaPhi(objects["Higgs"], objects["MET"])), + # ak.singletons(deltaPhi(objects["V1"], objects["MET"])), + # ak.singletons(deltaPhi(objects["Higgs"], objects["V1"])), + # ], + # axis=1, + # ), + # axis=1, + # ) + # ), + # }, + # #to remove + + # "Met_sumEt": { + # "axis": axis.Regular(50, 0, 6000, name="Met_sumEt", label="Met_sumEt"), + # "expr": lambda events, objects: objects["MET"].sumEt, + # }, 
+ # "Met_sumPtUnclustered": { + # "axis": axis.Regular(50, 0, 6000, name="Met_sumPtUnclustered", label="Met_sumPtUnclustered"), + # "expr": lambda events, objects: objects["MET"].sumPtUnclustered, + # }, + # "bosonMET_HT": { + # "axis": axis.Regular(50, 0, 3000, name="bosonMET_HT", label="bosonMET_HT"), + # "expr": lambda events, objects: events.bosonMET_HT, + # }, + # "ht_ak4": { + # "axis": axis.Regular(50, 0, 3000, name="ht_ak4", label="ht_ak4"), + # "expr": lambda events, objects: events.ht_ak4, + # }, + # "ht_ak8": { + # "axis": axis.Regular(50, 0, 3000, name="ht_ak8", label="ht_ak8"), + # "expr": lambda events, objects: events.ht_ak8, + # }, + # "ht_goodAK4Jets": { + # "axis": axis.Regular(50, 0, 3000, name="ht_goodAK4Jets", label="ht_goodAK4Jets"), + # "expr": lambda events, objects: events.ht_goodAK4Jets, + # }, + # "ht_goodAK8Jets": { + # "axis": axis.Regular(50, 0, 3000, name="ht_goodAK8Jets", label="ht_goodAK8Jets"), + # "expr": lambda events, objects: events.ht_goodAK8Jets, + # }, + # "nCentralAK4": { + # "axis": axis.Regular(25, 0, 25, name="nCentralAK4", label="nCentralAK4"), + # "expr": lambda events, objects: events.nCentralAK4, + # }, + # "nGoodVBS": { + # "axis": axis.Regular(25, 0, 25, name="nGoodVBS", label="nGoodVBS"), + # "expr": lambda events, objects: events.nGoodVBS, + # }, + # "sum_AK4_pt": { + # "axis": axis.Regular(50, 0, 3000, name="sum_AK4_pt", label="sum_AK4_pt"), + # "expr": lambda events, objects: events.sum_AK4_pt, + # }, + # "sum_AK8_pt": { + # "axis": axis.Regular(50, 0, 3000, name="sum_AK8_pt", label="sum_AK8_pt"), + # "expr": lambda events, objects: events.sum_AK8_pt, + # }, + # "vbsj1_dRH": { + # "axis": axis.Regular(50, 0, 7, name="vbsj1_dRH", label="vbsj1_dRH"), + # "expr": lambda events, objects: events.vbsj1_dRH, + # }, + # "vbsj1_dRV1": { + # "axis": axis.Regular(50, 0, 7, name="vbsj1_dRV1", label="vbsj1_dRV1"), + # "expr": lambda events, objects: events.vbsj1_dRV1, + # }, + # "vbsj2_dRH": { + # "axis": axis.Regular(50, 0, 7, name="vbsj2_dRH", label="vbsj2_dRH"), + # "expr": lambda events, objects: events.vbsj2_dRH, + # }, + # "vbsj2_dRV1": { + # "axis": axis.Regular(50, 0, 7, name="vbsj2_dRV1", label="vbsj2_dRV1"), + # "expr": lambda events, objects: events.vbsj2_dRV1, + # }, + # "HVMET_pt": { + # "axis": axis.Regular(50, 0, 3000, name="HVMET_pt", label="HVMET_pt"), + # "expr": lambda events, objects: events.HVMET_pt, + # }, + # "vbsj_dphi": { + # "axis": axis.Regular(50, 0, 3.5, name="vbsj_dphi", label="vbsj_dphi"), + # "expr": lambda events, objects: deltaPhi(objects['vbsj1'],objects["vbsj2"]) + # }, + + # "electron_goodAK4_min_dR": { + # "axis": axis.Regular(50, 0, 5, name="electron_goodAK4_min_dR", label="electron_goodAK4_min_dR"), + # "expr": lambda events, objects: events.electron_goodAK4_min_dR, + # }, + # "muon_goodAK4_min_dR": { + # "axis": axis.Regular(50, 0, 5, name="muon_goodAK4_min_dR", label="muon_AK4_min_dR"), + # "expr": lambda events, objects: events.muon_goodAK4_min_dR, + # }, + # "tau_goodAK4_min_dR": { + # "axis": axis.Regular(50, 0, 5, name="tau_goodAK4_min_dR", label="tau_AK4_min_dR"), + # "expr": lambda events, objects: events.tau_goodAK4_min_dR, + # }, + # "tau_H_dR": { + # "axis": axis.Regular(50, 0, 5, name="tau_H_dR", label="tau_H_dR"), + # "expr": lambda events, objects: events.tau_H_dR, + # }, + # "tau_V1_dR": { + # "axis": axis.Regular(50, 0, 5, name="tau_V1_dR", label="tau_V1_dR"), + # "expr": lambda events, objects: events.tau_V1_dR, + # }, +} + +def get_min(array): + arr_sort = ak.pad_none(ak.sort(array, axis = 
1, ascending=True),1, axis=1)
+    return arr_sort[:,0]
+
+# delta_r is the pairwise delta-R separation provided by coffea's vector behavior
+def deltaR(v1, v2):
+    return v1.delta_r(v2)
+
+def min_dR(v1, v2):
+    # Per-event minimum delta-R over all (v1, v2) pairs.
+    # ak.cartesian pairs every object in v1 with every object in v2 within
+    # an event; events where either collection is empty produce an empty
+    # list of pairs, and ak.min then returns None for those events.
+    # Note: delta_r (not deltaR) is the method the vector behavior provides.
+    k1, k2 = ak.unzip(ak.cartesian([v1, v2]))
+    dR = k1.delta_r(k2)
+    return ak.min(dR, axis=1)
+
+
+def deltaPhi(v1, v2):
+    phi1 = v1.phi
+    phi2 = v2.phi
+    abs_diff = np.abs(phi1 - phi2)
+    dphi = ak.where(
+        abs_diff < np.pi,
+        abs_diff,
+        2 * np.pi - abs_diff
+    ) #compare element-wise
+    return dphi
+
+def deltaPhi_1d(phi1, phi2):
+    abs_diff = np.abs(phi1 - phi2)
+    dphi = ak.where(
+        abs_diff < np.pi,
+        abs_diff,
+        2 * np.pi - abs_diff
+    ) #compare element-wise
+    return dphi
+
+def deltaEta(v1,v2):
+    eta1 = v1.eta
+    eta2 = v2.eta
+    return np.abs(eta1-eta2)
\ No newline at end of file
diff --git a/analysis/vbs_vvh/input_2FJMET/UL16APV_bkg.json b/analysis/vbs_vvh/input_2FJMET/UL16APV_bkg.json
new file mode 100644
index 00000000..24e81173
--- /dev/null
+++ b/analysis/vbs_vvh/input_2FJMET/UL16APV_bkg.json
@@ -0,0 +1,28 @@
+{
+    "xsec": "185300000",
+    "year": "2016APV",
+    "treeName": "Events",
+    "histAxisName": "UL16APV_bkg",
+    "options": "",
+    "WCnames": [],
+    "files": [
+        "/home/users/pyli/VVHjj/ntuples/2016preVFP_bkg.root"
+    ],
+    "nEvents": 0,
+    "nGenEvents": 11197186,
+    "nSumOfWeights": 11197186.0,
+    "isData": false,
+    "isSig":false,
+    "path": "./",
+    "nSumOfLheWeights": [
+        10948530.407120068,
+        12877374.841483139,
+        14437860.003244445,
+        9520996.062486636,
+        11197186.0,
+        12553321.974128349,
+        8430814.468117546,
+        9914294.028903218,
+        11114549.949698707
+    ]
+}
\ No newline at end of file
diff --git a/analysis/vbs_vvh/input_2FJMET/UL16APV_sig.json b/analysis/vbs_vvh/input_2FJMET/UL16APV_sig.json
new file mode 100644
index 00000000..2b37a3e6
--- /dev/null
+++ b/analysis/vbs_vvh/input_2FJMET/UL16APV_sig.json
@@ -0,0 +1,28 @@
+{
+    "xsec": "0.002828",
+    "year": "2016APV",
+    "treeName": "Events",
+    "histAxisName": "UL16APV_VBSWWH_OS_VBSCuts",
+    "options": "",
+    "WCnames": [],
+    "files": [
+        "/home/users/pyli/VVHjj/ntuples/2016preVFP_sig.root"
+    ],
+    "nEvents": 98500,
+    "nGenEvents": 98500,
+    "nSumOfWeights": 98500.0,
+    "isData": false,
+    "isSig":true,
+    "path": "./",
+    "nSumOfLheWeights": [
+        116784.26267291914,
+        98500.0,
+        85066.50340997495,
+        116784.26267291914,
+        98500.0,
+        85066.50340997495,
+        116784.26267291914,
+        98500.0,
+        85066.50340997495
+    ]
+}
\ No newline at end of file
diff --git a/analysis/vbs_vvh/input_2FJMET/all_250ptcut.cfg b/analysis/vbs_vvh/input_2FJMET/all_250ptcut.cfg
new file mode 100644
index 00000000..1f4ebf86
--- /dev/null
+++ b/analysis/vbs_vvh/input_2FJMET/all_250ptcut.cfg
@@ -0,0 +1,6 @@
+# Sig
+/home/users/pyli/projects/analysis_VVH/coffea/ewkcoffea/analysis/vbs_vvh/input_2FJMET/sig_250ptcut.json
+# Bkg
+/home/users/pyli/projects/analysis_VVH/coffea/ewkcoffea/analysis/vbs_vvh/input_2FJMET/bkg_250ptcut.json
+#data
+/home/users/pyli/projects/analysis_VVH/coffea/ewkcoffea/analysis/vbs_vvh/input_2FJMET/data_250ptcut.json
\ No newline at end of file
diff --git a/analysis/vbs_vvh/input_2FJMET/all_year.cfg b/analysis/vbs_vvh/input_2FJMET/all_year.cfg
new file mode 100644
index 00000000..6cfaeb8f
--- /dev/null
+++ b/analysis/vbs_vvh/input_2FJMET/all_year.cfg
@@ -0,0 +1,8 @@
+# Sig
+/data/userdata/pyli/projects/VVHjj/coffea/vvhjj_coffea/analysis/vbsvvh/configs/sig.json
+
+# Bkg
+/data/userdata/pyli/projects/VVHjj/coffea/vvhjj_coffea/analysis/vbsvvh/configs/bkg.json + +#data +/data/userdata/pyli/projects/VVHjj/coffea/vvhjj_coffea/analysis/vbsvvh/configs/data.json \ No newline at end of file diff --git a/analysis/vbs_vvh/input_2FJMET/bkg.json b/analysis/vbs_vvh/input_2FJMET/bkg.json new file mode 100644 index 00000000..65c8adb2 --- /dev/null +++ b/analysis/vbs_vvh/input_2FJMET/bkg.json @@ -0,0 +1,31 @@ +{ + "xsec": "185300000", + "year": "2016APV", + "treeName": "Events", + "histAxisName": "UL16APV_bkg", + "options": "", + "WCnames": [], + "files": [ + "/home/users/pyli/VVHjj/ntuples/2016preVFP_bkg.root", + "/home/users/pyli/VVHjj/ntuples/2016postVFP_bkg.root", + "/home/users/pyli/VVHjj/ntuples/2017_bkg.root", + "/home/users/pyli/VVHjj/ntuples/2018_bkg.root" + ], + "nEvents": 0, + "nGenEvents": 11197186, + "nSumOfWeights": 11197186.0, + "isData": false, + "isSig":false, + "path": "./", + "nSumOfLheWeights": [ + 10948530.407120068, + 12877374.841483139, + 14437860.003244445, + 9520996.062486636, + 11197186.0, + 12553321.974128349, + 8430814.468117546, + 9914294.028903218, + 11114549.949698707 + ] +} \ No newline at end of file diff --git a/analysis/vbs_vvh/input_2FJMET/bkg_250ptcut.json b/analysis/vbs_vvh/input_2FJMET/bkg_250ptcut.json new file mode 100644 index 00000000..8bae5d67 --- /dev/null +++ b/analysis/vbs_vvh/input_2FJMET/bkg_250ptcut.json @@ -0,0 +1,31 @@ +{ + "xsec": "185300000", + "year": "2016APV", + "treeName": "Events", + "histAxisName": "UL16APV_bkg", + "options": "", + "WCnames": [], + "files": [ + "/home/users/pyli/VVHjj/ntuples_AK8PT250/2016preVFP_bkg.root", + "/home/users/pyli/VVHjj/ntuples_AK8PT250/2016postVFP_bkg.root", + "/home/users/pyli/VVHjj/ntuples_AK8PT250/2017_bkg.root", + "/home/users/pyli/VVHjj/ntuples_AK8PT250/2018_bkg.root" + ], + "nEvents": 0, + "nGenEvents": 11197186, + "nSumOfWeights": 11197186.0, + "isData": false, + "isSig":false, + "path": "./", + "nSumOfLheWeights": [ + 10948530.407120068, + 12877374.841483139, + 14437860.003244445, + 9520996.062486636, + 11197186.0, + 12553321.974128349, + 8430814.468117546, + 9914294.028903218, + 11114549.949698707 + ] +} \ No newline at end of file diff --git a/analysis/vbs_vvh/input_2FJMET/data.json b/analysis/vbs_vvh/input_2FJMET/data.json new file mode 100644 index 00000000..29afd926 --- /dev/null +++ b/analysis/vbs_vvh/input_2FJMET/data.json @@ -0,0 +1,31 @@ +{ + "xsec": "0.002828", + "year": "2016APV", + "treeName": "Events", + "histAxisName": "UL16APV_VBSWWH_OS_VBSCuts", + "options": "", + "WCnames": [], + "files": [ + "/home/users/pyli/VVHjj/ntuples/2016preVFP_data.root", + "/home/users/pyli/VVHjj/ntuples/2016postVFP_data.root", + "/home/users/pyli/VVHjj/ntuples/2017_data.root", + "/home/users/pyli/VVHjj/ntuples/2018_data.root" + ], + "nEvents": 98500, + "nGenEvents": 98500, + "nSumOfWeights": 98500.0, + "isData": true, + "isSig":false, + "path": "./", + "nSumOfLheWeights": [ + 116784.26267291914, + 98500.0, + 85066.50340997495, + 116784.26267291914, + 98500.0, + 85066.50340997495, + 116784.26267291914, + 98500.0, + 85066.50340997495 + ] +} \ No newline at end of file diff --git a/analysis/vbs_vvh/input_2FJMET/data_250ptcut.json b/analysis/vbs_vvh/input_2FJMET/data_250ptcut.json new file mode 100644 index 00000000..9e45af7f --- /dev/null +++ b/analysis/vbs_vvh/input_2FJMET/data_250ptcut.json @@ -0,0 +1,31 @@ +{ + "xsec": "0.002828", + "year": "2016APV", + "treeName": "Events", + "histAxisName": "UL16APV_VBSWWH_OS_VBSCuts", + "options": "", + "WCnames": [], + "files": [ + 
"/home/users/pyli/VVHjj/ntuples_AK8PT250/2016preVFP_data.root", + "/home/users/pyli/VVHjj/ntuples_AK8PT250/2016postVFP_data.root", + "/home/users/pyli/VVHjj/ntuples_AK8PT250/2017_data.root", + "/home/users/pyli/VVHjj/ntuples_AK8PT250/2018_data.root" + ], + "nEvents": 98500, + "nGenEvents": 98500, + "nSumOfWeights": 98500.0, + "isData": true, + "isSig":false, + "path": "./", + "nSumOfLheWeights": [ + 116784.26267291914, + 98500.0, + 85066.50340997495, + 116784.26267291914, + 98500.0, + 85066.50340997495, + 116784.26267291914, + 98500.0, + 85066.50340997495 + ] +} \ No newline at end of file diff --git a/analysis/vbs_vvh/input_2FJMET/sig.json b/analysis/vbs_vvh/input_2FJMET/sig.json new file mode 100644 index 00000000..aada81b5 --- /dev/null +++ b/analysis/vbs_vvh/input_2FJMET/sig.json @@ -0,0 +1,31 @@ +{ + "xsec": "0.002828", + "year": "2016APV", + "treeName": "Events", + "histAxisName": "UL16APV_VBSWWH_OS_VBSCuts", + "options": "", + "WCnames": [], + "files": [ + "/home/users/pyli/VVHjj/ntuples/2016preVFP_sig.root", + "/home/users/pyli/VVHjj/ntuples/2016postVFP_sig.root", + "/home/users/pyli/VVHjj/ntuples/2017_sig.root", + "/home/users/pyli/VVHjj/ntuples/2018_sig.root" + ], + "nEvents": 98500, + "nGenEvents": 98500, + "nSumOfWeights": 98500.0, + "isData": false, + "isSig":true, + "path": "./", + "nSumOfLheWeights": [ + 116784.26267291914, + 98500.0, + 85066.50340997495, + 116784.26267291914, + 98500.0, + 85066.50340997495, + 116784.26267291914, + 98500.0, + 85066.50340997495 + ] +} \ No newline at end of file diff --git a/analysis/vbs_vvh/input_2FJMET/sig_250ptcut.json b/analysis/vbs_vvh/input_2FJMET/sig_250ptcut.json new file mode 100644 index 00000000..2d17a52f --- /dev/null +++ b/analysis/vbs_vvh/input_2FJMET/sig_250ptcut.json @@ -0,0 +1,31 @@ +{ + "xsec": "0.002828", + "year": "2016APV", + "treeName": "Events", + "histAxisName": "UL16APV_VBSWWH_OS_VBSCuts", + "options": "", + "WCnames": [], + "files": [ + "/home/users/pyli/VVHjj/ntuples_AK8PT250/2016preVFP_sig.root", + "/home/users/pyli/VVHjj/ntuples_AK8PT250/2016postVFP_sig.root", + "/home/users/pyli/VVHjj/ntuples_AK8PT250/2017_sig.root", + "/home/users/pyli/VVHjj/ntuples_AK8PT250/2018_sig.root" + ], + "nEvents": 98500, + "nGenEvents": 98500, + "nSumOfWeights": 98500.0, + "isData": false, + "isSig":true, + "path": "./", + "nSumOfLheWeights": [ + 116784.26267291914, + 98500.0, + 85066.50340997495, + 116784.26267291914, + 98500.0, + 85066.50340997495, + 116784.26267291914, + 98500.0, + 85066.50340997495 + ] +} \ No newline at end of file diff --git a/analysis/vbs_vvh/input_2FJMET/test.cfg b/analysis/vbs_vvh/input_2FJMET/test.cfg new file mode 100644 index 00000000..04006f48 --- /dev/null +++ b/analysis/vbs_vvh/input_2FJMET/test.cfg @@ -0,0 +1,4 @@ +# Sig +/home/users/pyli/projects/analysis_VVH/coffea/ewkcoffea/analysis/vbs_vvh/input/UL16APV_sig.json +# Bkg +/home/users/pyli/projects/analysis_VVH/coffea/ewkcoffea/analysis/vbs_vvh/input/UL16APV_bkg.json \ No newline at end of file diff --git a/analysis/vbs_vvh/output_reader.py b/analysis/vbs_vvh/output_reader.py new file mode 100644 index 00000000..6a920890 --- /dev/null +++ b/analysis/vbs_vvh/output_reader.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +import os +import argparse +import config.plotting_config as CONFIG + +from plotter_utils.helpers.general_io import resolve_input_list,resolve_outdir,print_yield_nicely,save_yield_dict +from plotter_utils.histo_reader.loader import load_hist_collection,unpack_hist,cut_order +from plotter_utils.histo_reader.sample_info import ProcessMap 
+from plotter_utils.plotting.draw import draw + +import warnings +warnings.filterwarnings("ignore", message="List indexing selection is experimental.*") + +import logging +def setup_logging(): + logging.basicConfig( + level=getattr(logging, CONFIG.LOG_LEVEL), + format=CONFIG.LOG_FORMAT, + ) +logger = logging.getLogger(__name__) + +def parse_args(): + parser = argparse.ArgumentParser("make_plots") + + parser.add_argument( + "inputs", + nargs="+", + help="Input .pkl.gz files or dirname. should all have the same set of cuts (category axis) ", + ) + parser.add_argument( + "-o", "--output", + default=None, + help="Output directory. will use the dir of the first pkl.gz file if not provided", + ) + parser.add_argument( + "-p", "--plots", + action="store_true", + help="Make plots", + ) + parser.add_argument( + "-y", "--yields", + action="store_true", + help="Print yields", + ) + parser.add_argument( + "-c", "--cutname", + default=None, + help="choose which cut (selection) to draw, default will draw all", + ) + + return parser.parse_args() + +def make_all_plots(all_hist_collection, proc_map, outdir): + for cut,hist_collection in all_hist_collection.items(): + subdir = os.path.join(outdir, cut) if cut is not None else os.path.join(outdir, 'no_category') + + os.makedirs(subdir, exist_ok=True) + logger.info(f'start producing plots for cut: {cut}') + logger.info(f'output will be saved at {subdir}') + + for var in hist_collection.variables_all: + sig,bkg,data = unpack_hist(hist_collection,var,proc_map) + fig = draw( + sig=sig, + bkg=bkg, + data=data, + proc_map=proc_map, + config={"SUBPLOTS": CONFIG.SUBPLOTS,"FIG_RATIO": CONFIG.FIG_RATIO}, + title=var, + ) + fig.savefig(os.path.join(subdir,f"{var}.png"), bbox_inches="tight") + logger.info(f" - Saved {var}.png") + +def save_yield_json(all_hist_collection, proc_map, output_json_name): + yield_ref_var = CONFIG.REF_VAR + logger.info(f"Using reference variable: {yield_ref_var}") + yield_dict = {} #initialize yield dict + uncer_dict = {} + for cutname,hist_collection in all_hist_collection.items(): #cutname = cat in old script + sig,bkg,data = unpack_hist(hist_collection,yield_ref_var,proc_map) + yield_dict[cutname]={ + "Signal":sig.total_yield(), + "Background":bkg.total_yield(), + "Data":data.total_yield(), + } + bkg_dict = bkg.get_yield_per_type() + yield_dict[cutname].update(bkg_dict) + + uncer_dict[cutname]={ + "Signal":sig.uncertainty(), + "Background":bkg.uncertainty(), + "Data":data.uncertainty() + } + bkg_uncert = bkg.get_uncertainty_per_type() + uncer_dict[cutname].update(bkg_uncert) + outdict = {} + for cutname in all_hist_collection.keys(): + outdict[cutname] = { + proc:(yield_dict[cutname][proc],uncer_dict[cutname][proc]) for proc in yield_dict[cutname].keys() + } + ordered_cuts = cut_order(all_hist_collection,yield_ref_var,proc_map) + outdict_ordered = {k: outdict[k] for k in ordered_cuts} + + for cutname in ordered_cuts: + logger.info(f'cut: {cutname}') + print_yield_nicely(outdict_ordered[cutname]) + + save_yield_dict(outdict_ordered, output_json_name) + +def main(args): + setup_logging() + + input_list = resolve_input_list(args.inputs) + outdir = resolve_outdir(args.output,input_list,CONFIG.DEFAULT_OUTPUT_DIRNAME) + + #get proc_map that link process to sample type e.g. 
QCDpt to QCD
+    proc_map = ProcessMap.from_csv(CONFIG.PROC_MAP_CSV)
+
+    # load all hists from the given files
+    all_hist_collection = load_hist_collection(
+        pkl_paths=input_list,
+        proc_map=proc_map,
+        cutname=args.cutname,
+    )
+
+    if args.plots:
+        make_all_plots(all_hist_collection, proc_map, outdir)
+    if args.yields:
+        output_json_name = os.path.join(outdir,'yield.json')
+        save_yield_json(all_hist_collection, proc_map, output_json_name)
+
+if __name__ == "__main__":
+    args = parse_args()
+    main(args)
diff --git a/analysis/vbs_vvh/plotter_utils/helpers/general_io.py b/analysis/vbs_vvh/plotter_utils/helpers/general_io.py
new file mode 100644
index 00000000..eefbee2f
--- /dev/null
+++ b/analysis/vbs_vvh/plotter_utils/helpers/general_io.py
@@ -0,0 +1,102 @@
+import csv,json
+import os
+
+from plotter_utils.helpers.get_input_filename import get_all_pkl_in_folder
+
+import logging
+logger = logging.getLogger(__name__)
+
+def save_array_to_csv(arr, file_name):
+    """
+    Save a 2D array to a CSV file.
+    Assumes arr[0] is the header row, arr[1:] are data rows.
+    """
+    with open(file_name, mode='w', newline='') as f:
+        writer = csv.writer(f)
+        for row in arr:
+            writer.writerow(row)
+
+def save_yield_dict(outdict, file_name):
+    """
+    Save yield dictionary to JSON.
+
+    Expected input structure:
+        outdict[cutname][proc] = (yield, error)
+
+    Output JSON structure:
+        outdict[cutname][proc] = [yield, error]
+
+    The error is stored as given; callers already pass sqrt(variance).
+    """
+
+    json_out = {}
+
+    for cutname, proc_dict in outdict.items():
+        json_out[cutname] = {}
+        for proc, yv in proc_dict.items():
+            if yv is None:
+                json_out[cutname][proc] = None
+                continue
+
+            yld, err = yv
+            json_out[cutname][proc] = [yld, err]
+
+    with open(file_name, "w") as f:
+        json.dump(json_out, f, indent=2)
+
+    logger.info("Saved yield JSON to %s", file_name)
+
+def resolve_input_list(inputs):
+    """
+    Unpack any directories and return the flat list of .pkl.gz files.
+    """
+    input_list = []
+    for name in inputs:
+        if name.endswith(".pkl.gz"):
+            input_list.append(name)
+        elif os.path.isdir(name):
+            files = get_all_pkl_in_folder(name)
+            input_list += files
+            if len(files)==0:
+                logger.warning(f'input dir {name} does not contain any .pkl.gz files')
+        else:
+            logger.warning(f'input is neither a .pkl.gz file nor a directory: {name}')
+    return input_list
+
+def resolve_outdir(output,input_list,default_output_dirname):
+    """
+    Figure out the output folder name.
+    """
+    if output is None:
+        outname = os.path.dirname(input_list[0])
+        outdir = os.path.join(outname,default_output_dirname)
+    else:
+        outdir = output
+    return outdir
+
+def print_yield_nicely(yield_dict,indent = 10):
+    """
+    yield_dict can be:
+      - {proc_grp: (yield, error)}
+      - {proc_grp: {proc: (yield, error)}}
+    """
+
+    def format_yield(y):
+        if y is None:
+            return "None"
+        if isinstance(y, (list, tuple)) and len(y) == 2:
+            return f"{y[0]:.3f} +/- {y[1]:.3f}"
+        if isinstance(y, float):
+            return f"{y:.3f} +/- ?"
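+        # anything else (ints, strings, ...) falls through and is printed as-is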
+        return f"{y}"
+
+    for proc_grp, d in yield_dict.items():
+        if isinstance(d, dict):
+            logger.info("%s", proc_grp)
+            for proc, y in d.items():
+                # pad process names to `indent` characters so the columns align
+                logger.info("   - %-*s : %s", indent, proc, format_yield(y))
+        else:
+            logger.info("  %-*s : %s", indent, proc_grp, format_yield(d))
+    
\ No newline at end of file
diff --git a/analysis/vbs_vvh/plotter_utils/helpers/get_input_filename.py b/analysis/vbs_vvh/plotter_utils/helpers/get_input_filename.py
new file mode 100644
index 00000000..d449b6c4
--- /dev/null
+++ b/analysis/vbs_vvh/plotter_utils/helpers/get_input_filename.py
@@ -0,0 +1,34 @@
+"""
+Get the input pkl name by the following methods:
+- if one or more .pkl.gz file names are provided, use them directly
+- elif a directory is provided, search for .pkl.gz in that directory (not including subdirs)
+- elif none is provided but project and cutflow are, guess the file name
+- else the code should complain
+"""
+import os
+
+def pkl_name_from_project(project_name,cutname,n_minus_1=False):
+    """
+    Get an individual file name from a given project name and cut name.
+    """
+    from config.paths import default_output_dir
+    if not n_minus_1:
+        return f'{default_output_dir}/histos/{project_name}/histos/{cutname}_histos.pkl.gz'
+    else:
+        return f'{default_output_dir}/histos/{project_name}/histos/{cutname}_histos_m.pkl.gz'
+
+def get_all_pkl_in_folder(dirpath):
+    """
+    Return a list of filenames for all .pkl.gz files directly inside `dirpath`.
+    Does NOT search subdirectories.
+    """
+    if not os.path.isdir(dirpath):
+        raise ValueError(f"Not a directory: {dirpath}")
+
+    return [
+        os.path.join(dirpath,fname)
+        for fname in os.listdir(dirpath)
+        if fname.endswith(".pkl.gz")
+        and os.path.isfile(os.path.join(dirpath, fname))
+    ]
+
diff --git a/analysis/vbs_vvh/plotter_utils/helpers/test_funcs.py b/analysis/vbs_vvh/plotter_utils/helpers/test_funcs.py
new file mode 100644
index 00000000..cade022e
--- /dev/null
+++ b/analysis/vbs_vvh/plotter_utils/helpers/test_funcs.py
@@ -0,0 +1,34 @@
+import copy
+import hist
+
+
+
+def combine(hist_slices: dict) -> hist.Hist:
+    # work in progress; not used for now
+    """
+    Combine multiple single-category Hist objects into a single Hist
+    with an axis containing all categories. Works with Weight() storage.
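+
+    Example (sketch; h_a and h_b are hypothetical single-category
+    slices of the same histogram):
+        combined = combine({"cutA": h_a, "cutB": h_b})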
+ """ + if not hist_slices: + raise ValueError("No histograms to combine.") + + first_hist = next(iter(hist_slices.values())) + cat_axis_name = first_hist.axes[0].name # assume first axis is categorical + dense_axes = [ax for ax in first_hist.axes if ax.name != cat_axis_name] + + categories = list(hist_slices.keys()) + hnew = hist.Hist( + hist.axis.StrCategory(categories, name=cat_axis_name), + *dense_axes, + storage=first_hist.storage_type() + ) + + for cat in categories: + # Use dict-style slicing to assign values safely + hnew[{cat_axis_name: cat}] = hist_slices[cat] + + return hnew \ No newline at end of file diff --git a/analysis/vbs_vvh/plotter_utils/histo_objects/__init__.py b/analysis/vbs_vvh/plotter_utils/histo_objects/__init__.py new file mode 100644 index 00000000..233e1b7d --- /dev/null +++ b/analysis/vbs_vvh/plotter_utils/histo_objects/__init__.py @@ -0,0 +1,4 @@ +from .base import BaseHist +from .signal import SignalHist +from .background import BackgroundHist +from .data import DataHist \ No newline at end of file diff --git a/analysis/vbs_vvh/plotter_utils/histo_objects/background.py b/analysis/vbs_vvh/plotter_utils/histo_objects/background.py new file mode 100644 index 00000000..ad085cb3 --- /dev/null +++ b/analysis/vbs_vvh/plotter_utils/histo_objects/background.py @@ -0,0 +1,123 @@ +from plotter_utils.histo_objects.base import BaseHist +import ewkcoffea.modules.plotting_tools as plt_tools # type: ignore +import numpy as np # type: ignore + + +class BackgroundHist(BaseHist): + """ + Background histogram. + + Differences from BaseHist: + - always nominal systematic + - keeps individual background process_grp for stacked plotting + - provides total background histogram + """ + + def __init__( + self, + hist, + *, + years, + process_grouping, + background_groups, + ): + self.background_groups = background_groups + super().__init__( + hist, + years=years, + process_grouping=process_grouping, + ) + + # After preparing the histogram, generate the "total" histogram + self._hist_total = self._make_total_background(self._hist) + + def _prepare_hist(self): + import copy + h = super()._prepare_hist() + + # Nominal only + h = h[{"systematic": "nominal"}] + h = plt_tools.merge_overflow(h) + return h + + def _make_total_background(self, h): + """ + Collapse all background processes into one for total yield calculations. + """ + h_total = plt_tools.group( + h, + "process_grp", + "sample_category", + {"Background": list(self.background_groups)}, + ) + h_total = self._safe_merge_overflow(h_total) + return h_total + + @property + def hist_total(self): + """ + Collapsed total background histogram. + """ + return self._hist_total + + def values_total(self, flow=True): + return self._hist_total.values(flow=flow) + + def cumulative_total(self): + return self.values_total(flow=False).cumsum() + + def cumulative(self): + """ + Cumulative per-process background yield (stacked) + """ + return self.values(flow=False).cumsum() + + def get_yield_per_type(self): + """ + Yield per background group (process_grp). + """ + out = {} + for grp in self.hist.axes["process_grp"]: + h_grp = self.hist[{"process_grp": [grp]}] + out[grp] = self._yield_from_hist(h_grp) + return out + + def get_yield_per_process(self): + """ + Yield per process, nested under background group. + """ + out = {} + + # Loop over background groups (EWK, QCD, ...) 
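+        # Sketch of the resulting structure (group/process names hypothetical):
+        #   process_grouping = {"QCD": ["QCDpt170", "QCDpt300"], "EWK": [...]}
+        #   -> out = {"QCD": {"QCDpt170": y1, "QCDpt300": y2}, "EWK": {...}}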
+ for grp, proc_list in self.process_grouping.items(): + out[grp] = {} + for proc in proc_list: + # regroup THIS process alone + h_proc = plt_tools.group( + self._raw_hist, + "process", + "process_grp", + {proc: [proc]}, + ) + # apply same pipeline steps + h_proc = self._sum_years(h_proc) + h_proc = h_proc[{"systematic": "nominal"}] + h_proc = self._safe_merge_overflow(h_proc) + + out[grp][proc] = self._yield_from_hist(h_proc) + + return out + + def get_variance_per_type(self): + out = {} + for grp in self.hist.axes["process_grp"]: + h_grp = self.hist[{"process_grp": [grp]}] + out[grp] = self._variance_from_hist(h_grp) + return out + + def get_uncertainty_per_type(self): + out = {} + for grp in self.hist.axes["process_grp"]: + h_grp = self.hist[{"process_grp": [grp]}] + out[grp] = np.sqrt(self._variance_from_hist(h_grp)) + return out \ No newline at end of file diff --git a/analysis/vbs_vvh/plotter_utils/histo_objects/base.py b/analysis/vbs_vvh/plotter_utils/histo_objects/base.py new file mode 100644 index 00000000..504e8176 --- /dev/null +++ b/analysis/vbs_vvh/plotter_utils/histo_objects/base.py @@ -0,0 +1,144 @@ +import numpy as np +import copy +import ewkcoffea.modules.plotting_tools as plt_tools + + +class BaseHist: + """ + Base class that handles transformations common to + signal / background / data histograms. + + This class: + - sums over years + - groups raw processes into physics groups + - exposes basic histogram information + - provides a safe way to merge overflow bins + """ + + def __init__( + self, + hist, + *, + years, + process_grouping, + ): + self._raw_hist = copy.deepcopy(hist) # freeze input + self.years = years + self.process_grouping = copy.deepcopy(process_grouping) + + # Prepare the final histogram immediately + self._hist = self._prepare_hist() + + # ------------------------------------------------------------------ + # Core transformation pipeline + # ------------------------------------------------------------------ + + def _prepare_hist(self): + """ + Apply transformations common to all histogram types. + """ + h = self._raw_hist + + # 1) Sum over years + h = self._sum_years(h) + + # 2) Group raw processes into physics processes + h = self._group_processes(h) + + return h + + def _safe_merge_overflow(self, h, axis_slice=None): + """ + Call merge_overflow safely: + - optionally only on a subset of categories (axis_slice) + - merge if dense axis has >= 2 bins + """ + if axis_slice is not None: + h = h[axis_slice] + + vals = h.values(flow=True) + dense_bins = vals.shape[-1] + + if dense_bins >= 2: + return plt_tools.merge_overflow(h) + return h + + def _sum_years(self, h): + """ + Collapse year axis into a single bin. + """ + h = plt_tools.group( + h, + "year", + "year_sum", + {"all_years": self.years}, + ) + return h[{"year_sum": "all_years"}] + + def _group_processes(self, h): + """ + Group raw MC samples into physics processes. 
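+
+        Example mapping (hypothetical names):
+            {"Signal": ["VBSWWH"], "QCD": ["QCDpt170", "QCDpt300"]}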
+        """
+        return plt_tools.group(
+            h,
+            "process",
+            "process_grp",
+            self.process_grouping,
+        )
+
+    def _yield_from_hist(self, h):
+        return float(h.values(flow=True).sum())
+
+    def _variance_from_hist(self, h):
+        return float(h.variances(flow=True).sum())
+
+    # ------------------------------------------------------------------
+    # Public accessors (used by plotting & metrics)
+    # ------------------------------------------------------------------
+
+    @property
+    def hist(self):
+        return self._hist
+
+    def values(self, flow=True):
+        return self._hist.values(flow=flow)
+
+    def variances(self, flow=True):
+        return self._hist.variances(flow=flow)
+
+    def total_yield(self):
+        #return float(np.sum(self.values(flow=True)))
+        return self._yield_from_hist(self._hist)
+
+    def total_variance(self):
+        return float(np.sum(self.variances(flow=True)))
+
+    def uncertainty(self):
+        return float(np.sqrt(self.total_variance()))
+
+    def uncertainty_per_type(self):
+        raise NotImplementedError
+
+    def bin_edges(self):
+        return self._hist.axes[0].edges
+
+    def bin_centers(self):
+        return self._hist.axes[0].centers
+
+    def get_yield_per_type(self):
+        """
+        Return yields grouped by 'type'.
+        Must be implemented by subclasses.
+        """
+        raise NotImplementedError
+
+    def get_yield_per_process(self):
+        """
+        Return yields grouped by type -> process.
+        Must be implemented by subclasses.
+        """
+        raise NotImplementedError
+
+    def get_variance_per_type(self):
+        """Return variances grouped by 'type'."""
+        raise NotImplementedError
\ No newline at end of file
diff --git a/analysis/vbs_vvh/plotter_utils/histo_objects/data.py b/analysis/vbs_vvh/plotter_utils/histo_objects/data.py
new file mode 100644
index 00000000..c1ced900
--- /dev/null
+++ b/analysis/vbs_vvh/plotter_utils/histo_objects/data.py
@@ -0,0 +1,35 @@
+from plotter_utils.histo_objects.base import BaseHist
+
+
+class DataHist(BaseHist):
+    """
+    Data histogram.
+
+    Differences from BaseHist:
+    - select Data process
+    - nominal only
+    """
+
+    def _prepare_hist(self):
+        h = super()._prepare_hist()
+        return h[
+            {
+                "process_grp": ["Data"],
+                "systematic": "nominal",
+            }
+        ]
+
+    def get_yield_per_type(self):
+        return self.get_yield_per_process()['Data']
+
+    def get_yield_per_process(self):
+        out = {"Data": {}}
+
+        for proc in self._raw_hist.axes["process"]:
+            h_proc = self._raw_hist[{"process": [proc]}]
+            out["Data"][proc] = self._yield_from_hist(h_proc)
+
+        return out
+
+    def get_variance_per_type(self):
+        return {"Data":self.total_variance()}
\ No newline at end of file
diff --git a/analysis/vbs_vvh/plotter_utils/histo_objects/signal.py b/analysis/vbs_vvh/plotter_utils/histo_objects/signal.py
new file mode 100644
index 00000000..8b2a972c
--- /dev/null
+++ b/analysis/vbs_vvh/plotter_utils/histo_objects/signal.py
@@ -0,0 +1,62 @@
+from plotter_utils.histo_objects.base import BaseHist
+
+
+class SignalHist(BaseHist):
+    """
+    Signal histogram.
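+    Construction sketch (names hypothetical; mirrors the test scripts):
+        sig = SignalHist(h, years=["2017"],
+                         process_grouping={"Signal": ["VBSWWH"]},
+                         coupling="nominal")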
+ + Differences from BaseHist: + - select ONLY signal process + - select coupling-dependent systematic + """ + + def __init__( + self, + hist, + *, + years, + process_grouping, + coupling, + ): + self.coupling = coupling + super().__init__( + hist, + years=years, + process_grouping=process_grouping, + ) + + def _prepare_hist(self): + h = super()._prepare_hist() + + # Select signal + coupling systematic + h = h[ + { + "process_grp": ["Signal"], + "systematic": self.coupling, + } + ] + + # Safe merge overflow for signal + h = self._safe_merge_overflow(h) + return h + + def cumulative(self): + return self.values(flow=False).cumsum() + + def get_yield_per_type(self): + # No type layer → alias to per_process + return self.get_yield_per_process()['Signal'] + + def get_yield_per_process(self): + out = {"Signal": {}} + for proc in self._raw_hist.axes["process"]: + h_proc = self._raw_hist[{"process": [proc]}] + out["Signal"][proc] = self._yield_from_hist(h_proc) + return out + + def get_variance_per_type(self): + return {"Signal":self.total_variance()} \ No newline at end of file diff --git a/analysis/vbs_vvh/plotter_utils/histo_reader/histcollection.py b/analysis/vbs_vvh/plotter_utils/histo_reader/histcollection.py new file mode 100644 index 00000000..e428e1ba --- /dev/null +++ b/analysis/vbs_vvh/plotter_utils/histo_reader/histcollection.py @@ -0,0 +1,17 @@ +# variable view ( sig+bkg+data per var ) + +from dataclasses import dataclass +from typing import Dict, List, Mapping + + +@dataclass +class HistCollection: + views: Dict[str, Dict[str, object]] + + variables_sig_only: List[str] + variables_mc_only: List[str] + variables_all: List[str] + proc_map: Mapping + + def has_data(self, var): + return self.views[var]["data"] is not None \ No newline at end of file diff --git a/analysis/vbs_vvh/plotter_utils/histo_reader/loader.py b/analysis/vbs_vvh/plotter_utils/histo_reader/loader.py new file mode 100644 index 00000000..a48c78ac --- /dev/null +++ b/analysis/vbs_vvh/plotter_utils/histo_reader/loader.py @@ -0,0 +1,224 @@ +# this gets the histo dict from a list of files (filename.pkl.gz) +# only read the file, get the required cut, and determine the list of var, sig,bkg,data but not doing any transformation on the histos +# check same list of year for sig, bkg, data +# get list of var for sig_only, mc_only = sig+bkg, all +# verbose show list of var. also print if any variable is not sig_only/mc_only/all + +import gzip +import pickle +from collections import defaultdict + +from .histcollection import HistCollection +from plotter_utils.histo_objects import SignalHist, BackgroundHist, DataHist + +import logging +logger = logging.getLogger(__name__) + + +def load_hist_collection(pkl_paths,proc_map,cutname=None): + """ + Load one or more coffea output pkl.gz files and separate variables into signal, background, and data. + Should have the given cut, or the same set of cuts + Return {cutname:histCollection} for cutname, or over all cuts in category axis if cutname is not given + + Will check that sig/bkg/data have the same list of years for each variable. + + Parameters + ---------- + pkl_paths : list[str] + List of .pkl.gz files or dir + proc_map : ProcessMap object as defined in sample_info + cutname : str + 'category' axis which is the cutname. 
Use None only when this axis does not exist + + """ + # ---- load & merge ---- + hist_dicts = [_load_one_file(p) for p in pkl_paths] + #hists = _merge_hist_dicts(hist_dicts) + + if cutname is not None: + allcut={cutname} + else: + allcut=_get_category_axis(hist_dicts[0]) + for hist in hist_dicts[1:]: + if _get_category_axis(hist)!=allcut: + logger.error(f'different set of cuts \n{allcut} \n{_get_category_axis(hist)}\n exit') + import sys + sys.exit(1) + + + all_hist_collection = { + cut:_get_hist_collection_single_cut(hist_dicts,proc_map,cut) for cut in allcut + } + + return all_hist_collection + +def unpack_hist(hist_collection,var,proc_map): + #need to test when none. + """distribute into sig,bkg,dataHist for a single variable""" + view = hist_collection.views[var] + years = view["years"] + if view["signal"] is not None: + sig = SignalHist( + view["signal"], + years=years, + process_grouping={"Signal": proc_map.signal}, + coupling="nominal", + ) + else: + sig = None + + if view["background"] is not None: + bkg = BackgroundHist( + view["background"], + years=years, + process_grouping=proc_map.background_groups, + background_groups=list(proc_map.background_groups), + ) + else: + bkg = None + + if view["data"] is not None: + data = DataHist( + view["data"], + years=years, + process_grouping={"Data": proc_map.data}, + ) + else: + data = None + + return sig,bkg,data + +def cut_order(all_hist_collection,ref_var,proc_map): + """assume at least one of sig/bkg/data exist for all the cuts""" + yield_dict = {} + + # Collect yields per cut + for cutname, hist_collection in all_hist_collection.items(): + sig, bkg, data = unpack_hist(hist_collection, ref_var, proc_map) + + yield_dict[cutname] = { + "Signal": sig.total_yield() if sig is not None else None, + "Background": bkg.total_yield() if bkg is not None else None, + "Data": data.total_yield() if data is not None else None, + } + + # Determine which reference exists for ALL cuts + reference = None + for candidate in ("Signal", "Background", "Data"): + if all(yield_dict[cut][candidate] is not None for cut in yield_dict): + reference = candidate + break + + if reference is None: + raise RuntimeError( + "No common reference (Signal / Background / Data) " + "exists for all cuts." 
+ ) + + # Sort cut names by descending yield + ordered_cuts = sorted( + yield_dict.keys(), + key=lambda cut: yield_dict[cut][reference], + reverse=True, + ) + + return ordered_cuts + +def _load_one_file(filename): + """ + Return loaded histogram dict from a .pkl.gz file + """ + logger.info(f"Reading file: {filename}") + histo_dict = pickle.load(gzip.open(filename)) + return histo_dict + + +def _get_years(hist): + """ + for checking years list consistency over sig/bkg/data + """ + if hist is None: + return None + return set(hist.axes["year"]) + +def _get_category_axis(hist): + temp_hist = hist[list(hist.keys())[0]] + if "category" in temp_hist.axes.name: + cuts = set(temp_hist.axes["category"]) + logger.info(f'discovered cuts {cuts}') + return cuts + else: + logger.info(f'category is not in axis') + return None + + + + +def _get_hist_collection_single_cut(hist_dicts,proc_map,cutname=None): + + views = {} + vars_sig_only = [] + vars_mc_only = [] # sig&bkg but not data + vars_all = [] + + # ---- inspect each variable ---- + + for hists in hist_dicts: + for var, hist_precut in hists.items(): + + if cutname is not None: + hist = hist_precut[{"category":cutname}] + else: + hist = hist_precut + + if var not in views: + views[var] = {"signal": None, "background": None, "data": None, "years": []} + + processes = set(hist.axes["process"]) + if processes & set(proc_map.signal): + views[var]["signal"] = hist[{"process": proc_map.signal}] + if processes & set(proc_map.background): + available_processes = list(set(proc_map.background) & set(processes)) + views[var]["background"] = hist[{"process": available_processes}] + if processes & set(proc_map.data): + views[var]["data"] = hist[{"process": proc_map.data}] + + # ---- check year consistency, classify var ---- + for var, vdict in views.items(): + year_sets = [ys for ys in [_get_years(vdict["signal"]), + _get_years(vdict["background"]), + _get_years(vdict["data"])] if ys is not None] + if len(year_sets) > 1 and not all(ys == year_sets[0] for ys in year_sets): + logger.error(f"sig: {_get_years(vdict['signal'])}\n"\ + f"bkg: {_get_years(vdict['background'])}\n"\ + f"data: {_get_years(vdict['data'])}\n") + raise RuntimeError(f"[{var}] Year mismatch across sig/bkg/data") + + vdict["years"] = sorted(year_sets[0]) if year_sets else [] + + if vdict["signal"] and not vdict["background"]: + vars_sig_only.append(var) + elif vdict["signal"] and vdict["background"] and not vdict["data"]: + vars_mc_only.append(var) + elif vdict["signal"] and vdict["background"] and vdict["data"]: + vars_all.append(var) + + # ---- print summary ---- + logger.debug("=== HistCollection summary ===") + logger.debug(f"Total variables: {len(views)}") + logger.debug(f"sig-only vars :({len(vars_sig_only)}) {vars_sig_only}") + logger.debug(f"sig + bkg vars :({len(vars_mc_only)}) {vars_mc_only}") + logger.debug(f"sig + bkg + data vars :({len(vars_all)}) {vars_all}") + + others = set(views) - set(vars_all) - set(vars_mc_only) - set(vars_sig_only) + if others: + logger.debug(f"variables with unexpected content: {others}") + + return HistCollection( + views=views, + variables_sig_only=vars_sig_only, + variables_mc_only=vars_mc_only, + variables_all=vars_all, + proc_map=proc_map + ) \ No newline at end of file diff --git a/analysis/vbs_vvh/plotter_utils/histo_reader/sample_info.py b/analysis/vbs_vvh/plotter_utils/histo_reader/sample_info.py new file mode 100644 index 00000000..76e55412 --- /dev/null +++ b/analysis/vbs_vvh/plotter_utils/histo_reader/sample_info.py @@ -0,0 +1,96 @@ +# from 
 sample_names.csv, group sample names into sig/bkg/data, and get plotting colors
+
+import csv
+from dataclasses import dataclass
+from collections import defaultdict
+from typing import Dict, List, Set, Optional
+
+
+DEFAULT_SAMPLE_CSV = "/home/users/pyli/projects/analysis_VVH/coffea/ewkcoffea/analysis/vbs_vvh/config/sample_names.csv"
+
+# get list of years
+def get_all_years(csv_path=None):
+    """
+    Get the set of years in sample_names.csv
+    (should be 2016preVFP, ... for Run 2).
+    Since years are usually summed, order is not considered.
+    """
+    if csv_path is None:
+        csv_path = DEFAULT_SAMPLE_CSV
+
+    all_years = set()
+    with open(csv_path, newline="") as csvfile:
+        reader = csv.DictReader(csvfile)
+        for row in reader:
+            sample_year = row["sample_year"].strip()
+            all_years.add(sample_year)
+    return all_years
+
+@dataclass
+class ProcessMap:
+    """
+    From sample_names.csv, get
+    - the list of processes (sample_name) for signal, bkg, data
+    - the list of bkg groups and the process names under each group
+    - colors for plotting
+
+    Build via :meth:`from_csv`, which reads the CSV at ``csv_path``
+    (defaults to DEFAULT_SAMPLE_CSV).
+    """
+    signal: List[str]
+    background: List[str]
+    data: List[str]
+
+    # extra, useful metadata
+    background_groups: Dict[str, List[str]]
+    background_colors: Dict[str, str]
+
+    @classmethod
+    def from_csv(
+        cls,
+        csv_path: Optional[str] = None
+    ):
+        """
+        Build ProcessMap from the sample CSV.
+
+        Parameters
+        ----------
+        csv_path : str, optional
+            Path to sample CSV (uses default if None)
+        """
+        if csv_path is None:
+            csv_path = DEFAULT_SAMPLE_CSV
+
+        grp_dict = defaultdict(set)
+        bkg_color_map = {}
+
+        with open(csv_path, newline="") as csvfile:
+            reader = csv.DictReader(csvfile)
+            for row in reader:
+                cat = row["sample_category"].strip().lower()
+                sample_type = row["sample_type"].strip()
+                sample_name = row["sample_name"].strip()
+                color = row["plotting_colour"].strip()
+
+                if cat == "sig":
+                    grp_dict["Signal"].add(sample_name)
+
+                elif cat == "bkg":
+                    grp_dict[sample_type].add(sample_name)
+                    bkg_color_map.setdefault(sample_type, color)
+
+                elif cat == "data":
+                    grp_dict["Data"].add(sample_name)
+
+        signal = list(grp_dict.get("Signal", set()))
+        data = list(grp_dict.get("Data", set()))
+        background_groups = {k: list(v) for k, v in grp_dict.items() if k not in ("Signal", "Data")}
+        background = list(p for procs in background_groups.values() for p in procs)
+
+        return cls(
+            signal=signal,
+            background=background,
+            data=data,
+            background_groups=background_groups,
+            background_colors=bkg_color_map,
+        )
\ No newline at end of file
diff --git a/analysis/vbs_vvh/plotter_utils/metrics/base.py b/analysis/vbs_vvh/plotter_utils/metrics/base.py
new file mode 100644
index 00000000..0fff9e27
--- /dev/null
+++ b/analysis/vbs_vvh/plotter_utils/metrics/base.py
@@ -0,0 +1,66 @@
+# plotter_utils/metrics/base.py
+
+import numpy as np
+from abc import ABC, abstractmethod
+
+import logging
+log = logging.getLogger(__name__)
+
+
+class MetricResult:
+    """
+    Container passed from metrics -> plotting.
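+
+    Attributes (set in __init__): x (bin centers), y (metric values),
+    yerr (optional per-point errors), label (y-axis label for the subplot).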
+ """ + def __init__(self, x, y, yerr=None, label=None): + self.x = x + self.y = y + self.yerr = yerr + self.label = label + + +class MetricBase(ABC): + name = None + requires = () # ("sig", "bkg", "data") + y_range = None # (ymin, ymax) + + def __init__(self, *, sig=None, bkg=None, data=None): + self.sig = sig + self.bkg = bkg + self.data = data + + def available(self): + missing = [k for k in self.requires if getattr(self, k) is None] + return len(missing) == 0, missing + + @staticmethod + def safe_divide(num, den): + return np.divide( + num, den, + out=np.full_like(num, np.nan, dtype=float), + where=den > 0 + ) + + @abstractmethod + def compute(self): + """ + Return MetricResult + """ + pass + + @staticmethod + def to_1d(arr): + """ + Collapse all non-dense axes. + Expects dense axis to be the last one. + """ + arr = np.asarray(arr) + + # Sum over all axes except the last (dense) one + if arr.ndim > 1: + axes = tuple(range(arr.ndim - 1)) + arr = arr.sum(axis=axes) + + return arr \ No newline at end of file diff --git a/analysis/vbs_vvh/plotter_utils/metrics/dataMC.py b/analysis/vbs_vvh/plotter_utils/metrics/dataMC.py new file mode 100644 index 00000000..1bad4e2b --- /dev/null +++ b/analysis/vbs_vvh/plotter_utils/metrics/dataMC.py @@ -0,0 +1,26 @@ +# plotter_utils/metrics/dataMC.py + +import numpy as np +from .base import MetricBase, MetricResult + +import logging +log = logging.getLogger(__name__) + +class DataMCMetric(MetricBase): + name = "dataMC" + requires = ("data", "bkg") + y_range = (0.3, 3) + + def compute(self): + data_vals = self.to_1d(self.data.values(flow=False)) + bkg_vals = self.to_1d(self.bkg.values_total(flow=False)) + + ratio = self.safe_divide(data_vals, bkg_vals) + + x = self.data.hist.axes[-1].centers + + return MetricResult( + x=x, + y=ratio, + label="Data / MC" + ) diff --git a/analysis/vbs_vvh/plotter_utils/metrics/significance.py b/analysis/vbs_vvh/plotter_utils/metrics/significance.py new file mode 100644 index 00000000..e9a18ed9 --- /dev/null +++ b/analysis/vbs_vvh/plotter_utils/metrics/significance.py @@ -0,0 +1,36 @@ +# plotter_utils/metrics/significance.py + +import numpy as np +from .base import MetricBase, MetricResult + +import logging +log = logging.getLogger(__name__) + +class SignificanceMetric(MetricBase): + name = "significance" + requires = ("sig", "bkg") + y_range = (0, 0.1) + + def compute(self): + sig_vals = self.to_1d(self.sig.values(flow=False)) + bkg_vals = self.to_1d(self.bkg.values_total(flow=False)) + + log.debug(f"sig shape {sig_vals.shape}") + log.debug(f"bkg shape {bkg_vals.shape}") + log.debug(f"sig_vals {sig_vals[:10]}") + log.debug(f"bkg_vals {bkg_vals[:10]}") + + y = self.safe_divide(sig_vals, np.sqrt(bkg_vals)) + log.debug(f"signif(y) shape {y.shape}") + + x = self.sig.hist.axes[-1].centers + log.debug(f"bin(x) shape {x.shape}") + + log.debug(f"y {y[:10]}") + log.debug(f"x {x[:10]}") + + return MetricResult( + x=x, + y=y, + label=r"$S/\sqrt{B}$" + ) diff --git a/analysis/vbs_vvh/plotter_utils/plotting/draw.py b/analysis/vbs_vvh/plotter_utils/plotting/draw.py new file mode 100644 index 00000000..67a3ee15 --- /dev/null +++ b/analysis/vbs_vvh/plotter_utils/plotting/draw.py @@ -0,0 +1,49 @@ +# plotter_utils/plotting/draw.py + +import matplotlib.pyplot as plt +from plotter_utils.metrics.significance import SignificanceMetric +from plotter_utils.metrics.dataMC import DataMCMetric +from .main_plot import draw_main_plot +from .subplots import draw_metric_subplot + + + +METRIC_REGISTRY = { + "significance": SignificanceMetric, + "dataMC": 
DataMCMetric,
+}
+
+
+def draw(
+    *,
+    sig=None,
+    bkg=None,
+    data=None,
+    proc_map=None,
+    config=None,
+    title=None,
+):
+    subplots = config["SUBPLOTS"]
+    ratios = config["FIG_RATIO"]
+
+    heights = [ratios["main"]] + [ratios[s] for s in subplots]
+    nrows = 1 + len(subplots)
+
+    fig, axes = plt.subplots(
+        nrows=nrows,
+        ncols=1,
+        figsize=(7, 3 + 2 * nrows),
+        gridspec_kw={"height_ratios": heights},
+        sharex=True,
+    )
+
+    ax_main = axes[0]
+    draw_main_plot(ax_main, sig=sig, bkg=bkg, data=data, proc_map=proc_map)
+    ax_main.set_title(title)
+
+    for ax, name in zip(axes[1:], subplots):
+        metric_cls = METRIC_REGISTRY[name]
+        metric = metric_cls(sig=sig, bkg=bkg, data=data)
+        draw_metric_subplot(ax, metric, name)
+
+    return fig
diff --git a/analysis/vbs_vvh/plotter_utils/plotting/main_plot.py b/analysis/vbs_vvh/plotter_utils/plotting/main_plot.py
new file mode 100644
index 00000000..b35480b0
--- /dev/null
+++ b/analysis/vbs_vvh/plotter_utils/plotting/main_plot.py
@@ -0,0 +1,119 @@
+# plotter_utils/plotting/main_plot.py
+
+import numpy as np
+
+import logging
+logger = logging.getLogger(__name__)
+
+
+def order_backgrounds(bkg, mode="yield"):
+    """
+    Order background process groups (ascending total yield by default).
+    """
+    if mode == "yield":
+        yields = {
+            p: bkg.hist[{"process_grp": [p]}].values(flow=True).sum()
+            for p in bkg.hist.axes["process_grp"]
+        }
+        return sorted(yields, key=yields.get)
+    return list(bkg.hist.axes["process_grp"])
+
+
+def get_scaled_signal(sig, bkg, mode="match_bkg"):
+    """
+    Scale signal to the total background yield for overlay.
+    """
+    if sig is None or bkg is None:
+        return (sig.hist if sig is not None else None), 1.0
+
+    if mode == "match_bkg":
+        s = sig.total_yield()
+        b = bkg.total_yield()
+        if s > 0:
+            factor = b / s
+            return sig.hist * factor, factor
+
+    return sig.hist, 1.0
+
+
+def draw_bkg_uncertainty(ax, bkg):
+    """
+    Draw the MC statistical uncertainty band.
+    """
+    htot = bkg.hist_total
+    logger.debug(f'bkg: {bkg.hist.shape}')
+    logger.debug(f'htot: {htot.shape}')
+
+    vals = htot.values(flow=False)
+    errs = np.sqrt(htot.variances(flow=False))
+
+    edges = htot.axes[-1].edges
+    # pad with a trailing 0 so the band has one value per bin edge (step="post")
+    err_up = np.append(vals + errs, 0)
+    err_dn = np.append(vals - errs, 0)
+    logger.debug(f'vals: {vals.shape}\n errs {errs.shape}\n vals[-1] {vals[-1]}')
+
+    logger.debug(f'edges: {edges.shape}')
+    logger.debug(f'errup/dn: {err_up.shape} {err_dn.shape}')
+
+    ax.fill_between(
+        edges,
+        err_dn,
+        err_up,
+        step="post",
+        facecolor="none",
+        edgecolor="gray",
+        hatch="////",
+        linewidth=0.0,
+        alpha=0.5,
+        label="MC stat",
+    )
+
+
+def draw_main_plot(ax, *, sig=None, bkg=None, data=None, proc_map=None):
+    """
+    Draw the main stacked distribution panel.
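+
+    Panels (each drawn only when the corresponding object is not None):
+    background stack ordered by yield with a hatched MC-stat band,
+    signal overlay scaled to the total background yield, data points.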
+    """
+
+    # ---- Background stack ----
+    if bkg is not None:
+        order = order_backgrounds(bkg)
+        colors = [proc_map.background_colors[p] for p in order]
+
+        bkg.hist[{"process_grp": order}].plot1d(
+            ax=ax,
+            stack=True,
+            histtype="fill",
+            color=colors,
+            label=order,
+            zorder=100,
+        )
+
+        draw_bkg_uncertainty(ax, bkg)
+
+    # ---- Signal overlay ----
+    if sig is not None:
+        sig_plot, scale = get_scaled_signal(sig, bkg)
+        sig_plot.plot1d(
+            ax=ax,
+            color="red",
+            linewidth=2,
+            label=f"Signal × {scale:.1f}",
+            zorder=101,
+        )
+
+    # ---- Data ----
+    if data is not None:
+        data.hist.plot1d(
+            ax=ax,
+            color="blue",
+            label="Data",
+            zorder=102,
+        )
+
+    ax.legend()
+    ax.set_ylabel("Events")
diff --git a/analysis/vbs_vvh/plotter_utils/plotting/subplots.py b/analysis/vbs_vvh/plotter_utils/plotting/subplots.py
new file mode 100644
index 00000000..80f9b983
--- /dev/null
+++ b/analysis/vbs_vvh/plotter_utils/plotting/subplots.py
@@ -0,0 +1,41 @@
+# plotter_utils/plotting/subplots.py
+
+import logging
+
+log = logging.getLogger(__name__)
+
+
+def draw_metric_subplot(ax, metric, metric_name):
+    ok, missing = metric.available()
+    if metric.y_range:
+        ax.set_ylim(*metric.y_range)
+
+    if not ok:
+        msg = f"{metric_name}: {missing} not available"
+        log.warning(msg)
+        ax.text(
+            0.5, 0.5, msg,
+            ha="center", va="center",
+            transform=ax.transAxes,
+        )
+        ax.set_axis_off()
+        return
+
+    # compute only after confirming the required inputs exist
+    result = metric.compute()
+    log.debug(f"draw_metric_subplot {metric_name}: x={result.x}\ny={result.y}")
+
+    if result.yerr is not None:
+        ax.errorbar(
+            result.x,
+            result.y,
+            yerr=result.yerr,
+            fmt="o",
+        )
+    else:
+        ax.step(result.x, result.y, where="mid")
+
+    ax.axhline(1.0 if metric_name == "dataMC" else 0.0,
+               color="gray", linestyle="--")
+
+    ax.set_ylabel(result.label)
diff --git a/analysis/vbs_vvh/plotter_utils/test/test_histo_obj.py b/analysis/vbs_vvh/plotter_utils/test/test_histo_obj.py
new file mode 100644
index 00000000..b4c85b39
--- /dev/null
+++ b/analysis/vbs_vvh/plotter_utils/test/test_histo_obj.py
@@ -0,0 +1,104 @@
+
+# this runs the pipeline up through histo_objects
+
+import numpy as np
+
+from plotter_utils.histo_reader.loader import load_hist_collection
+from plotter_utils.histo_reader.sample_info import ProcessMap,get_all_years
+from plotter_utils.histo_objects import (
+    SignalHist,
+    BackgroundHist,
+    DataHist,
+)
+
+import config.plotting_config as cfg
+
+import logging
+logging.basicConfig(
+    level=cfg.LOG_LEVEL, # minimum level to capture, from the plotting config
+    format=cfg.LOG_FORMAT,
+    datefmt='%Y-%m-%d %H:%M:%S'
+)
+
+# ---------------------------------------------------------
+# 1. Load histograms the SAME way make_plot.py does
+# ---------------------------------------------------------
+
+cutname='objsel' # change if needed
+years = get_all_years()
+proc_map = ProcessMap.from_csv()
+
+
+hist_collection = load_hist_collection(
+    pkl_paths=['/home/users/pyli/projects/analysis_VVH/coffea/ewkcoffea/analysis/vbs_vvh/histos/test/test2/test2_bkg.pkl.gz','/home/users/pyli/projects/analysis_VVH/coffea/ewkcoffea/analysis/vbs_vvh/histos/test/test2/test2_sig.pkl.gz'],
+    proc_map=proc_map,
+    cutname=cutname # change if needed
+)
+
+# Pick something simple
+#var = hist_collection.variables_mc_only[0]
+var = 'Higgs_pt'
+view = hist_collection.views[var]
+
+print(f"Testing variable: {var}, category: {cutname}")
+
+# ---------------------------------------------------------
+# 2. 
Process map (years + grouping) +# --------------------------------------------------------- + + +# --------------------------------------------------------- +# 3. Construct histogram objects +# --------------------------------------------------------- + +sig = SignalHist( + view["signal"], + years=years, + process_grouping={"Signal": proc_map.signal}, + coupling="nominal", # or your real coupling +) +bkg = BackgroundHist( + view["background"], + years=years, + process_grouping=proc_map.background_groups, + background_groups=list(proc_map.background_groups.keys()), +) + + +# --------------------------------------------------------- +# 4. Sanity checks (prints, not asserts) +# --------------------------------------------------------- + +print("\n--- Signal ---") +print("Total yield:", sig.total_yield()) +print("Bin count:", len(sig.values(flow=False))) +print("Has only Signal:", sig.hist.axes["process_grp"]) + +print("\n--- Background ---") +print("Total yield:", bkg.total_yield()) +print("Bin count:", len(bkg.values(flow=False))) +print("Process groups:", bkg.hist.axes["process_grp"]) + +# --------------------------------------------------------- +# 5. Numerical consistency checks +# --------------------------------------------------------- + +print("\n--- Cross checks ---") + +print("sig obj info") +print(sig,type(sig)) +print("sig methods") +print(dir(sig)) +print("bkg obj info") +print(bkg,type(bkg)) +print("bkg methods") +print(dir(bkg)) + +# No NaNs +assert not np.isnan(sig.values()).any(), "NaN in signal values" +assert not np.isnan(bkg.values()).any(), "NaN in background values" + +# Background should be >= 0 everywhere +#assert (bkg.values(flow=False) >= 0).all(), "Negative background bin!" + +print("All basic checks passed ") diff --git a/analysis/vbs_vvh/plotter_utils/test/test_import.py b/analysis/vbs_vvh/plotter_utils/test/test_import.py new file mode 100644 index 00000000..b9ee5f87 --- /dev/null +++ b/analysis/vbs_vvh/plotter_utils/test/test_import.py @@ -0,0 +1,8 @@ +import pickle, gzip +import json +import os, sys + +test_file_path = '/home/users/pyli/projects/analysis_VVH/coffea/ewkcoffea/analysis/vbs_vvh/histos/test/histos/test1_hists.pkl.gz' +def test_file(): + print(f'importing test file from {test_file_path}') + return pickle.load(gzip.open(test_file_path)) \ No newline at end of file diff --git a/analysis/vbs_vvh/plotter_utils/test/test_plotting.py b/analysis/vbs_vvh/plotter_utils/test/test_plotting.py new file mode 100644 index 00000000..26427c7a --- /dev/null +++ b/analysis/vbs_vvh/plotter_utils/test/test_plotting.py @@ -0,0 +1,111 @@ +# test_plotting.py + +import logging +from plotter_utils.histo_reader.loader import load_hist_collection +from plotter_utils.histo_reader.sample_info import ProcessMap, get_all_years +from plotter_utils.histo_objects import SignalHist, BackgroundHist, DataHist +from plotter_utils.plotting.draw import draw + + +def setup_logging(): + logging.basicConfig( + level=logging.DEBUG, + format="%(levelname)s %(name)s: %(message)s" + ) + + for noisy in [ + "matplotlib", + "matplotlib.font_manager", + "PIL", + "numpy", + "uproot", + "coffea", + "numba", + "boost_histogram", + ]: + logging.getLogger(noisy).setLevel(logging.WARNING) + +setup_logging() + +# --------------------------------------------------------- +# Minimal "config" replacement +# --------------------------------------------------------- + +SUBPLOTS = ("dataMC", "significance") +FIG_RATIO = { + "main": 3, + "dataMC": 1, + "significance": 1, +} + +CONFIG = { + "SUBPLOTS": SUBPLOTS, + 
"FIG_RATIO": FIG_RATIO, +} + +# --------------------------------------------------------- +# Load histograms +# --------------------------------------------------------- + +cutname = "objsel" +years = get_all_years() +proc_map = ProcessMap.from_csv() +logging.debug(f"{proc_map.background_colors}") + +hist_collection = load_hist_collection( + pkl_paths=[ + "/home/users/pyli/projects/analysis_VVH/coffea/ewkcoffea/analysis/vbs_vvh/histos/test/test2/test2_bkg.pkl.gz", + "/home/users/pyli/projects/analysis_VVH/coffea/ewkcoffea/analysis/vbs_vvh/histos/test/test2/test2_sig.pkl.gz", + "/home/users/pyli/projects/analysis_VVH/coffea/ewkcoffea/analysis/vbs_vvh/histos/test/test2/test2_data.pkl.gz" + ], + proc_map=proc_map, + cutname=cutname, +) + +# Pick variable +var = "Higgs_pt" +view = hist_collection.views[var] + +# --------------------------------------------------------- +# Build histogram objects +# --------------------------------------------------------- + +sig = SignalHist( + view["signal"], + years=years, + process_grouping={"Signal": proc_map.signal}, + coupling="nominal", +) + +bkg = BackgroundHist( + view["background"], + years=years, + process_grouping=proc_map.background_groups, + background_groups=list(proc_map.background_groups.keys()), +) + +data = DataHist( + view["data"], + years=years, + process_grouping={"Data": proc_map.data}, +) + +logging.debug(f"in main sig {sig.hist}") +logging.debug(f"in main bkg {bkg.hist}") +logging.debug(f"in main data {data.hist}") + +# --------------------------------------------------------- +# Draw +# --------------------------------------------------------- +logging.debug(f"in test_main proc_map bkg is {proc_map.background_colors}") +fig = draw( + sig=sig, + bkg=bkg, + data=data, + proc_map=proc_map, + config=CONFIG, + title=f"{cutname} : {var}", +) + +fig.savefig(f"test_{var}.png", bbox_inches="tight") +print(f"Saved test_{var}.png") diff --git a/analysis/vbs_vvh/run_analysis.py b/analysis/vbs_vvh/run_analysis.py index 29c1b467..80f28178 100644 --- a/analysis/vbs_vvh/run_analysis.py +++ b/analysis/vbs_vvh/run_analysis.py @@ -9,13 +9,17 @@ import socket from coffea import processor from coffea.nanoevents import NanoAODSchema +from coffea.nanoevents.schemas import BaseSchema + NanoAODSchema.warn_missing_crossrefs = False import topcoffea.modules.remote_environment as remote_environment LST_OF_KNOWN_EXECUTORS = ["futures","work_queue","iterative"] -LST_OF_KNOWN_PROCESSORS = ["semilep","semilep_nano","simple_gen"] - +LST_OF_KNOWN_PROCESSORS = ["semilep","semilep_nano","simple_gen",'2FJMET'] +import warnings +warnings.filterwarnings("ignore", message=".*as it is not interpretable by Uproot.*") +#NanoAODSchema.error_missing_event_ids = False if __name__ == '__main__': parser = argparse.ArgumentParser(description='You can customize your run') @@ -39,6 +43,11 @@ parser.add_argument('--processor', '-p', default='semilep', help = 'Which processor to execute', choices=LST_OF_KNOWN_PROCESSORS) parser.add_argument('--rwgt-to-sm', action='store_true', help = '') + # make output easier + parser.add_argument('--project', default=None, help = 'useful when trying combinations of cutflow (name of cutflows(.yaml) file)') + parser.add_argument('--cutflow', default=None, help = "Specify which cutflow to use") + parser.add_argument('--minus', action='store_true', help = "Use this to plot n-1 plots instead of cutflow") + args = parser.parse_args() jsonFiles = args.jsonFiles @@ -58,13 +67,23 @@ rwgt_to_sm = args.rwgt_to_sm wc_lst = args.wc_list if args.wc_list is not None 
else []
+    project = args.project
+    cutflow_name = args.cutflow
+    n_minus_1 = args.minus
+
     # Import the proper processor, based on option specified
     if args.processor == "semilep":
         import analysis_processor_semilep as analysis_processor
+        defaultSchema = NanoAODSchema
     elif args.processor == "semilep_nano":
         import analysis_processor_semilep_fromnano as analysis_processor
+        defaultSchema = NanoAODSchema
     elif args.processor == "simple_gen":
         import analysis_processor_gen_fromnano as analysis_processor
+        defaultSchema = NanoAODSchema
+    elif args.processor == "2FJMET":
+        import analysis_processor_2FJMET_fromRDF as analysis_processor
+        defaultSchema = BaseSchema
 
     # Check that if on UF login node, we're using WQ
     hostname = socket.gethostname()
@@ -202,7 +221,10 @@ def LoadJsonToSampleName(jsonFile, prefix):
     else:
         print('No Wilson coefficients specified')
 
-    processor_instance = analysis_processor.AnalysisProcessor(samplesdict,wc_lst,hist_lst,do_systs,skip_obj_systs,skip_sr,skip_cr,siphon_bdt_data=siphon,rwgt_to_sm=rwgt_to_sm)
+    if args.processor == "2FJMET":
+        processor_instance = analysis_processor.AnalysisProcessor(samplesdict,wc_lst,n_minus_1,project,cutflow_name)
+    else:
+        processor_instance = analysis_processor.AnalysisProcessor(samplesdict,wc_lst,hist_lst,do_systs,skip_obj_systs,skip_sr,skip_cr,siphon_bdt_data=siphon,rwgt_to_sm=rwgt_to_sm)
 
     if executor == "work_queue":
         executor_args = {
@@ -282,18 +304,26 @@ def LoadJsonToSampleName(jsonFile, prefix):
 
     # Run the processor and get the output
     tstart = time.time()
-
     if executor == "futures":
         exec_instance = processor.FuturesExecutor(workers=nworkers, merging=(1, 30, 10000))
-        runner = processor.Runner(exec_instance, schema=NanoAODSchema, chunksize=chunksize, maxchunks=nchunks)
+        runner = processor.Runner(
+            executor=exec_instance,
+            schema=defaultSchema,
+            chunksize=chunksize,
+            maxchunks=nchunks
+        )
     elif executor == "iterative":
         exec_instance = processor.IterativeExecutor()
-        runner = processor.Runner(exec_instance, schema=NanoAODSchema, chunksize=chunksize, maxchunks=nchunks)
+        runner = processor.Runner(exec_instance, schema=defaultSchema, chunksize=chunksize, maxchunks=nchunks)
     elif executor == "work_queue":
        executor = processor.WorkQueueExecutor(**executor_args)
-        runner = processor.Runner(executor, schema=NanoAODSchema, chunksize=chunksize, maxchunks=nchunks, skipbadfiles=False, xrootdtimeout=180)
+        runner = processor.Runner(executor, schema=defaultSchema, chunksize=chunksize, maxchunks=nchunks, skipbadfiles=False, xrootdtimeout=180)
 
-    output = runner(flist, treename, processor_instance)
+    output = runner(
+        fileset=flist,
+        treename=treename,
+        processor_instance=processor_instance
+    )
 
     dt = time.time() - tstart
 
@@ -307,9 +338,38 @@ def LoadJsonToSampleName(jsonFile, prefix):
     if executor == "futures":
         print("Processing time: %1.2f s with %i workers (%.2f s cpu overall)" % (dt, nworkers, dt*nworkers, ))
 
+    if args.processor == "2FJMET":
+        # add project and cutflow to the output name
+        if project is not None: # add a subdir
+            outpath = outpath+f'/{project}'
+            if cutflow_name is not None:
+                outpath = outpath+f'/{cutflow_name}'
+        else:
+            outpath = outpath
+
+        if outname != 'plotsTopEFT':
+            if args.cutflow is not None:
+                outname = f'{args.cutflow}_{outname}'
+            elif args.project is not None:
+                outname = f'{args.project}_{outname}'
+            else:
+                outname = 'hists_'+outname
+        else:
+            if args.cutflow is not None:
+                outname = f'{args.cutflow}_hists'
+
+
+
     # Save the output
     if not os.path.isdir(outpath):
         os.system("mkdir -p %s"%outpath)
     out_pkl_file = 
os.path.join(outpath,outname+".pkl.gz") + + + if args.processor == "2FJMET": + if n_minus_1: + # Save the output + out_pkl_file = os.path.join(outpath,outname+"_m.pkl.gz") + print(f"\nSaving output in {out_pkl_file}...") with gzip.open(out_pkl_file, "wb") as fout: cloudpickle.dump(output, fout)