-
Notifications
You must be signed in to change notification settings - Fork 6
added 2FJJMET analyser and stuff #97
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Polly-Li
wants to merge
7
commits into
cmstas:main
Choose a base branch
from
Polly-Li:main
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
fce5412
added 2FJJMET analyser and stuff
Polly-Li 53ebb63
removed unnecessary files
Polly-Li 0d9abb7
further remove/rename files
Polly-Li ff3d129
fixed config, config/paths(missing default output)
Polly-Li 62d5e31
finish basis of plotter script (working script: test_plotting.py)
Polly-Li 5a54c04
complete output_reader (make plot and yield.json)
Polly-Li 9b04961
project and cutflow are only used in 2FJMET channel
Polly-Li File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -51,6 +51,7 @@ coverage.xml | |
| .pytest_cache/ | ||
| cover/ | ||
|
|
||
|
|
||
| # Translations | ||
| *.mo | ||
| *.pot | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,240 @@ | ||
| #!/usr/bin/env python | ||
| #import sys | ||
| import coffea | ||
| import numpy as np | ||
| import awkward as ak | ||
| np.seterr(divide='ignore', invalid='ignore', over='ignore') | ||
| from coffea import processor | ||
| import hist | ||
| from hist import axis | ||
| from coffea.analysis_tools import PackedSelection | ||
|
|
||
| from coffea.nanoevents.methods import vector | ||
| from config.variables.var_config import obj,other_objs, dense_variables_config | ||
|
|
||
| from config.paths import objsel_cf,get_cutflow,cutflow_yamls_dir,default_cutflow_yaml | ||
| from config.variables.extra_definitions import VV_ndaughters, Higgs_final_state | ||
|
|
||
class AnalysisProcessor(processor.ProcessorABC):
    """Coffea processor that fills categorized histograms for this analysis.

    For every dense variable declared in ``dense_variables_config`` one
    ``hist.Hist`` is created with sparse string axes (process, category,
    systematic, year) in front of the dense axis.  ``process()`` builds the
    configured physics objects and variables for each chunk of events,
    evaluates the cutflow selections, and fills the histograms for every
    weight variation and selection stage.
    """

    def __init__(self, samples, wc_names_lst=None, n_minus_1=False, project=None, cutflow_name=None):
        """Set up the cutflow, sample metadata and output histograms.

        Parameters
        ----------
        samples : dict
            Per-dataset metadata keyed by the dataset name found in
            ``events.metadata["dataset"]``; must provide at least the
            "isSig" and "isData" entries read in ``process()``.
        wc_names_lst : list, optional
            Wilson-coefficient names; stored but currently unused.
        n_minus_1 : bool
            If True, each selection mask excludes its own cut (N-1 cutflow)
            instead of being cumulative.
        project : str, optional
            Name of the project YAML (``<cutflow_yamls_dir>/<project>.yaml``)
            in which to look up the cutflow.
        cutflow_name : str, optional
            Name of the cutflow inside the YAML; when lookup fails we fall
            back to the default YAML and finally to the built-in default.
        """
        print('debug', project, cutflow_name)
        self.cutflow = self._resolve_cutflow(project, cutflow_name)
        self._n_minus_1 = n_minus_1  # if True, selection masks are N-1 cuts

        self._samples = samples
        # Use None as the default to avoid a shared mutable default argument.
        self._wc_names_lst = [] if wc_names_lst is None else wc_names_lst  # Wilson coefficients; not used
        self._dtype = np.float32  # note: changed from a constructor argument to this fixed definition

        # Create the dense axes for the histograms.
        self._dense_axes_dict = {
            var_name: cfg["axis"]
            for var_name, cfg in dense_variables_config.items()
        }

        # One histogram per dense variable, passed on to dict_accumulator.
        dout = {}
        for dense_axis_name, dense_axis in self._dense_axes_dict.items():
            dout[dense_axis_name] = hist.Hist(
                hist.axis.StrCategory([], growth=True, name="process", label="process"),
                hist.axis.StrCategory([], growth=True, name="category", label="category"),
                hist.axis.StrCategory([], growth=True, name="systematic", label="systematic"),
                hist.axis.StrCategory([], growth=True, name="year", label="year"),
                dense_axis,
                storage="weight",  # Keeps track of sumw2
                name="Counts",
            )

        # Set the accumulator.
        self._accumulator = processor.dict_accumulator(dout)

    @staticmethod
    def _resolve_cutflow(project, cutflow_name):
        """Resolve the cutflow dict: project YAML -> default YAML -> built-in default."""
        if cutflow_name is None:
            print("default cutflow is used.")
            return objsel_cf
        print(f'{cutflow_yamls_dir}/{project}.yaml', cutflow_name)
        try:  # try project yaml, then try the default yaml, finally just use the basic cutflow
            cutflow = get_cutflow(f'{cutflow_yamls_dir}/{project}.yaml', cutflow_name)
        except Exception:
            try:
                cutflow = get_cutflow(default_cutflow_yaml, cutflow_name)
                print(f"Cannot get {cutflow_name} in {str(project)}.yaml. Found cutflow in default yaml and use it instead")
                return cutflow
            except Exception:
                try:
                    # Distinguish "project yaml unreadable" from "cutflow missing".
                    print(get_cutflow(f'{cutflow_yamls_dir}/{project}.yaml'))
                    print("can get project yaml but not the cutflow. Use default cutflow instead.")
                    return objsel_cf
                except Exception:
                    print(f"Warning: Cutflow {cutflow_name} not found. Use default cutflow instead.")
                    return objsel_cf
        else:
            print(f"Cutflow {cutflow_name} is used.")
            return cutflow

    @property
    def accumulator(self):
        """The dict of output histograms (one per dense variable)."""
        return self._accumulator

    @property
    def columns(self):
        # NOTE(review): self._columns is never assigned in __init__, so
        # accessing this property would raise AttributeError — confirm intent.
        return self._columns

    # Main function: run on a given dataset
    def process(self, events):
        """Process one chunk of events: build objects/variables, apply the
        cutflow, and fill the accumulator histograms.  Returns the accumulator.
        """
        ak.behavior.update(vector.behavior)

        # Build physics objects: first the builders that need only `events`,
        # then the ones that also consume previously built objects.
        objects = {
            name: builder(events)
            for name, builder in obj.items()
        }
        objects.update({
            name: builder(events, objects)
            for name, builder in other_objs.items()
        })

        # Evaluate every configured dense variable.
        var = {
            var_name: cfg["expr"](events, objects)
            for var_name, cfg in dense_variables_config.items()
        }

        # Dataset parameters.
        json_name = events.metadata["dataset"]
        isSig = self._samples[json_name]["isSig"]
        isData = self._samples[json_name]["isData"]

        # An array of length n_events that is just 1 for each event.
        events['nom'] = ak.ones_like(var['nGoodAK4'])

        # Put the variables we'll plot into a dictionary for easy access later.
        dense_variables_dict = var

        ######### Store boolean masks with PackedSelection ##########
        selections = PackedSelection(dtype='uint64')
        for sel, crit_fn in self.cutflow.items():
            # NOTE(review): the builders dict `obj` (not the built `objects`)
            # is passed as the third argument here — confirm this is intended.
            mask = crit_fn(events, var, obj)
            selections.add(sel, mask)

        ######### Fill histos #########

        # Set up the weight fluctuations to loop over.  For now the systs do
        # not depend on the category, so this can live outside the fill loop.
        wgt_var_dict = {
            "nominal": events.weight,
        }

        if isSig:
            print("Add other coupling points")
            # LHE reweighting to other coupling points; indices 60/72 select
            # specific points in the LHEReweightingWeight vector.
            wgt_var_dict["SM"] = events.weight * ak.fill_none(events.LHEReweightingWeight[:,60],0)
            wgt_var_dict["c2v_1p5"] = events.weight * ak.fill_none(events.LHEReweightingWeight[:,72],0)
            daughters = VV_ndaughters(events)
            n_had = ak.fill_none(daughters['n_had'], 0)
            n_MET = ak.fill_none(daughters['n_MET'], 0)
            # Split the c2v=1.5 point by VV decay: fully invisible vs semi-leptonic.
            mask1 = (n_had == 2) & (n_MET == 2)
            mask2 = (n_had == 2) & (n_MET == 1)
            wgt_var_dict["c2v_1p5_qqNuNu"] = events.weight * ak.fill_none(events.LHEReweightingWeight[:,72],0) * mask1
            wgt_var_dict["c2v_1p5_qqlNu"] = events.weight * ak.fill_none(events.LHEReweightingWeight[:,72],0) * mask2

        # Build the cut mask dictionary for each selection.
        cut_mask_dict = {}
        if self._n_minus_1:
            # N-1 cutflow: exclude the current cut from the mask.
            for sel in self.cutflow:
                if 'objsel' in self.cutflow:
                    # NOTE(review): 'objsel' is re-appended even though it is
                    # already in the list (and for sel == 'objsel' this undoes
                    # the exclusion) — confirm intended.
                    exclusive_sels = [k for k in self.cutflow if k != sel] + ['objsel']
                else:
                    exclusive_sels = [k for k in self.cutflow if k != sel] + list(objsel_cf)
                cut_mask_dict[sel] = selections.all(*exclusive_sels)
        else:
            # Normal cutflow: cumulative cuts in declaration order.
            cumulative_cuts = []
            for sel in self.cutflow:
                cumulative_cuts.append(sel)
                cut_mask_dict[sel] = selections.all(*cumulative_cuts)

        # Produce histograms for each weight variation and each selection.
        for wgt_key, wgt in wgt_var_dict.items():
            for sel, sel_mask in cut_mask_dict.items():
                for dense_axis_name, dense_axis_vals in dense_variables_dict.items():
                    axes_fill_info_dict = {
                        dense_axis_name : ak.fill_none(dense_axis_vals[sel_mask], 0),
                        "weight" : ak.fill_none(wgt[sel_mask], 0),
                        "process" : ak.fill_none(events.name[sel_mask], "unknown"),  # changed from sample_name to name
                        "category" : sel,
                        "systematic" : wgt_key,
                    }
                    # Include year only if the histogram has a year axis.
                    if "year" in self.accumulator[dense_axis_name].axes.name:
                        axes_fill_info_dict["year"] = ak.fill_none(events.year[sel_mask], "unknown")  # changed from sample_year to year

                    self.accumulator[dense_axis_name].fill(**axes_fill_info_dict)
        return self.accumulator

    def postprocess(self, accumulator):
        """No-op postprocessing step required by ProcessorABC."""
        return accumulator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| import yaml | ||
| import awkward as ak | ||
|
|
||
def get_cutflow(yaml_file, cutflow=None):
    """
    Load cutflow definitions from a YAML file.

    Parameters
    ----------
    yaml_file : str
        Path to a YAML file mapping cutflow names to ``{step: expression}``
        dicts of boolean expression strings.
    cutflow : str, optional
        Name of a single cutflow to return.  If None, return the full
        dictionary of all cutflows.

    Returns
    -------
    dict
        ``{cutflow_name: {step: callable}}`` when *cutflow* is None,
        otherwise ``{step: callable}`` for the requested cutflow.  Each
        callable has the signature ``(events, var, obj) -> bool mask``.

    Raises
    ------
    KeyError
        If the requested cutflow name is not present in the file.
    """

    def make_lambda(expr: str):
        """Compile a YAML expression string into an (events, var, obj) lambda."""
        # SECURITY NOTE: eval() executes arbitrary code — cutflow YAML files
        # must come from trusted sources only.
        return eval(f"lambda events, var, obj: {expr}", {"ak": ak})

    with open(yaml_file, 'r') as f:
        config = yaml.safe_load(f)

    # Compile every step expression of every cutflow into a callable.
    cutflow_dict = {
        cf_name: {step: make_lambda(expr) for step, expr in cf_steps.items()}
        for cf_name, cf_steps in config.items()
    }

    if cutflow is None:
        print(f"read cutflow from project: {yaml_file}")
        return cutflow_dict
    print(f"read cutflow from project: {yaml_file}/{cutflow}")
    try:
        return cutflow_dict[cutflow]
    except KeyError:
        # Same exception type as a plain dict lookup, but with a useful message.
        raise KeyError(
            f"Cutflow '{cutflow}' not found in {yaml_file}; available: {list(cutflow_dict)}"
        ) from None
|
|
||
|
|
||
|
|
||
def export_cutflow_to_yaml(cutflow_dict, yaml_file):
    """
    Convert a cutflow_dict with lambda functions into a YAML file with
    string expressions (the inverse of ``get_cutflow``).

    Parameters
    ----------
    cutflow_dict : dict
        ``{cutflow_name: {step_name: callable}}``.
    yaml_file : str
        Destination path for the YAML output.
    """
    # Import moved below the docstring: placed before it, the triple-quoted
    # string was a plain expression statement, not the function's docstring.
    import inspect

    yaml_dict = {}
    for cf_name, cf_steps in cutflow_dict.items():
        yaml_dict[cf_name] = {}
        for step_name, func in cf_steps.items():
            # Extract the source of the lambda function.
            # NOTE(review): lambdas produced by get_cutflow() come from eval()
            # and have no source file, so getsource() raises OSError and the
            # expression is exported as "<unknown>" — round-tripping is lossy.
            try:
                src = inspect.getsource(func).strip()
                # Example: 'lambda events, var, obj: events.Pass_MetTriggers == 1'
                expr = src.split(":", 1)[1].strip()  # take only the expression part
            except (OSError, TypeError):
                expr = "<unknown>"
            yaml_dict[cf_name][step_name] = expr

    # Save to YAML, preserving insertion order.
    with open(yaml_file, 'w') as f:
        yaml.dump(yaml_dict, f, sort_keys=False)

    print(f"Cutflow exported to {yaml_file}")
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this copy needed? How does it differ from |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
#basic cuts
# Cutflow for the MET-triggered channel.  Each entry maps a cut name to a
# boolean expression string that get_cutflow() compiles into
#   lambda events, var, obj: <expr>
MET_objsel:
  # Tautology that is true for every event — serves as the no-cut baseline.
  all_events: "(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)"
  Met_trigger: "events.Pass_MetTriggers == 1"
  MET_pt: "events.Met_pt > 100"  # presumably GeV — confirm units
  MET_significance: "events.Met_significance > 20"
  ak4_bveto: "events.Pass_NoAK4BJet == 1"  # veto events with a b-tagged AK4 jet
  loose_H: "events.HiggsScore > 0.1"
  loose_V1: "events.V1Score > 0.1"
  has_vbs: "ak.all(events.vbs_idx_max_Mjj > -1, axis=1)"  # a VBS jet pair was found
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
#basic cuts
# NOTE(review): this file appears to be an exact duplicate of the other
# cutflow YAML added in this change — confirm whether the copy is needed.
# Each entry maps a cut name to a boolean expression string that
# get_cutflow() compiles into: lambda events, var, obj: <expr>
MET_objsel:
  # Tautology that is true for every event — serves as the no-cut baseline.
  all_events: "(events.Pass_MetTriggers == 1) | ~(events.Pass_MetTriggers == 1)"
  Met_trigger: "events.Pass_MetTriggers == 1"
  MET_pt: "events.Met_pt > 100"  # presumably GeV — confirm units
  MET_significance: "events.Met_significance > 20"
  ak4_bveto: "events.Pass_NoAK4BJet == 1"  # veto events with a b-tagged AK4 jet
  loose_H: "events.HiggsScore > 0.1"
  loose_V1: "events.V1Score > 0.1"
  has_vbs: "ak.all(events.vbs_idx_max_Mjj > -1, axis=1)"  # a VBS jet pair was found
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you remove this added line? It's usually best to avoid adding extraneous edits to files (as it can make the git history more confusing and less clean)