diff --git a/.flake8 b/.flake8 index b3b27469..2fd2d95d 100644 --- a/.flake8 +++ b/.flake8 @@ -1,10 +1,4 @@ [flake8] - -# line length of 100 is recommended, but set it to a forgiving value -max-line-length = 120 - -# codes of errors to ignore +max-line-length = 119 ignore = E128, E306, E402, E722, E731, W504, Q003 - -# enforce double quotes -inline-quotes = double +inline-quotes = double \ No newline at end of file diff --git a/.gitignore b/.gitignore index af9a513f..6e4de8b8 100644 --- a/.gitignore +++ b/.gitignore @@ -31,7 +31,7 @@ docs/_build tmp store software -data +columnflow_venv .data .law .setups diff --git a/.markdownlint b/.markdownlint new file mode 100644 index 00000000..adf6a3b6 --- /dev/null +++ b/.markdownlint @@ -0,0 +1 @@ +modules/columnflow/.markdownlint \ No newline at end of file diff --git a/.setups/template.sh b/.setups/template.sh new file mode 100644 index 00000000..accd8835 --- /dev/null +++ b/.setups/template.sh @@ -0,0 +1,36 @@ +export CF_CERN_USER="kjaffel" +export CF_CERN_USER_FIRSTCHAR="${CF_CERN_USER:0:1}" +export CF_DATA="$CF_REPO_BASE/columnflow_venv" +export CF_SOFTWARE_BASE="$CF_DATA/software" +export CF_VENV_BASE="$CF_SOFTWARE_BASE/venvs" +export CF_STORE_NAME="cf_store" +export CF_WLCG_USE_CACHE="true" +export CF_WLCG_CACHE_CLEANUP="false" +export CF_VENV_SETUP_MODE_UPDATE="false" +export CF_VENV_SETUP_MODE="update" +export CF_INTERACTIVE_VENV_FILE="" +export CF_LOCAL_SCHEDULER="true" +export CF_SCHEDULER_HOST="127.0.0.1" +export CF_SCHEDULER_PORT="8082" +export CF_FLAVOR="cms" +export LAW_CMS_VO="cms" + +# on manivald +export CF_CRAB_STORAGE_ELEMENT="T2_EE_Estonia" +export CF_SLURM_FLAVOR="manivald" +export CF_SLURM_PARTITION="main" +export CF_CLUSTER_LOCAL_PATH="/local/$CF_CERN_USER/HHMultilepton_Run3/" +export CF_CRAB_BASE_DIRECTORY="/store/user/$CF_CERN_USER/HHMultilepton_Run3/cf_crab_outputs" +export TMPDIR="/scratch/local/$CF_CERN_USER" + +# on lxplus +#export CF_CRAB_STORAGE_ELEMENT="T2_CH_CERN" +#export CF_HTCONDOR_FLAVOR="cern_el9" # or "cern" for older versions of lxplus not using ELMA9 +#export CF_CLUSTER_LOCAL_PATH="/eos/user/$CF_CERN_USER_FIRSTCHAR/$CF_CERN_USER/HHMultilepton_Run3/" +#export CF_CRAB_BASE_DIRECTORY="$CF_CLUSTER_LOCAL_PATH/cf_crab_outputs" +#export TMPDIR="/tmp/$CF_CERN_USER" + + +export CF_STORE_LOCAL="$CF_CLUSTER_LOCAL_PATH/$CF_STORE_NAME" +export CF_WLCG_CACHE_ROOT="$CF_CLUSTER_LOCAL_PATH/cf_scratch" +export CF_JOB_BASE="$CF_CLUSTER_LOCAL_PATH/cf_jobs" diff --git a/LICENSE b/LICENSE index 4188f26b..4a24adff 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,3 @@ All rights reserved. -The UHH HH -> bbtautau developers. +The HH -> Multilepton developers. diff --git a/README.md b/README.md index 9cb52610..dc58a794 100644 --- a/README.md +++ b/README.md @@ -1,64 +1,125 @@ -# HH → bb𝜏𝜏 +# HH (H → WW/ZZ/𝜏𝜏) → Multi-Leptons Analysis -[![Lint and test](https://github.com/uhh-cms/hh2bbtautau/actions/workflows/lint_and_test.yaml/badge.svg)](https://github.com/uhh-cms/hh2bbtautau/actions/workflows/lint_and_test.yaml) +**Table of contents** +- [Introduction](#introduction) +- [Installation (first time)](#first-time-setup) +- [Usage](#usage) +- [Useful links](#useful-links) +- [Contributors](#contributors) +- [Development](#development) -## Quickstart -A couple test tasks are listed below. -They might require a **valid voms proxy** for accessing input data. +## Introduction + +This is the code base for the Run2+Run3 iteration of the CMS HH Multileptons analysis. 
+
+The code is forked from, and for now heavily based on, the UHH version of the [HH → bb𝜏𝜏 analysis](https://github.com/uhh-cms/hh2bbtautau)
+and is still very much WIP. Expect remnants from the bb𝜏𝜏 analysis, crashes and bugs; you have been warned!
+
+Please make sure you are subscribed to our e-group: cms-hh-multilepton@cern.ch
+It controls the access to our indico etc. and is a good way to get updates for our meetings.
+
+Also join our channel on [mattermost](https://mattermost.web.cern.ch/cms-exp/channels/hh-multilepton-run3).
+(You will need to join the CMS team first if you have not done so already.)
+
+The code is currently developed with the Tallinn T2 (and lxplus) in mind.
+For further questions, please contact t\*\*\*\*.l\*\*\*\*@no-spam-cern.ch.
+
+## First time setup

 ```shell
-# clone the project
-git clone --recursive git@github.com:uhh-cms/hh2bbtautau.git
-cd hh2bbtautau
-
-# source the setup and store decisions in .setups/dev.sh (arbitrary name)
-source setup.sh dev
-
-# index existing tasks once to enable auto-completion for "law run"
-law index --verbose
-
-# run your first task
-# (they are all shipped with columnflow and thus have the "cf." prefix)
-law run cf.ReduceEvents \
-    --version v1 \
-    --dataset hh_ggf_bbtautau_madgraph \
-    --branch 0
-
-# create a plot
-law run cf.PlotVariables1D \
-    --version v1 \
-    --datasets hh_ggf_bbtautau_madgraph \
-    --producers default \
-    --variables jet1_pt \
-    --categories incl \
-    --branch 0
+# 1. clone the project
+git clone --recursive git@github.com:HEP-KBFI/hhmultilepton.git
+cd hhmultilepton

-# create a (test) datacard (CMS-style)
-law run cf.CreateDatacards \
-    --version v1 \
-    --producers default \
-    --inference-model test \
-    --workers 3
+# 2. get a voms token
+voms-proxy-init -voms cms -rfc -valid 196:00
+
+# 3. copy the provided template to a new file (you can choose any <setup_name>):
+cp .setups/template.sh .setups/mydev.sh
+
+# 4. open .setups/mydev.sh in your editor and adjust any environment variables or paths as needed for your local setup,
+#    then source the main setup script with your custom setup name:
+source setup.sh <setup_name> [sandbox_type]
+```
+```bash
+source setup.sh --help
+Arguments:
+  <setup_name>     Name of the setup (random name of your choice)
+  [sandbox_type]   Optional: choose between 'minimal' (default) or 'full'
+Examples:
+  source setup.sh mydev        # uses the 'minimal' environment from sandboxes/venv_multilepton.sh
+  source setup.sh mydev full   # uses the 'full' environment from sandboxes/venv_multilepton_dev.sh
 ```

-## Useful commands
+Note: if you prefer not to use the provided template, you can still activate the environment manually by running
+`source setup.sh <setup_name>`.
+In this case, `<setup_name>` should not already exist under the `.setups/` directory.
+When you run the command, the setup script will guide you interactively, prompting you to enter the required environment variables (as `export` commands). Once completed, these settings are automatically saved to `.setups/<setup_name>.sh`.
+
+*(screenshot of the interactive setup dialogue omitted)*
+
+Code can now be run, but the storage locations for task outputs should first be checked, as configured [here](https://github.com/HEP-KBFI/hhmultilepton/blob/master/law_outputs.cfg#L26-L90).
+Currently outputs point to the user store of `T2_EE_Estonia` on manivald so that they are also accessible remotely, but we will likely adapt this over time depending on the output:
+large outputs go to a remotely reachable location, smaller ones to local stores. Larger ones will likely also be split by user/cluster so that central versions can be reused.
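+
+To double-check where a given task would write its outputs before actually producing anything, law's standard `--print-output` flag prints the resolved targets instead of running the task. This is only a minimal sketch; the task, version and dataset are examples taken from this README, and a depth of `0` restricts the printout to the task's own outputs:
+
+```shell
+law run cf.ReduceEvents \
+    --version test \
+    --dataset hh_ggf_htt_hvv_kl1_kt1_powheg \
+    --print-output 0
+```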
+
+**For development on lxplus, I strongly advise changing `wlcg_fs_manivald` to `wlcg_fs_cernbox` right at the beginning.**
+
+## Usage

-### Full reduction
+1. Set up your environment (**always**):

 ```shell
-law run cf.ReduceEventsWrapper \
-    --version prod1 \
-    --configs run3_2022_preEE \
-    --datasets "*" \
-    --shifts "nominal,{tune,hdamp,mtop}_{up,down}" \
-    --cf.ReduceEvents-workflow htcondor \
-    --cf.ReduceEvents-pilot \
-    --cf.ReduceEvents-tasks-per-job 3 \
-    --local-scheduler False \
-    --workers 6
+voms-proxy-init -voms cms -rfc -valid 196:00
+
+# source the setup and export the env stored in ".setups/mydev.sh" in this case
+source setup.sh mydev
+```
+
+2. Try to run on one signal, one background and one data dataset locally:
+
+```shell
+law run cf.PlotVariables1D \
+    --version test \
+    --producers default \
+    --variables nmu \
+    --datasets hh_ggf_htt_hvv_kl1_kt1_powheg,zz_pythia,data_e_c \
+```
+
+3. If the above runs successfully, you can proceed to submit jobs via slurm/condor by adding
+
+```shell
+    --workflow slurm \     # or
+    --workflow htcondor \  # or
+    --workflow crab \      # to be tested!?
 ```
+## Documentation
+
+- Lives here: https://gitlab.cern.ch/hh-multileptons-full-analysis/hh-multileptons-doc
+- Talks:
+  - slides: https://indico.cern.ch/event/1580193/contributions/6660044/attachments/3121091/5534653/multilep%20framework.pdf
+
+## 🙏 Contributors
+
+- Torben Lange 💻
+- Matheus Coelho 💻
+ + + + + ## Useful links - [columnflow documentation](https://columnflow.readthedocs.io/en/latest/index.html) @@ -68,13 +129,11 @@ law run cf.ReduceEventsWrapper \ - [GrASP](https://cms-pdmv-prod.web.cern.ch/grasp/) - [XSDB](https://xsdb-temp.app.cern.ch) - [DAS](https://cmsweb.cern.ch/das) -NanoAOD: +NanoAOD - [Nano documentation](https://gitlab.cern.ch/cms-nanoAOD/nanoaod-doc) - [Correctionlib files](https://gitlab.cern.ch/cms-nanoAOD/jsonpog-integration) -- JME - - [Docs](https://cms-jerc.web.cern.ch) -- BTV - - [Docs](https://btv-wiki.docs.cern.ch) +- [JME](https://cms-jerc.web.cern.ch) +- [BTV](https://btv-wiki.docs.cern.ch) - TAU - [Run 2 Twiki](https://twiki.cern.ch/twiki/bin/viewauth/CMS/TauIDRecommendationForRun2) - [Run 3 Twiki](https://twiki.cern.ch/twiki/bin/viewauth/CMS/TauIDRecommendationForRun3) @@ -82,5 +141,8 @@ NanoAOD: ## Development -- Source hosted at [GitHub](https://github.com/uhh-cms/hh2bbtautau) -- Report issues, questions, feature requests on [GitHub Issues](https://github.com/uhh-cms/hh2bbtautau/issues) +- Source hosted at [GitHub](https://github.com/HEP-KBFI/hhmultilepton) +- Report issues, questions, feature requests on [GitHub Issues](https://github.com/HEP-KBFI/hhmultilepton/issues) +- Ideally also ping us on [mattermost](https://mattermost.web.cern.ch/cms-exp/channels/hh-multilepton-run3). +- For new features open a new branch before merging into master, ask for a code review by a felllow contributor and dont forget linting! +- Happy coding 😊 diff --git a/hbt/calibration/default.py b/hbt/calibration/default.py deleted file mode 100644 index f0d54934..00000000 --- a/hbt/calibration/default.py +++ /dev/null @@ -1,176 +0,0 @@ -# coding: utf-8 - -""" -Calibration methods. -""" - -from columnflow.calibration import Calibrator, calibrator -from columnflow.calibration.cms.met import met_phi -from columnflow.calibration.cms.jets import jec, jec_nominal, jer -from columnflow.calibration.cms.tau import tec, tec_nominal -from columnflow.calibration.cms.egamma import eer, eec -from columnflow.production.cms.mc_weight import mc_weight -from columnflow.production.cms.supercluster_eta import electron_sceta -from columnflow.production.cms.seeds import ( - deterministic_event_seeds, deterministic_jet_seeds, deterministic_electron_seeds, - deterministic_photon_seeds, -) -from columnflow.util import maybe_import - -from hbt.util import IF_RUN_2, IF_RUN_3_2022 - -ak = maybe_import("awkward") - - -# custom seed producer skipping GenPart fields -custom_deterministic_event_seeds = deterministic_event_seeds.derive( - "custom_deterministic_event_seeds", - cls_dict={"object_count_columns": [ - route - for route in deterministic_event_seeds.object_count_columns - if not str(route).startswith(("GenPart.", "Photon.")) - ]}, - -) - - -@calibrator( - uses={ - mc_weight, custom_deterministic_event_seeds, deterministic_jet_seeds, - deterministic_photon_seeds, deterministic_electron_seeds, - electron_sceta, - }, - produces={ - mc_weight, custom_deterministic_event_seeds, deterministic_jet_seeds, - deterministic_photon_seeds, deterministic_electron_seeds, - electron_sceta, - }, -) -def default(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: - if self.dataset_inst.is_mc: - events = self[mc_weight](events, **kwargs) - - # seed producers - # !! as this is the first step, the object collections should still be pt-sorted, - # !! 
so no manual sorting needed here (but necessary if, e.g., jec is applied before) - events = self[custom_deterministic_event_seeds](events, **kwargs) - events = self[deterministic_jet_seeds](events, **kwargs) - events = self[deterministic_electron_seeds](events, **kwargs) - - events = self[electron_sceta](events, **kwargs) - if self.dataset_inst.is_data or not self.global_shift_inst.is_nominal: - events = self[self.jec_nominal_cls](events, **kwargs) - # egamma scale calibrations should only be applied to data - # so if the global shift is not nominal, we are in the shifted case - # and will only execute something if it's data - if self.dataset_inst.is_data: - if self.has_dep(self.electron_scale_nominal_cls): - events = self[self.electron_scale_nominal_cls](events, **kwargs) - else: - if self.has_dep(self.electron_res_nominal_cls): - events = self[self.electron_res_nominal_cls](events, **kwargs) - else: - events = self[self.jec_full_cls](events, **kwargs) - events = self[self.deterministic_jer_cls](events, **kwargs) - # in this block, we are in the nominal case in MC - if self.has_dep(self.electron_res_cls): - events = self[self.electron_res_cls](events, **kwargs) - if self.has_dep(self.electron_scale_cls): - events = self[self.electron_scale_cls](events, **kwargs) - - if self.config_inst.campaign.x.run == 2: - events = self[self.met_phi_cls](events, **kwargs) - - if self.dataset_inst.is_mc: - if self.global_shift_inst.is_nominal: - events = self[self.tec_cls](events, **kwargs) - else: - events = self[self.tec_nominal_cls](events, **kwargs) - - return events - - -@default.init -def default_init(self: Calibrator) -> None: - # set the name of the met collection to use - met_name = self.config_inst.x.met_name - raw_met_name = self.config_inst.x.raw_met_name - - # derive calibrators to add settings once - flag = f"custom_calibs_registered_{self.cls_name}" - if not self.config_inst.x(flag, False): - # jec calibrators - self.config_inst.x.calib_jec_full_cls = jec.derive("jec_full", cls_dict={ - "mc_only": True, - "nominal_only": True, - "met_name": met_name, - "raw_met_name": raw_met_name, - }) - self.config_inst.x.calib_jec_nominal_cls = jec_nominal.derive("jec_nominal", cls_dict={ - "met_name": met_name, - "raw_met_name": raw_met_name, - }) - # version of jer that uses the first random number from deterministic_seeds - self.config_inst.x.calib_deterministic_jer_cls = jer.derive("deterministic_jer", cls_dict={ - "deterministic_seed_index": 0, - "met_name": met_name, - }) - # derive tec calibrators - self.config_inst.x.calib_jec_cls = tec.derive("tec", cls_dict={ - "met_name": met_name, - }) - self.config_inst.x.calib_jec_cls = tec_nominal.derive("tec_nominal", cls_dict={ - "met_name": met_name, - }) - # derive met_phi calibrator (currently only used in run 2) - self.config_inst.x.calib_met_phi_cls = met_phi.derive("met_phi", cls_dict={ - "met_name": met_name, - }) - - # derive electron scale calibrators - self.config_inst.x.calib_electron_scale_cls = eec.derive("eec_full", cls_dict={ - }) - - self.config_inst.x.calib_electron_scale_nominal_cls = eec.derive("eec_nominal", cls_dict={ - "with_uncertainties": False, - }) - - # derive electron resolution calibrator - self.config_inst.x.calib_electron_res_cls = eer.derive("eer_full", cls_dict={ - "deterministic_seed_index": 0, - }) - - self.config_inst.x.calib_electron_res_nominal_cls = eer.derive("eer_nominal", cls_dict={ - "deterministic_seed_index": 0, - "with_uncertainties": False, - }) - - # change the flag - self.config_inst.set_aux(flag, True) - 
- self.jec_full_cls = self.config_inst.x.calib_jec_full_cls - self.jec_nominal_cls = self.config_inst.x.calib_jec_nominal_cls - self.deterministic_jer_cls = self.config_inst.x.calib_deterministic_jer_cls - self.tec_cls = self.config_inst.x.calib_jec_cls - self.tec_nominal_cls = self.config_inst.x.calib_jec_cls - self.met_phi_cls = self.config_inst.x.calib_met_phi_cls - self.electron_scale_cls = self.config_inst.x.calib_electron_scale_cls - self.electron_scale_nominal_cls = self.config_inst.x.calib_electron_scale_nominal_cls - self.electron_res_cls = self.config_inst.x.calib_electron_res_cls - self.electron_res_nominal_cls = self.config_inst.x.calib_electron_res_nominal_cls - - # collect derived calibrators and add them to the calibrator uses and produces - derived_calibrators = { - self.jec_full_cls, - self.jec_nominal_cls, - self.deterministic_jer_cls, - self.tec_cls, - self.tec_nominal_cls, - IF_RUN_2(self.met_phi_cls), - IF_RUN_3_2022(self.electron_scale_cls), - IF_RUN_3_2022(self.electron_scale_nominal_cls), - IF_RUN_3_2022(self.electron_res_cls), - IF_RUN_3_2022(self.electron_res_nominal_cls), - } - self.uses |= derived_calibrators - self.produces |= derived_calibrators diff --git a/hbt/categorization/default.py b/hbt/categorization/default.py deleted file mode 100644 index 9bc0ad5e..00000000 --- a/hbt/categorization/default.py +++ /dev/null @@ -1,155 +0,0 @@ -# coding: utf-8 - -""" -Exemplary selection methods. -""" - -from columnflow.categorization import Categorizer, categorizer -from columnflow.util import maybe_import - -ak = maybe_import("awkward") - - -# -# dummy selector -# - -@categorizer(uses={"event"}) -def cat_all(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - # keep all events - return events, ak.ones_like(events.event) == 1 - - -# -# lepton channels -# - -@categorizer(uses={"channel_id"}) -def cat_etau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - return events, events.channel_id == self.config_inst.channels.n.etau.id - - -@categorizer(uses={"channel_id"}) -def cat_mutau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - return events, events.channel_id == self.config_inst.channels.n.mutau.id - - -@categorizer(uses={"channel_id"}) -def cat_tautau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - return events, events.channel_id == self.config_inst.channels.n.tautau.id - - -@categorizer(uses={"channel_id"}) -def cat_ee(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - return events, events.channel_id == self.config_inst.channels.n.ee.id - - -@categorizer(uses={"channel_id"}) -def cat_mumu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - return events, events.channel_id == self.config_inst.channels.n.mumu.id - - -@categorizer(uses={"channel_id"}) -def cat_emu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - return events, events.channel_id == self.config_inst.channels.n.emu.id - - -# -# QCD regions -# - -@categorizer(uses={"leptons_os"}) -def cat_os(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - # oppositive sign leptons - return events, events.leptons_os == 1 - - -@categorizer(uses={"leptons_os"}) -def cat_ss(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - # same sign leptons - return events, events.leptons_os == 0 - - -@categorizer(uses={"tau2_isolated"}) -def cat_iso(self: Categorizer, events: 
ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - # isolated tau2 - return events, events.tau2_isolated == 1 - - -@categorizer(uses={"tau2_isolated"}) -def cat_noniso(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - # noon-isolated tau2 - return events, events.tau2_isolated == 0 - - -# -# kinematic regions -# - -@categorizer(uses={"event"}) -def cat_incl(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - # fully inclusive selection - return events, ak.ones_like(events.event) == 1 - - -@categorizer(uses={"Jet.{pt,phi}"}) -def cat_2j(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - # two or more jets - return events, ak.num(events.Jet.pt, axis=1) >= 2 - - -@categorizer(uses={"Jet.btagPNetB"}) -def cat_res1b(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - # exactly pnet b-tags - wp = self.config_inst.x.btag_working_points["particleNet"]["medium"] - tagged = events.Jet.btagPNetB > wp - return events, ak.sum(tagged, axis=1) == 1 - - -@categorizer(uses={"Jet.btagPNetB"}) -def cat_res2b(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - # at least two medium pnet b-tags - wp = self.config_inst.x.btag_working_points["particleNet"]["medium"] - tagged = events.Jet.btagPNetB > wp - return events, ak.sum(tagged, axis=1) >= 2 - - -@categorizer(uses={cat_res1b, cat_res2b, "FatJet.{pt,phi}"}) -def cat_boosted(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - # not res1b or res2b, and exactly one selected fat jet that should also pass a tighter pt cut - # note: this is just a draft - mask = ( - (ak.num(events.FatJet, axis=1) == 1) & - (ak.sum(events.FatJet.pt > 350, axis=1) == 1) & - ~self[cat_res1b](events, **kwargs)[1] & - ~self[cat_res2b](events, **kwargs)[1] - ) - return events, mask - - -@categorizer(uses={"{Electron,Muon,Tau}.{pt,eta,phi,mass}"}) -def cat_dy(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - # e/mu driven DY region: mll > 40 and met < 30 (to supress tau decays into e/mu) - leps = ak.concatenate([events.Electron * 1, events.Muon * 1, events.Tau * 1], axis=1)[:, :2] - mask = ( - (leps.sum(axis=1).mass > 40) & - (events[self.config_inst.x.met_name].pt < 30) - ) - return events, mask - - -@cat_dy.init -def cat_dy_init(self: Categorizer) -> None: - self.uses.add(f"{self.config_inst.x.met_name}.{{pt,phi}}") - - -@categorizer(uses={"{Electron,Muon,Tau}.{pt,eta,phi,mass}"}) -def cat_tt(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: - # tt region: met > 30 (due to neutrino presence in leptonic w decays) - mask = events[self.config_inst.x.met_name].pt > 30 - return events, mask - - -@cat_tt.init -def cat_tt_init(self: Categorizer) -> None: - self.uses.add(f"{self.config_inst.x.met_name}.{{pt,phi}}") diff --git a/hbt/columnflow_patches.py b/hbt/columnflow_patches.py deleted file mode 100644 index f79f2ab3..00000000 --- a/hbt/columnflow_patches.py +++ /dev/null @@ -1,79 +0,0 @@ -# coding: utf-8 - -""" -Collection of patches of underlying columnflow tasks. -""" - -import os -import getpass - -import law -from columnflow.util import memoize - - -logger = law.logger.get_logger(__name__) - - -@memoize -def patch_bundle_repo_exclude_files(): - """ - Patches the exclude_files attribute of the existing BundleRepo task to exclude files - specific to _this_ analysis project. 
- """ - from columnflow.tasks.framework.remote import BundleRepo - - # get the relative path to CF_BASE - cf_rel = os.path.relpath(os.environ["CF_BASE"], os.environ["HBT_BASE"]) - - # amend exclude files to start with the relative path to CF_BASE - exclude_files = [os.path.join(cf_rel, path) for path in BundleRepo.exclude_files] - - # add additional files - exclude_files.extend([ - "docs", "tests", "data", "assets", ".law", ".setups", ".data", ".github", - ]) - - # overwrite them - BundleRepo.exclude_files[:] = exclude_files - - logger.debug(f"patched exclude_files of {BundleRepo.task_family}") - - -@memoize -def patch_remote_workflow_poll_interval(): - """ - Patches the HTCondorWorkflow and SlurmWorkflow tasks to change the default value of the - poll_interval parameter to 30 seconds. - """ - from columnflow.tasks.framework.remote import HTCondorWorkflow, SlurmWorkflow - - HTCondorWorkflow.poll_interval._default = 0.5 # minutes - SlurmWorkflow.poll_interval._default = 0.5 # minutes - - logger.debug( - f"patched poll_interval._default of {HTCondorWorkflow.task_family} and " - f"{SlurmWorkflow.task_family}", - ) - - -@memoize -def patch_htcondor_workflow_naf_resources(): - """ - Patches the HTCondorWorkflow task to declare user-specific resources when running on the NAF. - """ - from columnflow.tasks.framework.remote import HTCondorWorkflow - - def htcondor_job_resources(self, job_num, branches): - # one "naf_" resource per job, indendent of the number of branches in the job - return {f"naf_{getpass.getuser()}": 1} - - HTCondorWorkflow.htcondor_job_resources = htcondor_job_resources - - logger.debug(f"patched htcondor_job_resources of {HTCondorWorkflow.task_family}") - - -@memoize -def patch_all(): - patch_bundle_repo_exclude_files() - patch_remote_workflow_poll_interval() - patch_htcondor_workflow_naf_resources() diff --git a/hbt/config/analysis_hbt.py b/hbt/config/analysis_hbt.py deleted file mode 100644 index b5abb61a..00000000 --- a/hbt/config/analysis_hbt.py +++ /dev/null @@ -1,177 +0,0 @@ -# coding: utf-8 - -""" -Configuration of the HH → bb𝜏𝜏 analysis. 
-""" - -from __future__ import annotations - -import importlib - -import order as od - -from hbt.config.configs_hbt import add_config - - -# -# the main analysis object -# - -analysis_hbt = od.Analysis( - name="analysis_hbt", - id=1, -) - -# analysis-global versions -# (empty since we use the lookup from the law.cfg instead) -analysis_hbt.x.versions = {} - -# files of bash sandboxes that might be required by remote tasks -# (used in cf.HTCondorWorkflow) -analysis_hbt.x.bash_sandboxes = [ - "$CF_BASE/sandboxes/cf.sh", - "$CF_BASE/sandboxes/venv_columnar.sh", - "$HBT_BASE/sandboxes/venv_columnar_tf.sh", -] - -# files of cmssw sandboxes that might be required by remote tasks -# (used in cf.HTCondorWorkflow) -analysis_hbt.x.cmssw_sandboxes = [ - # "$CF_BASE/sandboxes/cmssw_default.sh", -] - -# config groups for conveniently looping over certain configs -# (used in wrapper_factory) -analysis_hbt.x.config_groups = {} - -# named function hooks that can modify store_parts of task outputs if needed -analysis_hbt.x.store_parts_modifiers = {} - - -# -# define configs -# - -def add_lazy_config( - *, - campaign_module: str, - campaign_attr: str, - config_name: str, - config_id: int, - add_limited: bool = True, - **kwargs, -): - def create_factory( - config_id: int, - config_name_postfix: str = "", - limit_dataset_files: int | None = None, - ): - def factory(configs: od.UniqueObjectIndex): - # import the campaign - mod = importlib.import_module(campaign_module) - campaign = getattr(mod, campaign_attr) - - return add_config( - analysis_hbt, - campaign.copy(), - config_name=config_name + config_name_postfix, - config_id=config_id, - limit_dataset_files=limit_dataset_files, - **kwargs, - ) - return factory - - analysis_hbt.configs.add_lazy_factory(config_name, create_factory(config_id)) - if add_limited: - analysis_hbt.configs.add_lazy_factory(f"{config_name}_limited", create_factory(config_id + 200, "_limited", 2)) - - -# 2022, preEE -# TODO: remove after move to v14 -add_lazy_config( - campaign_module="cmsdb.campaigns.run3_2022_preEE_nano_uhh_v12", - campaign_attr="campaign_run3_2022_preEE_nano_uhh_v12", - config_name="22pre_v12", - config_id=5012, -) -add_lazy_config( - campaign_module="cmsdb.campaigns.run3_2022_preEE_nano_uhh_v14", - campaign_attr="campaign_run3_2022_preEE_nano_uhh_v14", - config_name="22pre_v14", - config_id=5014, -) - -# 2022, postEE -# TODO: remove after move to v14 -add_lazy_config( - campaign_module="cmsdb.campaigns.run3_2022_postEE_nano_uhh_v12", - campaign_attr="campaign_run3_2022_postEE_nano_uhh_v12", - config_name="22post_v12", - config_id=6012, -) -add_lazy_config( - campaign_module="cmsdb.campaigns.run3_2022_postEE_nano_uhh_v14", - campaign_attr="campaign_run3_2022_postEE_nano_uhh_v14", - config_name="22post_v14", - config_id=6014, -) - -# 2023, preBPix -add_lazy_config( - campaign_module="cmsdb.campaigns.run3_2023_preBPix_nano_uhh_v14", - campaign_attr="campaign_run3_2023_preBPix_nano_uhh_v14", - config_name="23pre_v14", - config_id=7014, -) - -# 2023, postBPix -add_lazy_config( - campaign_module="cmsdb.campaigns.run3_2023_postBPix_nano_uhh_v14", - campaign_attr="campaign_run3_2023_postBPix_nano_uhh_v14", - config_name="23post_v14", - config_id=8014, -) - -# -# sync configs -# - -# 2022, preEE -add_lazy_config( - campaign_module="cmsdb.campaigns.run3_2022_preEE_nano_v12", - campaign_attr="campaign_run3_2022_preEE_nano_v12", - config_name="22pre_v12_sync", - config_id=5112, - add_limited=False, - sync_mode=True, -) - -# 2022, postEE -add_lazy_config( - 
campaign_module="cmsdb.campaigns.run3_2022_postEE_nano_v12", - campaign_attr="campaign_run3_2022_postEE_nano_v12", - config_name="22post_v12_sync", - config_id=6112, - add_limited=False, - sync_mode=True, -) - -# 2023, preBPix -add_lazy_config( - campaign_module="cmsdb.campaigns.run3_2023_preBPix_nano_v13", - campaign_attr="campaign_run3_2023_preBPix_nano_v13", - config_name="23pre_v13_sync", - config_id=7113, - add_limited=False, - sync_mode=True, -) - -# 2023, postBPix -add_lazy_config( - campaign_module="cmsdb.campaigns.run3_2023_postBPix_nano_v13", - campaign_attr="campaign_run3_2023_postBPix_nano_v13", - config_name="23post_v13_sync", - config_id=8113, - add_limited=False, - sync_mode=True, -) diff --git a/hbt/config/categories.py b/hbt/config/categories.py deleted file mode 100644 index ceb77b7d..00000000 --- a/hbt/config/categories.py +++ /dev/null @@ -1,136 +0,0 @@ -# coding: utf-8 - -""" -Definition of categories. -""" - -import functools - -import order as od - -from columnflow.config_util import add_category, create_category_combinations -from columnflow.types import Any - - -def add_categories(config: od.Config) -> None: - """ - Adds all categories to a *config*. - """ - # root category (-1 has special meaning in cutflow) - root_cat = add_category(config, name="all", id=-1, selection="cat_all", label="") - _add_category = functools.partial(add_category, parent=root_cat) - - # lepton channels - _add_category(config, name="etau", id=1, selection="cat_etau", label=config.channels.n.etau.label) - _add_category(config, name="mutau", id=2, selection="cat_mutau", label=config.channels.n.mutau.label) - _add_category(config, name="tautau", id=3, selection="cat_tautau", label=config.channels.n.tautau.label) - _add_category(config, name="ee", id=4, selection="cat_ee", label=config.channels.n.ee.label) - _add_category(config, name="mumu", id=5, selection="cat_mumu", label=config.channels.n.mumu.label) - _add_category(config, name="emu", id=6, selection="cat_emu", label=config.channels.n.emu.label) - - # qcd regions - _add_category(config, name="os", id=10, selection="cat_os", label="OS", tags={"os"}) - _add_category(config, name="ss", id=11, selection="cat_ss", label="SS", tags={"ss"}) - _add_category(config, name="iso", id=12, selection="cat_iso", label=r"iso", tags={"iso"}) - _add_category(config, name="noniso", id=13, selection="cat_noniso", label=r"non-iso", tags={"noniso"}) # noqa: E501 - - # kinematic categories - _add_category(config, name="incl", id=100, selection="cat_incl", label="inclusive") - _add_category(config, name="2j", id=110, selection="cat_2j", label="2 jets") - _add_category(config, name="dy", id=210, selection="cat_dy", label="DY enriched") - _add_category(config, name="tt", id=220, selection="cat_tt", label=r"$t\bar{t}$ enriched") - - _add_category(config, name="res1b", id=300, selection="cat_res1b", label="res1b") - _add_category(config, name="res2b", id=301, selection="cat_res2b", label="res2b") - _add_category(config, name="boosted", id=310, selection="cat_boosted", label="boosted") - - # - # build groups - # - - def name_fn(categories: dict[str, od.Category]) -> str: - return "__".join(cat.name for cat in categories.values() if cat) - - def kwargs_fn(categories: dict[str, od.Category], add_qcd_group: bool = True) -> dict[str, Any]: - # build auxiliary information - aux = {} - if add_qcd_group: - aux["qcd_group"] = name_fn({ - name: cat for name, cat in categories.items() - if name not in {"sign", "tau2"} - }) - # return the desired kwargs - return { - # just 
increment the category id - # NOTE: for this to be deterministic, the order of the categories must no change! - "id": "+", - # join all tags - "tags": set.union(*[cat.tags for cat in categories.values() if cat]), - # auxiliary information - "aux": aux, - # label - "label": ", ".join([ - cat.label or cat.name - for cat in categories.values() - if cat.name != "os" # os is the default - ]) or None, - } - - # main analysis categories - main_categories = { - # channels first - "channel": [ - config.get_category("etau"), config.get_category("mutau"), config.get_category("tautau"), - ], - # kinematic regions in the middle (to be extended) - "kin": [ - config.get_category("incl"), - config.get_category("2j"), - config.get_category("res1b"), - config.get_category("res2b"), - config.get_category("boosted"), - ], - # qcd regions last - "sign": [config.get_category("os"), config.get_category("ss")], - "tau2": [config.get_category("iso"), config.get_category("noniso")], - } - - create_category_combinations( - config=config, - categories=main_categories, - name_fn=name_fn, - kwargs_fn=functools.partial(kwargs_fn, add_qcd_group=True), - ) - - # control categories - control_categories = { - # channels first - "channel": [ - config.get_category("ee"), config.get_category("mumu"), config.get_category("emu"), - ], - # kinematic regions in the middle (to be extended) - "kin": [config.get_category("incl"), config.get_category("dy"), config.get_category("tt")], - # relative sign last - "sign": [config.get_category("os")], - } - - def skip_fn_ctrl(categories: dict[str, od.Category]) -> bool: - if "channel" not in categories or "kin" not in categories: - return False - ch_cat = categories["channel"] - kin_cat = categories["kin"] - # skip dy in emu - if kin_cat.name == "dy" and ch_cat.name == "emu": - return True - # skip tt in ee/mumu - if kin_cat.name == "tt" and ch_cat.name in ("ee", "mumu"): - return True - return False - - create_category_combinations( - config=config, - categories=control_categories, - name_fn=name_fn, - kwargs_fn=functools.partial(kwargs_fn, add_qcd_group=False), - skip_fn=skip_fn_ctrl, - ) diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py deleted file mode 100644 index 6a2b5682..00000000 --- a/hbt/config/configs_hbt.py +++ /dev/null @@ -1,1492 +0,0 @@ -# coding: utf-8 - -""" -Configuration of the HH → bb𝜏𝜏 analysis. 
-""" - -from __future__ import annotations - -import os -import re -import itertools -import functools - -import yaml -import law -import order as od -from scinum import Number - -from columnflow.util import DotDict, dev_sandbox -from columnflow.config_util import ( - get_root_processes_from_campaign, add_shift_aliases, get_shifts_from_sources, - verify_config_processes, -) -from columnflow.columnar_util import ColumnCollection, skip_column - - -thisdir = os.path.dirname(os.path.abspath(__file__)) - -logger = law.logger.get_logger(__name__) - - -def add_config( - analysis: od.Analysis, - campaign: od.Campaign, - config_name: str | None = None, - config_id: int | None = None, - limit_dataset_files: int | None = None, - sync_mode: bool = False, -) -> od.Config: - # gather campaign data - run = campaign.x.run - year = campaign.x.year - year2 = year % 100 - - # some validations - assert run in {2, 3} - assert year in {2016, 2017, 2018, 2022, 2023} - - # get all root processes - procs = get_root_processes_from_campaign(campaign) - - # create a config by passing the campaign, so id and name will be identical - cfg = od.Config( - name=config_name, - id=config_id, - campaign=campaign, - aux={ - "sync": sync_mode, - }, - ) - - ################################################################################################ - # helpers - ################################################################################################ - - # helper to enable processes / datasets only for a specific era - def _match_era( - *, - run: int | set[int] | None = None, - year: int | set[int] | None = None, - postfix: str | set[int] | None = None, - tag: str | set[str] | None = None, - nano: int | set[int] | None = None, - sync: bool = False, - ) -> bool: - return ( - (run is None or campaign.x.run in law.util.make_set(run)) and - (year is None or campaign.x.year in law.util.make_set(year)) and - (postfix is None or campaign.x.postfix in law.util.make_set(postfix)) and - (tag is None or campaign.has_tag(tag, mode=any)) and - (nano is None or campaign.x.version in law.util.make_set(nano)) and - (sync is sync_mode) - ) - - def if_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]: - return list(filter(bool, values or [])) if _match_era(**kwargs) else [] - - def if_not_era(*, values: list[str | None] | None = None, **kwargs) -> list[str]: - return list(filter(bool, values or [])) if not _match_era(**kwargs) else [] - - ################################################################################################ - # processes - ################################################################################################ - - # add custom processes - if not sync_mode: - cfg.add_process( - name="v", - id=7997, - label="W/Z", - processes=[procs.n.w, procs.n.z], - ) - cfg.add_process( - name="multiboson", - id=7998, - label="Multiboson", - processes=[procs.n.vv, procs.n.vvv], - ) - cfg.add_process( - name="all_v", - id=7996, - label="Multiboson", - processes=[cfg.processes.n.v, cfg.processes.n.multiboson], - ) - cfg.add_process( - name="tt_multiboson", - id=7999, - label=r"$t\bar{t}$ + Multiboson", - processes=[procs.n.ttv, procs.n.ttvv], - ) - - # processes we are interested in - process_names = [ - "data", - "tt", - "st", - "dy", - "v", - "multiboson", - "tt_multiboson", - "qcd", - "h", - "hh_ggf_hbb_htt_kl1_kt1", - "hh_ggf_hbb_htt_kl0_kt1", - "hh_ggf_hbb_htt_kl2p45_kt1", - "hh_ggf_hbb_htt_kl5_kt1", - "hh_ggf_hbb_htt_kl0_kt1_c21", - "hh_ggf_hbb_htt_kl1_kt1_c23", - "hh_vbf_hbb_htt_kv1_k2v1_kl1", 
- "hh_vbf_hbb_htt_kv1_k2v0_kl1", - "hh_vbf_hbb_htt_kv1_k2v1_kl2", - "hh_vbf_hbb_htt_kv1_k2v2_kl1", - "hh_vbf_hbb_htt_kv1p74_k2v1p37_kl14p4", - "hh_vbf_hbb_htt_kvm0p012_k2v0p03_kl10p2", - "hh_vbf_hbb_htt_kvm0p758_k2v1p44_klm19p3", - "hh_vbf_hbb_htt_kvm0p962_k2v0p959_klm1p43", - "hh_vbf_hbb_htt_kvm1p21_k2v1p94_klm0p94", - "hh_vbf_hbb_htt_kvm1p6_k2v2p72_klm1p36", - "hh_vbf_hbb_htt_kvm1p83_k2v3p57_klm3p39", - "hh_vbf_hbb_htt_kvm2p12_k2v3p87_klm5p96", - "radion_hh_ggf_hbb_htt_m450", - "radion_hh_ggf_hbb_htt_m1200", - "graviton_hh_ggf_hbb_htt_m450", - "graviton_hh_ggf_hbb_htt_m1200", - ] - for process_name in process_names: - if process_name in procs: - proc = procs.get(process_name) - elif process_name == "qcd": - # qcd is not part of procs since there is no dataset registered for it - from cmsdb.processes.qcd import qcd - proc = qcd - else: - # development switch in case datasets are not _yet_ there - continue - - # add tags to processes - if process_name.startswith("hh_"): - proc.add_tag("signal") - proc.add_tag("nonresonant_signal") - if process_name.startswith(("graviton_hh_", "radion_hh_")): - proc.add_tag("signal") - proc.add_tag("resonant_signal") - if re.match(r"^tt(|_.+)$", process_name): - proc.add_tag({"ttbar", "tt"}) - if re.match(r"^dy(|_.+)$", process_name): - proc.add_tag("dy") - if re.match(r"^w_lnu(|_.+)$", process_name): - proc.add_tag("w_lnu") - - # add the process - cfg.add_process(proc) - - # configure colors, labels, etc - from hbt.config.styles import stylize_processes - stylize_processes(cfg) - - ################################################################################################ - # datasets - ################################################################################################ - - # add datasets we need to study - dataset_names = [ - # hh ggf - "hh_ggf_hbb_htt_kl1_kt1_powheg", - "hh_ggf_hbb_htt_kl0_kt1_powheg", - "hh_ggf_hbb_htt_kl2p45_kt1_powheg", - "hh_ggf_hbb_htt_kl5_kt1_powheg", - - # hh vbf - "hh_vbf_hbb_htt_kv1_k2v1_kl1_madgraph", - "hh_vbf_hbb_htt_kv1_k2v0_kl1_madgraph", - *if_era(year=2022, values=[ - "hh_vbf_hbb_htt_kv1_k2v1_kl2_madgraph", # Poisson60KeepRAW for 2022post - "hh_vbf_hbb_htt_kv1_k2v2_kl1_madgraph", # Poisson60KeepRAW for 2022post - ]), - "hh_vbf_hbb_htt_kv1p74_k2v1p37_kl14p4_madgraph", - "hh_vbf_hbb_htt_kvm0p012_k2v0p03_kl10p2_madgraph", - "hh_vbf_hbb_htt_kvm0p758_k2v1p44_klm19p3_madgraph", - "hh_vbf_hbb_htt_kvm0p962_k2v0p959_klm1p43_madgraph", - "hh_vbf_hbb_htt_kvm1p21_k2v1p94_klm0p94_madgraph", - "hh_vbf_hbb_htt_kvm1p6_k2v2p72_klm1p36_madgraph", - "hh_vbf_hbb_htt_kvm1p83_k2v3p57_klm3p39_madgraph", - "hh_vbf_hbb_htt_kvm2p12_k2v3p87_klm5p96_madgraph", - - # x -> hh resonances - *if_era(year=2022, values=[ - "radion_hh_ggf_hbb_htt_m450_madgraph", - "radion_hh_ggf_hbb_htt_m1200_madgraph", - "graviton_hh_ggf_hbb_htt_m450_madgraph", - "graviton_hh_ggf_hbb_htt_m1200_madgraph", - ]), - - # ttbar - "tt_sl_powheg", - "tt_dl_powheg", - "tt_fh_powheg", - - # single top - "st_tchannel_t_4f_powheg", - "st_tchannel_tbar_4f_powheg", - "st_twchannel_t_sl_powheg", - "st_twchannel_tbar_sl_powheg", - "st_twchannel_t_dl_powheg", - "st_twchannel_tbar_dl_powheg", - "st_twchannel_t_fh_powheg", - "st_twchannel_tbar_fh_powheg", - "st_schannel_t_lep_4f_amcatnlo", - "st_schannel_tbar_lep_4f_amcatnlo", - - # tt + v - "ttw_wlnu_amcatnlo", - "ttz_zqq_amcatnlo", - "ttz_zll_m4to50_amcatnlo", - "ttz_zll_m50toinf_amcatnlo", - - # tt + vv - "ttww_madgraph", - *if_not_era(year=2022, tag="preEE", values=[ - "ttwz_madgraph", # not available in 22pre - ]), - 
"ttzz_madgraph", - - # dy - "dy_m4to10_amcatnlo", - "dy_m10to50_amcatnlo", - "dy_m50toinf_amcatnlo", - "dy_m50toinf_0j_amcatnlo", - "dy_m50toinf_1j_amcatnlo", - "dy_m50toinf_2j_amcatnlo", - "dy_m50toinf_1j_pt40to100_amcatnlo", - "dy_m50toinf_1j_pt100to200_amcatnlo", - "dy_m50toinf_1j_pt200to400_amcatnlo", - "dy_m50toinf_1j_pt400to600_amcatnlo", - "dy_m50toinf_1j_pt600toinf_amcatnlo", - "dy_m50toinf_2j_pt40to100_amcatnlo", - "dy_m50toinf_2j_pt100to200_amcatnlo", - "dy_m50toinf_2j_pt200to400_amcatnlo", - "dy_m50toinf_2j_pt400to600_amcatnlo", - "dy_m50toinf_2j_pt600toinf_amcatnlo", - - # w + jets - "w_lnu_amcatnlo", - "w_lnu_0j_amcatnlo", - "w_lnu_1j_amcatnlo", - "w_lnu_2j_amcatnlo", - "w_lnu_1j_pt40to100_amcatnlo", - "w_lnu_1j_pt100to200_amcatnlo", - "w_lnu_1j_pt200to400_amcatnlo", - "w_lnu_1j_pt400to600_amcatnlo", - "w_lnu_1j_pt600toinf_amcatnlo", - "w_lnu_2j_pt40to100_amcatnlo", - "w_lnu_2j_pt100to200_amcatnlo", - "w_lnu_2j_pt200to400_amcatnlo", - "w_lnu_2j_pt400to600_amcatnlo", - "w_lnu_2j_pt600toinf_amcatnlo", - - # z + jets (not DY but qq) - # decided to drop z_qq for now as their contribution is negligible, - # but we should check that again at a much later stage - # "z_qq_1j_pt100to200_amcatnlo", - # "z_qq_1j_pt200to400_amcatnlo", - # "z_qq_1j_pt400to600_amcatnlo", - # "z_qq_1j_pt600toinf_amcatnlo", - # "z_qq_2j_pt100to200_amcatnlo", - # "z_qq_2j_pt200to400_amcatnlo", - # "z_qq_2j_pt400to600_amcatnlo", - # "z_qq_2j_pt600toinf_amcatnlo", - - # vv - "zz_pythia", - "wz_pythia", - "ww_pythia", - - # vvv - "www_4f_amcatnlo", - "wwz_4f_amcatnlo", - "wzz_amcatnlo", - "zzz_amcatnlo", - - # single H - "h_ggf_htt_powheg", - "h_vbf_htt_powheg", - "vh_hnonbb_amcatnlo", - "wmh_wlnu_hbb_powheg", - "wph_wlnu_hbb_powheg", - "wph_htt_powheg", - "wmh_htt_powheg", - "wph_wqq_hbb_powheg", - "wmh_wqq_hbb_powheg", - "zh_zll_hbb_powheg", - "zh_zqq_hbb_powheg", - "zh_htt_powheg", - "zh_gg_zll_hbb_powheg", - "zh_gg_zqq_hbb_powheg", - "zh_gg_znunu_hbb_powheg", - "tth_hbb_powheg", - "tth_hnonbb_powheg", - - # data - *if_era(year=2022, tag="preEE", values=[ - f"data_{stream}_{period}" for stream in ["e", "mu", "tau"] for period in "cd" - ]), - *if_era(year=2022, tag="postEE", values=[ - f"data_{stream}_{period}" for stream in ["e", "mu", "tau"] for period in "efg" - ]), - *if_era(year=2023, tag="preBPix", values=[ - f"data_{stream}_c{v}" for stream in ["e", "mu", "tau"] for v in "1234" - ]), - *if_era(year=2023, tag="postBPix", values=[ - f"data_{stream}_d{v}" for stream in ["e", "mu", "tau"] for v in "12" - ]), - ] - for dataset_name in dataset_names: - # skip when in sync mode and not exiting - if sync_mode and not campaign.has_dataset(dataset_name): - continue - - # add the dataset - dataset = cfg.add_dataset(campaign.get_dataset(dataset_name)) - # add tags to datasets - if dataset.name.startswith("data_e_"): - dataset.add_tag({"etau", "emu_from_e", "ee"}) - if dataset.name.startswith("data_mu_"): - dataset.add_tag({"mutau", "emu_from_mu", "mumu"}) - if dataset.name.startswith("data_tau_"): - dataset.add_tag({"tautau"}) - if dataset.name.startswith("tt_"): - dataset.add_tag({"has_top", "ttbar", "tt"}) - if dataset.name.startswith("st_"): - dataset.add_tag({"has_top", "single_top", "st"}) - if dataset.name.startswith("dy_"): - dataset.add_tag("dy") - if re.match(r"^dy_m50toinf_\dj_(|pt.+_)amcatnlo$", dataset.name): - dataset.add_tag("dy_stitched") - if dataset.name.startswith("w_lnu_"): - dataset.add_tag("w_lnu") - if re.match(r"^w_lnu_\dj_(|pt.+_)amcatnlo$", dataset.name): - dataset.add_tag("w_lnu_stitched") 
- # datasets that are known to have no lhe info at all - if law.util.multi_match(dataset.name, [ - r"^(ww|wz|zz)_.*pythia$", - r"^tt(w|z)_.*amcatnlo$", - ]): - dataset.add_tag("no_lhe_weights") - # datasets that are allowed to contain some events with missing lhe infos - # (known to happen for amcatnlo) - if dataset.name.endswith("_amcatnlo"): - dataset.add_tag("partial_lhe_weights") - if dataset_name.startswith("hh_"): - dataset.add_tag("signal") - dataset.add_tag("nonresonant_signal") - if dataset_name.startswith("hh_ggf_"): - dataset.add_tag("ggf") - elif dataset_name.startswith("hh_vbf_"): - dataset.add_tag("vbf") - if dataset_name.startswith(("graviton_hh_", "radion_hh_")): - dataset.add_tag("signal") - dataset.add_tag("resonant_signal") - if dataset_name.startswith(("graviton_hh_ggf_", "radion_hh_ggf")): - dataset.add_tag("ggf") - elif dataset_name.startswith(("graviton_hh_vbf_", "radion_hh_vbf")): - dataset.add_tag("vbf") - - # bad ecalBadCalibFilter MET filter in 2022 data - # https://twiki.cern.ch/twiki/bin/view/CMS/MissingETOptionalFiltersRun2?rev=172#ECal_BadCalibration_Filter_Flag - # https://cms-talk.web.cern.ch/t/noise-met-filters-in-run-3/63346/5 - if year == 2022 and dataset.is_data and dataset.x.era in "FG": - dataset.add_tag("broken_ecalBadCalibFilter") - - # apply an optional limit on the number of files - if limit_dataset_files: - for info in dataset.info.values(): - info.n_files = min(info.n_files, limit_dataset_files) - - # apply synchronization settings - if sync_mode: - # only first file per - for info in dataset.info.values(): - info.n_files = 1 - - # verify that the root process of each dataset is part of any of the registered processes - if not sync_mode: - verify_config_processes(cfg, warn=True) - - ################################################################################################ - # task defaults and groups - ################################################################################################ - - # default objects, such as calibrator, selector, producer, ml model, inference model, etc - cfg.x.default_calibrator = "default" - cfg.x.default_selector = "default" - cfg.x.default_producer = "default" - cfg.x.default_ml_model = None - cfg.x.default_inference_model = "default_no_shifts" - cfg.x.default_categories = ("all",) - cfg.x.default_variables = ("njet", "nbtag", "res_pdnn_hh", "res_dnn_hh") - cfg.x.default_weight_producer = "default" - - # process groups for conveniently looping over certain processs - # (used in wrapper_factory and during plotting) - cfg.x.process_groups = { - "signals": [ - "hh_ggf_hbb_htt_kl1_kt1", - "hh_vbf_hbb_htt_kv1_k2v1_kl1", - ], - "signals_ggf": [ - "hh_ggf_hbb_htt_kl0_kt1", - "hh_ggf_hbb_htt_kl1_kt1", - "hh_ggf_hbb_htt_kl2p45_kt1", - "hh_ggf_hbb_htt_kl5_kt1", - ], - "backgrounds": (backgrounds := [ - "dy", - "tt", - "qcd", - "st", - "tt_multiboson", - "multiboson", - "v", - "h", - "ewk", - ]), - "dy_split": [ - "dy_m4to10", "dy_m10to50", - "dy_m50toinf_0j", - "dy_m50toinf_1j_pt40to100", "dy_m50toinf_1j_pt100to200", "dy_m50toinf_1j_pt200to400", - "dy_m50toinf_1j_pt400to600", "dy_m50toinf_1j_pt600toinf", - "dy_m50toinf_2j_pt40to100", "dy_m50toinf_2j_pt100to200", "dy_m50toinf_2j_pt200to400", - "dy_m50toinf_2j_pt400to600", "dy_m50toinf_2j_pt600toinf", - ], - "dy_split_no_incl": [ - "dy_m4to10", "dy_m10to50", - "dy_m50toinf_0j", "dy_m50toinf_1j", "dy_m50toinf_2j", - "dy_m50toinf_1j_pt0to40", "dy_m50toinf_1j_pt40to100", "dy_m50toinf_1j_pt100to200", - "dy_m50toinf_1j_pt200to400", "dy_m50toinf_1j_pt400to600", 
"dy_m50toinf_1j_pt600toinf", - "dy_m50toinf_2j_pt0to40", "dy_m50toinf_2j_pt40to100", "dy_m50toinf_2j_pt100to200", - "dy_m50toinf_2j_pt200to400", "dy_m50toinf_2j_pt400to600", "dy_m50toinf_2j_pt600toinf", - ], - "sm_ggf": (sm_ggf_group := ["hh_ggf_hbb_htt_kl1_kt1", *backgrounds]), - "sm": (sm_group := ["hh_ggf_hbb_htt_kl1_kt1", "hh_vbf_hbb_htt_kv1_k2v1_kl1", *backgrounds]), - "sm_ggf_data": ["data"] + sm_ggf_group, - "sm_data": ["data"] + sm_group, - } - - # define inclusive datasets for the stitched process identification with corresponding leaf processes - if run == 3 and not sync_mode: - # drell-yan - cfg.x.dy_stitching = { - "m50toinf": { - "inclusive_dataset": cfg.datasets.n.dy_m50toinf_amcatnlo, - "leaf_processes": [ - # the following processes cover the full njet and pt phasespace - procs.n.dy_m50toinf_0j, - *( - procs.get(f"dy_m50toinf_{nj}j_pt{pt}") - for nj in [1, 2] - for pt in ["0to40", "40to100", "100to200", "200to400", "400to600", "600toinf"] - ), - procs.n.dy_m50toinf_ge3j, - ], - }, - } - # w+jets - cfg.x.w_lnu_stitching = { - "incl": { - "inclusive_dataset": cfg.datasets.n.w_lnu_amcatnlo, - "leaf_processes": [ - # the following processes cover the full njet and pt phasespace - procs.n.w_lnu_0j, - *( - procs.get(f"w_lnu_{nj}j_pt{pt}") - for nj in [1, 2] - for pt in ["0to40", "40to100", "100to200", "200to400", "400to600", "600toinf"] - ), - procs.n.w_lnu_ge3j, - ], - }, - } - - # dataset groups for conveniently looping over certain datasets - # (used in wrapper_factory and during plotting) - cfg.x.dataset_groups = { - "data": (data_group := [dataset.name for dataset in cfg.datasets if dataset.is_data]), - "backgrounds": (backgrounds := [ - dataset.name for dataset in cfg.datasets - if dataset.is_mc and not dataset.has_tag("signal") - ]), - "backgrounds_unstitched": (backgrounds_unstitched := [ - dataset.name for dataset in cfg.datasets - if ( - dataset.is_mc and - not dataset.has_tag("signal") and - not dataset.has_tag({"dy_stitched", "w_lnu_stitched"}, mode=any) - ) - ]), - "sm_ggf": (sm_ggf_group := ["hh_ggf_hbb_htt_kl1_kt1_powheg", *backgrounds]), - "sm": (sm_group := [ - "hh_ggf_hbb_htt_kl1_kt1_powheg", - "hh_vbf_hbb_htt_kv1_k2v1_kl1_madgraph", - *backgrounds, - ], - ), - "sm_unstitched": (sm_group_unstitched := [ - "hh_ggf_hbb_htt_kl1_kt1_powheg", - "hh_vbf_hbb_htt_kv1_k2v1_kl1_madgraph", - *backgrounds_unstitched, - ]), - "sm_ggf_data": data_group + sm_ggf_group, - "sm_data": data_group + sm_group, - "sm_data_unstitched": data_group + sm_group_unstitched, - "dy": [dataset.name for dataset in cfg.datasets if dataset.has_tag("dy")], - "w_lnu": [dataset.name for dataset in cfg.datasets if dataset.has_tag("w_lnu")], - } - - # category groups for conveniently looping over certain categories - # (used during plotting) - cfg.x.category_groups = {} - - # variable groups for conveniently looping over certain variables - # (used during plotting) - cfg.x.variable_groups = { - "hh": (hh := [f"hh_{var}" for var in ["energy", "mass", "pt", "eta", "phi", "dr"]]), - "dilep": (dilep := [f"dilep_{var}" for var in ["energy", "mass", "pt", "eta", "phi", "dr"]]), - "dijet": (dijet := [f"dijet_{var}" for var in ["energy", "mass", "pt", "eta", "phi", "dr"]]), - "default": [ - *dijet, *dilep, *hh, - "mu1_pt", "mu1_eta", "mu1_phi", "mu2_pt", "mu2_eta", "mu2_phi", - "e1_pt", "e1_eta", "e1_phi", "e2_pt", "e2_eta", "e2_phi", - "tau1_pt", "tau1_eta", "tau1_phi", "tau2_pt", "tau2_eta", "tau2_phi", - ], - } - - # shift groups for conveniently looping over certain shifts - # (used during plotting) - 
cfg.x.shift_groups = {} - - # selector step groups for conveniently looping over certain steps - # (used in cutflow tasks) - cfg.x.selector_step_groups = { - "default": ["json", "trigger", "met_filter", "jet_veto_map", "lepton", "jet2"], - } - cfg.x.default_selector_steps = "default" - - # plotting overwrites - from hbt.config.styles import setup_plot_styles - setup_plot_styles(cfg) - - ################################################################################################ - # luminosity and normalization - ################################################################################################ - - # lumi values in 1/pb (= 1000/fb) - # https://twiki.cern.ch/twiki/bin/view/CMS/LumiRecommendationsRun2?rev=7 - # https://twiki.cern.ch/twiki/bin/view/CMS/LumiRecommendationsRun3?rev=25 - # https://twiki.cern.ch/twiki/bin/view/CMS/PdmVRun3Analysis - # difference pre-post VFP: https://cds.cern.ch/record/2854610/files/DP2023_006.pdf - if year == 2016 and campaign.has_tag("preVFP"): - cfg.x.luminosity = Number(19_500, { - "lumi_13TeV_2016": 0.01j, - "lumi_13TeV_correlated": 0.006j, - }) - elif year == 2016 and campaign.has_tag("postVFP"): - cfg.x.luminosity = Number(16_800, { - "lumi_13TeV_2016": 0.01j, - "lumi_13TeV_correlated": 0.006j, - }) - elif year == 2017: - cfg.x.luminosity = Number(41_480, { - "lumi_13TeV_2017": 0.02j, - "lumi_13TeV_1718": 0.006j, - "lumi_13TeV_correlated": 0.009j, - }) - elif year == 2018: - cfg.x.luminosity = Number(59_830, { - "lumi_13TeV_2017": 0.015j, - "lumi_13TeV_1718": 0.002j, - "lumi_13TeV_correlated": 0.02j, - }) - elif year == 2022 and campaign.has_tag("preEE"): - cfg.x.luminosity = Number(7_980.4, { - "lumi_13p6TeV_correlated": 0.014j, - }) - elif year == 2022 and campaign.has_tag("postEE"): - cfg.x.luminosity = Number(26_671.7, { - "lumi_13p6TeV_correlated": 0.014j, - }) - elif year == 2023 and campaign.has_tag("preBPix"): - cfg.x.luminosity = Number(17_794, { - "lumi_13p6TeV_correlated": 0.013j, - }) - elif year == 2023 and campaign.has_tag("postBPix"): - cfg.x.luminosity = Number(9_451, { - "lumi_13p6TeV_correlated": 0.013j, - }) - else: - assert False - - # minimum bias cross section in mb (milli) for creating PU weights, values from - # https://twiki.cern.ch/twiki/bin/view/CMS/PileupJSONFileforData?rev=52#Recommended_cross_section - cfg.x.minbias_xs = Number(69.2, 0.046j) - - ################################################################################################ - # met settings - ################################################################################################ - - if run == 2: - cfg.x.met_name = "MET" - cfg.x.raw_met_name = "RawMET" - elif run == 3: - cfg.x.met_name = "PuppiMET" - cfg.x.raw_met_name = "RawPuppiMET" - else: - assert False - - # name of the MET phi correction set - # (used in the met_phi calibrator) - if run == 2: - cfg.x.met_phi_correction_set = r"{variable}_metphicorr_pfmet_{data_source}" - - ################################################################################################ - # jet settings - # TODO: keep a single table somewhere that configures all settings: btag correlation, year - # dependence, usage in calibrator, etc - ################################################################################################ - - # common jec/jer settings configuration - if run == 2: - # https://cms-jerc.web.cern.ch/Recommendations/#run-2 - # https://twiki.cern.ch/twiki/bin/view/CMS/JECDataMC?rev=204 - # https://twiki.cern.ch/twiki/bin/view/CMS/JetResolution?rev=109 - jec_campaign = 
f"Summer19UL{year2}{campaign.x.postfix}" - jec_version = {2016: "V7", 2017: "V5", 2018: "V5"}[year] - jer_campaign = f"Summer{'20' if year == 2016 else '19'}UL{year2}{campaign.x.postfix}" - jer_version = "JR" + {2016: "V3", 2017: "V2", 2018: "V2"}[year] - jet_type = "AK4PFchs" - elif run == 3: - # https://cms-jerc.web.cern.ch/Recommendations/#2022 - jerc_postfix = {2022: "_22Sep2023", 2023: "Prompt23"}[year] - jec_campaign = f"Summer{year2}{campaign.x.postfix}{jerc_postfix}" - jec_version = {2022: "V2", 2023: "V1"}[year] - jer_campaign = f"Summer{year2}{campaign.x.postfix}{jerc_postfix}" - # special "Run" fragment in 2023 jer campaign - if year == 2023: - jer_campaign += f"_Run{'Cv1234' if campaign.has_tag('preBPix') else 'D'}" - jer_version = "JR" + {2022: "V1", 2023: "V1"}[year] - jet_type = "AK4PFPuppi" - else: - assert False - - cfg.x.jec = DotDict.wrap({ - "Jet": { - "campaign": jec_campaign, - "version": jec_version, - "jet_type": jet_type, - "levels": ["L1FastJet", "L2Relative", "L2L3Residual", "L3Absolute"], - "levels_for_type1_met": ["L1FastJet"], - "uncertainty_sources": [ - # "AbsoluteStat", - # "AbsoluteScale", - # "AbsoluteSample", - # "AbsoluteFlavMap", - # "AbsoluteMPFBias", - # "Fragmentation", - # "SinglePionECAL", - # "SinglePionHCAL", - # "FlavorQCD", - # "TimePtEta", - # "RelativeJEREC1", - # "RelativeJEREC2", - # "RelativeJERHF", - # "RelativePtBB", - # "RelativePtEC1", - # "RelativePtEC2", - # "RelativePtHF", - # "RelativeBal", - # "RelativeSample", - # "RelativeFSR", - # "RelativeStatFSR", - # "RelativeStatEC", - # "RelativeStatHF", - # "PileUpDataMC", - # "PileUpPtRef", - # "PileUpPtBB", - # "PileUpPtEC1", - # "PileUpPtEC2", - # "PileUpPtHF", - # "PileUpMuZero", - # "PileUpEnvelope", - # "SubTotalPileUp", - # "SubTotalRelative", - # "SubTotalPt", - # "SubTotalScale", - # "SubTotalAbsolute", - # "SubTotalMC", - "Total", - # "TotalNoFlavor", - # "TotalNoTime", - # "TotalNoFlavorNoTime", - # "FlavorZJet", - # "FlavorPhotonJet", - # "FlavorPureGluon", - # "FlavorPureQuark", - # "FlavorPureCharm", - # "FlavorPureBottom", - "CorrelationGroupMPFInSitu", - "CorrelationGroupIntercalibration", - "CorrelationGroupbJES", - "CorrelationGroupFlavor", - "CorrelationGroupUncorrelated", - ], - }, - }) - - # JER - cfg.x.jer = DotDict.wrap({ - "Jet": { - "campaign": jer_campaign, - "version": jer_version, - "jet_type": jet_type, - }, - }) - - ################################################################################################ - # tau settings - ################################################################################################ - - # tau tagger name - # (needed by TECConfig below as well as tau selection) - if run == 2: - # TODO: still correct? what about 2p5? 
- cfg.x.tau_tagger = "DeepTau2017v2p1" - elif run == 3: - # https://twiki.cern.ch/twiki/bin/view/CMS/TauIDRecommendationForRun3?rev=9 - cfg.x.tau_tagger = "DeepTau2018v2p5" - else: - assert False - - # tec config - from columnflow.calibration.cms.tau import TECConfig - corrector_kwargs = {"wp": "Medium", "wp_VSe": "VVLoose"} if run == 3 else {} - cfg.x.tec = TECConfig(tagger=cfg.x.tau_tagger, corrector_kwargs=corrector_kwargs) - - # pec config - from columnflow.calibration.cms.egamma import EGammaCorrectionConfig - - cfg.x.eec = EGammaCorrectionConfig(correction_set="Scale") - cfg.x.eer = EGammaCorrectionConfig(correction_set="Smearing") - - # tau ID working points - if campaign.x.version < 10: - cfg.x.tau_id_working_points = DotDict.wrap({ - "tau_vs_e": {"vvvloose": 1, "vvloose": 2, "vloose": 4, "loose": 8, "medium": 16, "tight": 32, "vtight": 64, "vvtight": 128}, # noqa - "tau_vs_jet": {"vvvloose": 1, "vvloose": 2, "vloose": 4, "loose": 8, "medium": 16, "tight": 32, "vtight": 64, "vvtight": 128}, # noqa - "tau_vs_mu": {"vloose": 1, "loose": 2, "medium": 4, "tight": 8}, - }) - else: - cfg.x.tau_id_working_points = DotDict.wrap({ - "tau_vs_e": {"vvvloose": 1, "vvloose": 2, "vloose": 3, "loose": 4, "medium": 5, "tight": 6, "vtight": 7, "vvtight": 8}, # noqa - "tau_vs_jet": {"vvvloose": 1, "vvloose": 2, "vloose": 3, "loose": 4, "medium": 5, "tight": 6, "vtight": 7, "vvtight": 8}, # noqa - "tau_vs_mu": {"vloose": 1, "loose": 2, "medium": 3, "tight": 4}, - }) - - # tau trigger working points - cfg.x.tau_trigger_working_points = DotDict.wrap({ - "id_vs_jet_v0": "VVLoose", - "id_vs_jet_gv0": ("Loose", "VVLoose"), - "id_vs_mu_single": "Tight", - "id_vs_mu_cross": "VLoose", - "id_vs_e_single": "VVLoose", - "id_vs_e_cross": "VVLoose", - "trigger_corr": "VVLoose", - }) - - ################################################################################################ - # electron settings - ################################################################################################ - - # names of electron correction sets and working points - # (used in the electron_sf producer) - from columnflow.production.cms.electron import ElectronSFConfig - if run == 2: - e_postfix = "" - if year == 2016: - e_postfix = "preVFP" if campaign.has_tag("preVFP") else "postVFP" - cfg.x.electron_sf_names = ElectronSFConfig( - correction="UL-Electron-ID-SF", - campaign=f"{year}{e_postfix}", - working_point="wp80iso", - ) - elif run == 3: - if year == 2022: - cmpgn = "2022Re-recoBCD" if campaign.has_tag("preEE") else "2022Re-recoE+PromptFG" - elif year == 2023: - cmpgn = "2023PromptC" if campaign.has_tag("preBPix") else "2023PromptD" - cfg.x.electron_sf_names = ElectronSFConfig( - correction="Electron-ID-SF", - campaign=cmpgn, - working_point="wp80iso", - ) - else: - assert False - - ################################################################################################ - # muon settings - ################################################################################################ - - # names of muon correction sets and working points - # (used in the muon producer) - from columnflow.production.cms.muon import MuonSFConfig - if run == 2: - cfg.x.muon_sf_names = MuonSFConfig( - correction="NUM_TightRelIso_DEN_TightIDandIPCut", - ) - elif run == 3: - cfg.x.muon_sf_names = MuonSFConfig( - correction="NUM_TightPFIso_DEN_TightID", - ) - else: - assert False - - ################################################################################################ - # b tagging - 
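The two `tau_id_working_points` blocks above reflect the change in the NanoAOD tau ID columns: older campaigns store a bitmask with one bit per passed working point, newer ones an ordinal value. A sketch of how a selection could read either encoding; the exact column semantics are an assumption here:

```python
def passes_tau_vs_jet(id_value: int, wp: int, bitmask: bool) -> bool:
    # bitmask encoding: the working point's bit is set if the tau passes it;
    # ordinal encoding: the stored value is the tightest working point passed
    return bool(id_value & wp) if bitmask else id_value >= wp

# old scheme: "medium" = 16 (bit), new scheme: "medium" = 5 (ordinal)
print(passes_tau_vs_jet(8 + 16 + 32, 16, bitmask=True))  # True, medium bit is set
print(passes_tau_vs_jet(6, 5, bitmask=False))            # True, tight implies medium
```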
################################################################################################ - - # b-tag working points - btag_key = f"{year}{campaign.x.postfix}" - if run == 2: - # https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL16preVFP?rev=6 - # https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL16postVFP?rev=8 - # https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL17?rev=15 - # https://twiki.cern.ch/twiki/bin/view/CMS/BtagRecommendation106XUL18?rev=18 - cfg.x.btag_working_points = DotDict.wrap({ - "deepjet": { - "loose": {"2016APV": 0.0508, "2016": 0.0480, "2017": 0.0532, "2018": 0.0490}[btag_key], - "medium": {"2016APV": 0.2598, "2016": 0.2489, "2017": 0.3040, "2018": 0.2783}[btag_key], - "tight": {"2016APV": 0.6502, "2016": 0.6377, "2017": 0.7476, "2018": 0.7100}[btag_key], - }, - "deepcsv": { - "loose": {"2016APV": 0.2027, "2016": 0.1918, "2017": 0.1355, "2018": 0.1208}[btag_key], - "medium": {"2016APV": 0.6001, "2016": 0.5847, "2017": 0.4506, "2018": 0.4168}[btag_key], - "tight": {"2016APV": 0.8819, "2016": 0.8767, "2017": 0.7738, "2018": 0.7665}[btag_key], - }, - }) - elif run == 3: - # https://btv-wiki.docs.cern.ch/ScaleFactors/Run3Summer22 - # https://btv-wiki.docs.cern.ch/ScaleFactors/Run3Summer22EE - # https://btv-wiki.docs.cern.ch/ScaleFactors/Run3Summer23 - # https://btv-wiki.docs.cern.ch/ScaleFactors/Run3Summer23BPix - cfg.x.btag_working_points = DotDict.wrap({ - "deepjet": { - "loose": {"2022": 0.0583, "2022EE": 0.0614, "2023": 0.0479, "2023BPix": 0.048}[btag_key], - "medium": {"2022": 0.3086, "2022EE": 0.3196, "2023": 0.2431, "2023BPix": 0.2435}[btag_key], - "tight": {"2022": 0.7183, "2022EE": 0.73, "2023": 0.6553, "2023BPix": 0.6563}[btag_key], - "xtight": {"2022": 0.8111, "2022EE": 0.8184, "2023": 0.7667, "2023BPix": 0.7671}[btag_key], - "xxtight": {"2022": 0.9512, "2022EE": 0.9542, "2023": 0.9459, "2023BPix": 0.9483}[btag_key], - }, - "particleNet": { - "loose": {"2022": 0.047, "2022EE": 0.0499, "2023": 0.0358, "2023BPix": 0.0359}[btag_key], - "medium": {"2022": 0.245, "2022EE": 0.2605, "2023": 0.1917, "2023BPix": 0.1919}[btag_key], - "tight": {"2022": 0.6734, "2022EE": 0.6915, "2023": 0.6172, "2023BPix": 0.6133}[btag_key], - "xtight": {"2022": 0.7862, "2022EE": 0.8033, "2023": 0.7515, "2023BPix": 0.7544}[btag_key], - "xxtight": {"2022": 0.961, "2022EE": 0.9664, "2023": 0.9659, "2023BPix": 0.9688}[btag_key], - }, - "robustParticleTransformer": { - "loose": {"2022": 0.0849, "2022EE": 0.0897, "2023": 0.0681, "2023BPix": 0.0683}[btag_key], - "medium": {"2022": 0.4319, "2022EE": 0.451, "2023": 0.3487, "2023BPix": 0.3494}[btag_key], - "tight": {"2022": 0.8482, "2022EE": 0.8604, "2023": 0.7969, "2023BPix": 0.7994}[btag_key], - "xtight": {"2022": 0.9151, "2022EE": 0.9234, "2023": 0.8882, "2023BPix": 0.8877}[btag_key], - "xxtight": {"2022": 0.9874, "2022EE": 0.9893, "2023": 0.9883, "2023BPix": 0.9883}[btag_key], - }, - }) - else: - assert False - - # JEC uncertainty sources propagated to btag scale factors - # (names derived from contents in BTV correctionlib file) - cfg.x.btag_sf_jec_sources = [ - "", # same as "Total" - "Absolute", - "AbsoluteMPFBias", - "AbsoluteScale", - "AbsoluteStat", - f"Absolute_{year}", - "BBEC1", - f"BBEC1_{year}", - "EC2", - f"EC2_{year}", - "FlavorQCD", - "Fragmentation", - "HF", - f"HF_{year}", - "PileUpDataMC", - "PileUpPtBB", - "PileUpPtEC1", - "PileUpPtEC2", - "PileUpPtHF", - "PileUpPtRef", - "RelativeBal", - "RelativeFSR", - "RelativeJEREC1", - "RelativeJEREC2", - "RelativeJERHF", - 
"RelativePtBB", - "RelativePtEC1", - "RelativePtEC2", - "RelativePtHF", - "RelativeSample", - f"RelativeSample_{year}", - "RelativeStatEC", - "RelativeStatFSR", - "RelativeStatHF", - "SinglePionECAL", - "SinglePionHCAL", - "TimePtEta", - ] - - from columnflow.production.cms.btag import BTagSFConfig - cfg.x.btag_sf_deepjet = BTagSFConfig( - correction_set="deepJet_shape", - jec_sources=cfg.x.btag_sf_jec_sources, - discriminator="btagDeepFlavB", - ) - if run == 3: - cfg.x.btag_sf_pnet = BTagSFConfig( - correction_set="particleNet_shape", - jec_sources=cfg.x.btag_sf_jec_sources, - discriminator="btagPNetB", - ) - - ################################################################################################ - # dataset / process specific methods - ################################################################################################ - - # top pt reweighting - # https://twiki.cern.ch/twiki/bin/view/CMS/TopPtReweighting?rev=31 - from columnflow.production.cms.top_pt_weight import TopPtWeightConfig - cfg.x.top_pt_weight = TopPtWeightConfig( - params={ - "a": 0.0615, - "a_up": 0.0615 * 1.5, - "a_down": 0.0615 * 0.5, - "b": -0.0005, - "b_up": -0.0005 * 1.5, - "b_down": -0.0005 * 0.5, - }, - pt_max=500.0, - ) - - ################################################################################################ - # shifts - ################################################################################################ - - # load jec sources - with open(os.path.join(thisdir, "jec_sources.yaml"), "r") as f: - all_jec_sources = yaml.load(f, yaml.Loader)["names"] - - # register shifts - cfg.add_shift(name="nominal", id=0) - - cfg.add_shift(name="tune_up", id=1, type="shape", tags={"disjoint_from_nominal"}) - cfg.add_shift(name="tune_down", id=2, type="shape", tags={"disjoint_from_nominal"}) - - cfg.add_shift(name="hdamp_up", id=3, type="shape", tags={"disjoint_from_nominal"}) - cfg.add_shift(name="hdamp_down", id=4, type="shape", tags={"disjoint_from_nominal"}) - - cfg.add_shift(name="mtop_up", id=5, type="shape", tags={"disjoint_from_nominal"}) - cfg.add_shift(name="mtop_down", id=6, type="shape", tags={"disjoint_from_nominal"}) - - cfg.add_shift(name="minbias_xs_up", id=7, type="shape") - cfg.add_shift(name="minbias_xs_down", id=8, type="shape") - add_shift_aliases( - cfg, - "minbias_xs", - { - "pu_weight": "pu_weight_{name}", - "normalized_pu_weight": "normalized_pu_weight_{name}", - }, - ) - - cfg.add_shift(name="top_pt_up", id=9, type="shape") - cfg.add_shift(name="top_pt_down", id=10, type="shape") - add_shift_aliases(cfg, "top_pt", {"top_pt_weight": "top_pt_weight_{direction}"}) - - for jec_source in cfg.x.jec.Jet.uncertainty_sources: - idx = all_jec_sources.index(jec_source) - cfg.add_shift( - name=f"jec_{jec_source}_up", - id=5000 + 2 * idx, - type="shape", - tags={"jec"}, - aux={"jec_source": jec_source}, - ) - cfg.add_shift( - name=f"jec_{jec_source}_down", - id=5001 + 2 * idx, - type="shape", - tags={"jec"}, - aux={"jec_source": jec_source}, - ) - add_shift_aliases( - cfg, - f"jec_{jec_source}", - { - "Jet.pt": "Jet.pt_{name}", - "Jet.mass": "Jet.mass_{name}", - f"{cfg.x.met_name}.pt": f"{cfg.x.met_name}.pt_{{name}}", - f"{cfg.x.met_name}.phi": f"{cfg.x.met_name}.phi_{{name}}", - }, - ) - # TODO: check the JEC de/correlation across years and the interplay with btag weights - if ("" if jec_source == "Total" else jec_source) in cfg.x.btag_sf_jec_sources: - add_shift_aliases( - cfg, - f"jec_{jec_source}", - { - "normalized_btag_deepjet_weight": 
"normalized_btag_deepjet_weight_{name}", - "normalized_njet_btag_deepjet_weight": "normalized_njet_btag_deepjet_weight_{name}", - "normalized_btag_pnet_weight": "normalized_btag_pnet_weight_{name}", - "normalized_njet_btag_pnet_weight": "normalized_njet_btag_pnet_weight_{name}", - }, - ) - - cfg.add_shift(name="jer_up", id=6000, type="shape", tags={"jer"}) - cfg.add_shift(name="jer_down", id=6001, type="shape", tags={"jer"}) - add_shift_aliases( - cfg, - "jer", - { - "Jet.pt": "Jet.pt_{name}", - "Jet.mass": "Jet.mass_{name}", - f"{cfg.x.met_name}.pt": f"{cfg.x.met_name}.pt_{{name}}", - f"{cfg.x.met_name}.phi": f"{cfg.x.met_name}.phi_{{name}}", - }, - ) - - for i, (match, dm) in enumerate(itertools.product(["jet", "e"], [0, 1, 10, 11])): - cfg.add_shift(name=f"tec_{match}_dm{dm}_up", id=20 + 2 * i, type="shape", tags={"tec"}) - cfg.add_shift(name=f"tec_{match}_dm{dm}_down", id=21 + 2 * i, type="shape", tags={"tec"}) - add_shift_aliases( - cfg, - f"tec_{match}_dm{dm}", - { - "Tau.pt": "Tau.pt_{name}", - "Tau.mass": "Tau.mass_{name}", - f"{cfg.x.met_name}.pt": f"{cfg.x.met_name}.pt_{{name}}", - f"{cfg.x.met_name}.phi": f"{cfg.x.met_name}.phi_{{name}}", - }, - ) - - # start at id=50 - cfg.x.tau_unc_names = [ - "jet_dm0", "jet_dm1", "jet_dm10", - "e_barrel", "e_endcap", - "mu_0p0To0p4", "mu_0p4To0p8", "mu_0p8To1p2", "mu_1p2To1p7", "mu_1p7To2p3", - ] - for i, unc in enumerate(cfg.x.tau_unc_names): - cfg.add_shift(name=f"tau_{unc}_up", id=50 + 2 * i, type="shape") - cfg.add_shift(name=f"tau_{unc}_down", id=51 + 2 * i, type="shape") - add_shift_aliases(cfg, f"tau_{unc}", {"tau_weight": f"tau_weight_{unc}_" + "{direction}"}) - - cfg.add_shift(name="tautau_trigger_up", id=80, type="shape") - cfg.add_shift(name="tautau_trigger_down", id=81, type="shape") - add_shift_aliases(cfg, "tautau_trigger", {"tau_trigger_weight": "tau_trigger_weight_tautau_{direction}"}) - cfg.add_shift(name="etau_trigger_up", id=82, type="shape") - cfg.add_shift(name="etau_trigger_down", id=83, type="shape") - add_shift_aliases(cfg, "etau_trigger", {"tau_trigger_weight": "tau_trigger_weight_etau_{direction}"}) - cfg.add_shift(name="mutau_trigger_up", id=84, type="shape") - cfg.add_shift(name="mutau_trigger_down", id=85, type="shape") - add_shift_aliases(cfg, "mutau_trigger", {"tau_trigger_weight": "tau_trigger_weight_mutau_{direction}"}) - # no uncertainty for di-tau VBF trigger existing yet - # cfg.add_shift(name="mutau_trigger_up", id=86, type="shape") - # cfg.add_shift(name="tautauvbf_trigger_down", id=86, type="shape") - # add_shift_aliases(cfg, "tautauvbf_trigger", {"tau_trigger_weight": "tau_trigger_weight_tautauvbf_{direction}"}) - - cfg.add_shift(name="e_up", id=90, type="shape") - cfg.add_shift(name="e_down", id=91, type="shape") - add_shift_aliases(cfg, "e", {"electron_weight": "electron_weight_{direction}"}) - - # electron shifts - # TODO: energy corrections are currently only available for 2022 (Jan 2025) - # include them when available - if run == 3 and year == 2022: - logger.debug("adding ees and eer shifts") - cfg.add_shift(name="ees_up", id=92, type="shape", tags={"eec"}) - cfg.add_shift(name="ees_down", id=93, type="shape", tags={"eec"}) - add_shift_aliases( - cfg, - "ees", - { - "Electron.pt": "Electron.pt_scale_{direction}", - }, - ) - - cfg.add_shift(name="eer_up", id=94, type="shape", tags={"eer"}) - cfg.add_shift(name="eer_down", id=95, type="shape", tags={"eer"}) - add_shift_aliases( - cfg, - "eer", - { - "Electron.pt": "Electron.pt_res_{direction}", - }, - ) - - cfg.add_shift(name="mu_up", id=100, 
type="shape") - cfg.add_shift(name="mu_down", id=101, type="shape") - add_shift_aliases(cfg, "mu", {"muon_weight": "muon_weight_{direction}"}) - - cfg.x.btag_unc_names = [ - "hf", "lf", - f"hfstats1_{year}", f"hfstats2_{year}", - f"lfstats1_{year}", f"lfstats2_{year}", - "cferr1", "cferr2", - ] - for i, unc in enumerate(cfg.x.btag_unc_names): - cfg.add_shift(name=f"btag_{unc}_up", id=110 + 2 * i, type="shape") - cfg.add_shift(name=f"btag_{unc}_down", id=111 + 2 * i, type="shape") - add_shift_aliases( - cfg, - f"btag_{unc}", - { - "normalized_btag_deepjet_weight": f"normalized_btag_deepjet_weight_{unc}_" + "{direction}", - "normalized_njet_btag_deepjet_weight": f"normalized_njet_btag_deepjet_weight_{unc}_" + "{direction}", - # TODO: pnet here, or is this another shift? probably the latter - }, - ) - - cfg.add_shift(name="pdf_up", id=130, type="shape", tags={"lhe_weight"}) - cfg.add_shift(name="pdf_down", id=131, type="shape", tags={"lhe_weight"}) - add_shift_aliases( - cfg, - "pdf", - { - "pdf_weight": "pdf_weight_{direction}", - "normalized_pdf_weight": "normalized_pdf_weight_{direction}", - }, - ) - - cfg.add_shift(name="murmuf_up", id=140, type="shape", tags={"lhe_weight"}) - cfg.add_shift(name="murmuf_down", id=141, type="shape", tags={"lhe_weight"}) - add_shift_aliases( - cfg, - "murmuf", - { - "murmuf_weight": "murmuf_weight_{direction}", - "normalized_murmuf_weight": "normalized_murmuf_weight_{direction}", - }, - ) - - ################################################################################################ - # external files - ################################################################################################ - - cfg.x.external_files = DotDict() - - # helper - def add_external(name, value): - if isinstance(value, dict): - value = DotDict.wrap(value) - cfg.x.external_files[name] = value - - if run == 2: - json_postfix = "" - if year == 2016: - json_postfix = f"{'pre' if campaign.has_tag('preVFP') else 'post'}VFP" - json_pog_era = f"{year}{json_postfix}_UL" - json_mirror = "/afs/cern.ch/user/m/mrieger/public/mirrors/jsonpog-integration-377439e8" - elif run == 3: - json_pog_era = f"{year}_Summer{year2}{campaign.x.postfix}" - json_mirror = "/afs/cern.ch/user/m/mrieger/public/mirrors/jsonpog-integration-377439e8" - else: - assert False - - # common files - # (versions in the end are for hashing in cases where file contents changed but paths did not) - add_external("lumi", { - "golden": { - 2016: ("/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions16/13TeV/Legacy_2016/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt", "v1"), # noqa - 2017: ("/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions17/13TeV/Legacy_2017/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt", "v1"), # noqa - 2018: ("/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions18/13TeV/Legacy_2018/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt", "v1"), # noqa, - # https://twiki.cern.ch/twiki/bin/view/CMS/PdmVRun3Analysis?rev=161#Year_2022 - 2022: ("https://cms-service-dqmdc.web.cern.ch/CAF/certification/Collisions22/Cert_Collisions2022_355100_362760_Golden.json", "v1"), # noqa - # https://twiki.cern.ch/twiki/bin/view/CMS/PdmVRun3Analysis?rev=161#Year_2023 - 2023: ("https://cms-service-dqmdc.web.cern.ch/CAF/certification/Collisions23/Cert_Collisions2023_366442_370790_Golden.json", "v1"), # noqa - }[year], - "normtag": { - 2016: ("/afs/cern.ch/user/l/lumipro/public/Normtags/normtag_PHYSICS.json", "v1"), - 2017: 
("/afs/cern.ch/user/l/lumipro/public/Normtags/normtag_PHYSICS.json", "v1"), - 2018: ("/afs/cern.ch/user/l/lumipro/public/Normtags/normtag_PHYSICS.json", "v1"), - # https://twiki.cern.ch/twiki/bin/view/CMS/PdmVRun3Analysis?rev=161#Year_2022 - 2022: ("/cvmfs/cms-bril.cern.ch/cms-lumi-pog/Normtags/normtag_BRIL.json", "v1"), - # https://twiki.cern.ch/twiki/bin/view/CMS/PdmVRun3Analysis?rev=161#Year_2023 - 2023: ("/cvmfs/cms-bril.cern.ch/cms-lumi-pog/Normtags/normtag_BRIL.json", "v1"), - }[year], - }) - # pileup weight corrections - add_external("pu_sf", (f"{json_mirror}/POG/LUM/{json_pog_era}/puWeights.json.gz", "v1")) - # jet energy correction - add_external("jet_jerc", (f"{json_mirror}/POG/JME/{json_pog_era}/jet_jerc.json.gz", "v1")) - # jet veto map - add_external("jet_veto_map", (f"{json_mirror}/POG/JME/{json_pog_era}/jetvetomaps.json.gz", "v1")) - # btag scale factor - add_external("btag_sf_corr", (f"{json_mirror}/POG/BTV/{json_pog_era}/btagging.json.gz", "v1")) - # Tobias' tautauNN (https://github.com/uhh-cms/tautauNN) - add_external("res_pdnn", ("/afs/cern.ch/work/m/mrieger/public/hbt/models/res_prod3/model_fold0.tgz", "v1")) - # non-parametric (flat) training up to mX = 800 GeV - add_external("res_dnn", ("/afs/cern.ch/work/m/mrieger/public/hbt/models/res_prod3_nonparam/model_fold0.tgz", "v1")) - - # run specific files - if run == 2: - # tau energy correction and scale factors - add_external("tau_sf", (f"{json_mirror}/POG/TAU/{json_pog_era}/tau.json.gz", "v1")) - # tau trigger scale factors - add_external("tau_trigger_sf", (f"{json_mirror}/POG/TAU/{json_pog_era}/tau.json.gz", "v1")) - # electron scale factors - add_external("electron_sf", (f"{json_mirror}/POG/EGM/{json_pog_era}/electron.json.gz", "v1")) - # muon scale factors - add_external("muon_sf", (f"{json_mirror}/POG/MUO/{json_pog_era}/muon_Z.json.gz", "v1")) - # met phi correction - add_external("met_phi_corr", (f"{json_mirror}/POG/JME/{json_pog_era}/met.json.gz", "v1")) - # hh-btag repository with TF saved model directories trained on Run2 UL samples - add_external("hh_btag_repo", ("https://gitlab.cern.ch/hh/bbtautau/hh-btag/-/archive/master/hh-btag-master.tar.gz", "v2")) # noqa - - elif run == 3: - # muon scale factors - add_external("muon_sf", (f"{json_mirror}/POG/MUO/{json_pog_era}/muon_Z.json.gz", "v1")) - # electron scale factors - add_external("electron_sf", (f"{json_mirror}/POG/EGM/{json_pog_era}/electron.json.gz", "v1")) - # hh-btag repository with TF saved model directories trained on 22+23 samples using pnet - add_external("hh_btag_repo", ("https://gitlab.cern.ch/hh/bbtautau/hh-btag/-/archive/master/hh-btag-master.tar.gz", "v3")) # noqa - - # TODO: electron (and photon) energy corrections and smearing are only available for 2022 - # include them when available - if year == 2022: - # electron energy correction and smearing - add_external("electron_ss", (f"{json_mirror}/POG/EGM/{json_pog_era}/electronSS.json.gz", "v1")) - - # tau energy correction and scale factors - # TODO: remove tag pog mirror once integrated centrally - json_mirror_tau_pog = "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-taupog" - if year == 2022: - tau_pog_era = f"{year}_{'pre' if campaign.has_tag('preEE') else 'post'}EE" - tau_pog_dir = tau_pog_era - elif year == 2023: - tau_pog_era = f"{year}_{'pre' if campaign.has_tag('preBPix') else 'post'}BPix" - tau_pog_dir = str(year) # yes, it's inconsistent w.r.t. 
2022 - add_external("tau_sf", (f"{json_mirror_tau_pog}/POG/TAU/{tau_pog_dir}/tau_DeepTau2018v2p5_{tau_pog_era}.json.gz", "v1")) # noqa - else: - assert False - - ################################################################################################ - # reductions - ################################################################################################ - - # target file size after MergeReducedEvents in MB - cfg.x.reduced_file_size = 512.0 - - # columns to keep after certain steps - cfg.x.keep_columns = DotDict.wrap({ - "cf.ReduceEvents": { - # mandatory - ColumnCollection.MANDATORY_COFFEA, - # object info - "Jet.{pt,eta,phi,mass,hadronFlavour,puId,hhbtag,btagPNet*,btagDeep*}", - "HHBJet.{pt,eta,phi,mass,hadronFlavour,puId,hhbtag,btagPNet*,btagDeep*}", - "NonHHBJet.{pt,eta,phi,mass,hadronFlavour,puId,hhbtag,btagPNet*,btagDeep*}", - "VBFJet.{pt,eta,phi,mass,hadronFlavour,puId,hhbtag,btagPNet*,btagDeep*}", - "FatJet.*", - "SubJet{1,2}.*", - "Electron.*", - "Muon.*", - "Tau.*", - f"{cfg.x.met_name}.{{pt,phi,significance,covXX,covXY,covYY}}", - "PV.npvs", - "FatJet.*", - # additional event info - "deterministic_seed", "Jet.deterministic_seed", - # keep all columns added during selection, but skip cutflow feature - ColumnCollection.ALL_FROM_SELECTOR, - skip_column("cutflow.*"), - }, - "cf.MergeSelectionMasks": { - "cutflow.*", - }, - "cf.UniteColumns": { - "*", - }, - }) - - ################################################################################################ - # weights - ################################################################################################ - - # configurations for all possible event weight columns as keys in an OrderedDict, - # mapped to shift instances they depend on - # (this info is used by weight producers) - get_shifts = functools.partial(get_shifts_from_sources, cfg) - cfg.x.event_weights = DotDict({ - "normalization_weight": [], - "normalization_weight_inclusive": [], - "pdf_weight": get_shifts("pdf"), - "murmuf_weight": get_shifts("murmuf"), - "normalized_pu_weight": get_shifts("minbias_xs"), - # TODO: enable again once we have btag cuts - # "normalized_njet_btag_deepjet_weight": get_shifts(*(f"btag_{unc}" for unc in cfg.x.btag_unc_names)), - "electron_weight": get_shifts("e"), - "muon_weight": get_shifts("mu"), - "tau_weight": get_shifts(*(f"tau_{unc}" for unc in cfg.x.tau_unc_names)), - "tau_trigger_weight": get_shifts("etau_trigger", "mutau_trigger", "tautau_trigger"), - }) - - # define per-dataset event weights - for dataset in cfg.datasets: - if dataset.has_tag("has_top"): - dataset.x.event_weights = {"top_pt_weight": get_shifts("top_pt")} - - cfg.x.shift_groups = { - "jec": [ - shift_inst.name for shift_inst in cfg.shifts - if shift_inst.has_tag(("jec", "jer")) - ], - "lepton_sf": [ - shift_inst.name for shift_inst in (*get_shifts("e"), *get_shifts("mu")) - ], - "tec": [ - shift_inst.name for shift_inst in cfg.shifts - if shift_inst.has_tag(("tec")) - ], - "eec": [ - shift_inst.name for shift_inst in cfg.shifts - if shift_inst.has_tag(("ees", "eer")) - ], - "ees": [ - shift_inst.name for shift_inst in cfg.shifts - if shift_inst.has_tag(("ees")) - ], - "eer": [ - shift_inst.name for shift_inst in cfg.shifts - if shift_inst.has_tag(("eer")) - ], - "btag_sf": [ - shift_inst.name for shift_inst in get_shifts(*(f"btag_{unc}" for unc in cfg.x.btag_unc_names)) - ], - "pdf": [shift_inst.name for shift_inst in get_shifts("pdf")], - "murmuf": (shift_inst.name for shift_inst in get_shifts("murmuf")), - "pu": [shift_inst.name for 
shift_inst in get_shifts("minbias_xs")], - } - - ################################################################################################ - # external configs: channels, categories, met filters, triggers, variables - ################################################################################################ - - # channels - cfg.add_channel(name="etau", id=1, label=r"$e\tau_{h}$") - cfg.add_channel(name="mutau", id=2, label=r"$\mu\tau_{h}$") - cfg.add_channel(name="tautau", id=3, label=r"$\tau_{h}\tau_{h}$") - cfg.add_channel(name="ee", id=4, label=r"$ee$") - cfg.add_channel(name="mumu", id=5, label=r"$\mu\mu$") - cfg.add_channel(name="emu", id=6, label=r"$e\mu$") - - # add categories - from hbt.config.categories import add_categories - add_categories(cfg) - - # add variables - from hbt.config.variables import add_variables - add_variables(cfg) - - # add met filters - from hbt.config.met_filters import add_met_filters - add_met_filters(cfg) - - # add triggers - if year == 2016: - from hbt.config.triggers import add_triggers_2016 - add_triggers_2016(cfg) - elif year == 2017: - from hbt.config.triggers import add_triggers_2017 - add_triggers_2017(cfg) - elif year == 2018: - from hbt.config.triggers import add_triggers_2018 - add_triggers_2018(cfg) - elif year == 2022: - from hbt.config.triggers import add_triggers_2022 - add_triggers_2022(cfg) - elif year == 2023: - from hbt.config.triggers import add_triggers_2023 - add_triggers_2023(cfg) - else: - raise False - - ################################################################################################ - # hist hooks - ################################################################################################ - - cfg.x.hist_hooks = DotDict() - - # simple blinding - cfg.x.hist_hooks.blind = lambda task, hists: {p: h for p, h in hists.items() if not p.is_data} - - # qcd estimation - from hbt.hist_hooks.qcd import add_hooks as add_qcd_hooks - add_qcd_hooks(cfg) - - # binning - from hbt.hist_hooks.binning import add_hooks as add_binning_hooks - add_binning_hooks(cfg) - - ################################################################################################ - # LFN settings - ################################################################################################ - - # custom method and sandbox for determining dataset lfns - cfg.x.get_dataset_lfns = None - cfg.x.get_dataset_lfns_sandbox = None - - # whether to validate the number of obtained LFNs in GetDatasetLFNs - cfg.x.validate_dataset_lfns = limit_dataset_files is None and not sync_mode - - # custom lfn retrieval method in case the underlying campaign is custom uhh - if cfg.campaign.x("custom", {}).get("creator") == "uhh": - def get_dataset_lfns( - dataset_inst: od.Dataset, - shift_inst: od.Shift, - dataset_key: str, - ) -> list[str]: - # destructure dataset_key into parts and create the store path - dataset_id, full_campaign, tier = dataset_key.split("/")[1:] - main_campaign, sub_campaign = full_campaign.split("-", 1) - path = f"store/{dataset_inst.data_source}/{main_campaign}/{dataset_id}/{tier}/{sub_campaign}/0" - - # create the lfn base directory, local or remote - dir_cls = law.wlcg.WLCGDirectoryTarget - fs = f"wlcg_fs_{cfg.campaign.x.custom['name']}" - local_fs = f"local_fs_{cfg.campaign.x.custom['name']}" - if law.config.has_section(local_fs): - base = law.target.file.remove_scheme(law.config.get_expanded(local_fs, "base")) - if os.path.exists(base): - dir_cls = law.LocalDirectoryTarget - fs = local_fs - lfn_base = dir_cls(path, fs=fs) - - # 
loop though files and interpret paths as lfns - print(lfn_base) - return sorted( - "/" + lfn_base.child(basename, type="f").path.lstrip("/") - for basename in lfn_base.listdir(pattern="*.root") - ) - - # define the lfn retrieval function - cfg.x.get_dataset_lfns = get_dataset_lfns - - # define a custom sandbox - cfg.x.get_dataset_lfns_sandbox = dev_sandbox("bash::$CF_BASE/sandboxes/cf.sh") - - # define custom remote fs's to look at - cfg.x.get_dataset_lfns_remote_fs = lambda dataset_inst: [ - f"local_fs_{cfg.campaign.x.custom['name']}", - f"wlcg_fs_{cfg.campaign.x.custom['name']}", - ] - - return cfg diff --git a/hbt/config/jec_sources.yaml b/hbt/config/jec_sources.yaml deleted file mode 100644 index 28a15189..00000000 --- a/hbt/config/jec_sources.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# full list of jec sources in a fixed order that is used to assign consistent ids across configs -# (please add new sources at the bottom to preserve the order of existing ones) -names: - - AbsoluteStat - - AbsoluteScale - - AbsoluteSample - - AbsoluteFlavMap - - AbsoluteMPFBias - - Fragmentation - - SinglePionECAL - - SinglePionHCAL - - FlavorQCD - - TimePtEta - - RelativeJEREC1 - - RelativeJEREC2 - - RelativeJERHF - - RelativePtBB - - RelativePtEC1 - - RelativePtEC2 - - RelativePtHF - - RelativeBal - - RelativeSample - - RelativeFSR - - RelativeStatFSR - - RelativeStatEC - - RelativeStatHF - - PileUpDataMC - - PileUpPtRef - - PileUpPtBB - - PileUpPtEC1 - - PileUpPtEC2 - - PileUpPtHF - - PileUpMuZero - - PileUpEnvelope - - SubTotalPileUp - - SubTotalRelative - - SubTotalPt - - SubTotalScale - - SubTotalAbsolute - - SubTotalMC - - Total - - TotalNoFlavor - - TotalNoTime - - TotalNoFlavorNoTime - - FlavorZJet - - FlavorPhotonJet - - FlavorPureGluon - - FlavorPureQuark - - FlavorPureCharm - - FlavorPureBottom - - TimeRunA - - TimeRunB - - TimeRunC - - TimeRunD - - CorrelationGroupMPFInSitu - - CorrelationGroupIntercalibration - - CorrelationGroupbJES - - CorrelationGroupFlavor - - CorrelationGroupUncorrelated diff --git a/hbt/config/styles.py b/hbt/config/styles.py deleted file mode 100644 index 1fb705d2..00000000 --- a/hbt/config/styles.py +++ /dev/null @@ -1,251 +0,0 @@ -# coding: utf-8 - -""" -Style definitions. -""" - -from __future__ import annotations - -from collections import defaultdict - -import order as od - -from columnflow.util import DotDict, try_int - - -def setup_plot_styles(config: od.Config) -> None: - """ - Setup plot styles. 
- """ - # general settings - config.x.default_general_settings = { - "cms_label": "wip", - "whitespace_fraction": 0.31, - } - - # default component configs - gridspec = { - "height_ratios": [3, 0.9], - } - legend = { - "borderpad": 0, "borderaxespad": 1.2, "columnspacing": 1.8, "labelspacing": 0.28, - "fontsize": 16, "cf_line_breaks": True, "cf_short_labels": False, - } - ratio = { - "yloc": "center", - } - annotate = { - "fontsize": 18, "style": "italic", "xycoords": "axes fraction", "xy": (0.035, 0.955), - } - - # wide legend - # - 3 columns, backgrounds in first 2 columns - # - shortened process labels - # - changed annotation (channel) position to fit right under legend - wide_legend = legend | { - "ncols": 3, "loc": "upper left", "cf_entries_per_column": legend_entries_per_column, - "cf_short_labels": True, - } - annotate_wide = annotate | { - "xy": (0.035, 0.765), - } - - # wide extended legend, same as wide legend except - # - process labels are not shortened - # - annotation (channel) moved slightly down to fut under (now taller) legend - wide_ext_legend = wide_legend | { - "cf_short_labels": False, - } - annotate_wide_ext = annotate_wide | { - "xy": (0.035, 0.750), - } - - # construct named style configs - config.x.custom_style_config_groups = { - "default": (default_cfg := { - "gridspec_cfg": gridspec, - "rax_cfg": ratio, - "legend_cfg": legend, - "annotate_cfg": annotate, - }), - "wide_legend": (wide_legend_cfg := { - **default_cfg, - "legend_cfg": wide_legend, - "annotate_cfg": annotate_wide, - }), - "wide_ext_legend": { - **wide_legend_cfg, - "legend_cfg": wide_ext_legend, - "annotate_cfg": annotate_wide_ext, - }, - } - - config.x.default_custom_style_config = "wide_legend" - config.x.default_blinding_threshold = 0 - - -def stylize_processes(config: od.Config) -> None: - """ - Adds process colors and adjust labels. 
- """ - cfg = config - - # recommended cms colors - # see https://cms-analysis.docs.cern.ch/guidelines/plotting/colors - cfg.x.colors = DotDict( - bright_blue="#3f90da", - dark_blue="#011c87", - purple="#832db6", - aubergine="#964a8b", - yellow="#f7c331", - bright_orange="#ffa90e", - dark_orange="#e76300", - red="#bd1f01", - teal="#92dadd", - grey="#94a4a2", - brown="#a96b59", - green="#30c300", - dark_green="#269c00", - ) - - for kl in ["0", "1", "2p45", "5"]: - if (p := config.get_process(f"hh_ggf_hbb_htt_kl{kl}_kt1", default=None)): - p.color1 = cfg.x.colors.dark_blue - kappa_label = create_kappa_label(**{r"\lambda": kl, "t": "1"}) - p.label = rf"$HH_{{ggf}} \rightarrow bb\tau\tau$ __SCALE____SHORT____BREAK__({kappa_label})" - - for kv, k2v, kl in [ - ("1", "1", "1"), - ("1", "0", "1"), - ("1", "2", "1"), - ("1", "1", "2"), - ("1p74", "1p37", "14p4"), - ("m0p012", "0p03", "10p2"), - ("m0p758", "1p44", "m19p3"), - ("m0p962", "0p959", "m1p43"), - ("m1p21", "1p94", "m0p94"), - ("m1p6", "2p72", "m1p36"), - ("m1p83", "3p57", "m3p39"), - ("m2p12", "3p87", "m5p96"), - ]: - if (p := config.get_process(f"hh_vbf_hbb_htt_kv{kv}_k2v{k2v}_kl{kl}", default=None)): - p.color1 = cfg.x.colors.brown - kappa_label = create_kappa_label(**{"2V": k2v, r"\lambda": kl, "V": kv}) - p.label = rf"$HH_{{vbf}} \rightarrow bb\tau\tau$ __SCALE____SHORT____BREAK__({kappa_label})" - - if (p := config.get_process("h", default=None)): - p.color1 = cfg.x.colors.teal - - if (p := config.get_process("tt", default=None)): - p.color1 = cfg.x.colors.bright_orange - p.label = r"$t\bar{t}$" - - if (p := config.get_process("st", default=None)): - p.color1 = cfg.x.colors.purple - - if (p := config.get_process("dy", default=None)): - p.color1 = cfg.x.colors.bright_blue - - if (p := config.get_process("vv", default=None)): - p.color1 = cfg.x.colors.yellow - - if (p := config.get_process("vvv", default=None)): - p.color1 = cfg.x.colors.yellow - - if (p := config.get_process("multiboson", default=None)): - p.color1 = cfg.x.colors.yellow - - if (p := config.get_process("w", default=None)): - p.color1 = cfg.x.colors.aubergine - p.label = "W" - - if (p := config.get_process("z", default=None)): - p.color1 = cfg.x.colors.aubergine - p.label = "Z" - - if (p := config.get_process("v", default=None)): - p.color1 = cfg.x.colors.aubergine - - if (p := config.get_process("all_v", default=None)): - p.color1 = cfg.x.colors.aubergine - - if (p := config.get_process("ewk", default=None)): - p.color1 = cfg.x.colors.dark_orange - - if (p := config.get_process("ttv", default=None)): - p.color1 = cfg.x.colors.grey - p.label = r"$t\bar{t} + V$" - - if (p := config.get_process("ttvv", default=None)): - p.color1 = cfg.x.colors.grey - p.label = r"$t\bar{t} + VV$" - - if (p := config.get_process("tt_multiboson", default=None)): - p.color1 = cfg.x.colors.grey - - if (p := config.get_process("qcd", default=None)): - p.color1 = cfg.x.colors.red - - -def legend_entries_per_column(ax, handles: list, labels: list, n_cols: int) -> list[int]: - """ - Control number of entries such that backgrounds are in the first n - 1 columns, and everything - else in the last one. 
- """ - # get number of background and remaining entries - n_backgrounds = sum(1 for handle in handles if handle.__class__.__name__ == "StepPatch") - n_other = len(handles) - n_backgrounds - - # fill number of entries per column - entries_per_col = n_cols * [0] - n_bkg_cols = n_cols - # set last column if non-backgrounds are present - if n_other: - entries_per_col[-1] = n_other - n_bkg_cols -= 1 - # fill background columns - for i in range(n_bkg_cols): - entries_per_col[i] = n_backgrounds // n_bkg_cols + (n_backgrounds % n_bkg_cols > i) - - return entries_per_col - - -def kappa_str_to_num(value: str) -> int | float: - """ - Converts a string-encoded kappa value to an actual number. An integer is returned if possible, - and a float otherwise. Examples: - - .. code-block:: python - - kappa_str_to_num("1") # 1 - kappa_str_to_num("2.45") # 2.45 - kappa_str_to_num("m1p7") # -1.7 - """ - value = value.replace("p", ".").replace("m", "-") - return int(value) if try_int(value) else float(value) - - -def group_kappas(**kappas: dict[str, str]) -> dict[int | float, list[str]]: - """ - Groups kappa values by their coupling strength. Examples: - - .. code-block:: python - - group_kappas(kl="1", kt="1") # {1: ["kl", "kt"]} - group_kappas(kl="2p45", kt="1") # {2.45: ["kl"], 1: ["kt"]} - group_kappas(k2v="0", kv="1", kl="1") # {0: ["k2v"], 1: ["kv", "kl"]} - """ - str_groups = defaultdict(list) - for k, v in kappas.items(): - str_groups[v].append(k) - - # convert keys to numbers - return {kappa_str_to_num(k): v for k, v in str_groups.items()} - - -def create_kappa_label(*, sep: str = ",", **kappas: dict[str, str]) -> str: - parts = [] - for v, _kappas in group_kappas(**kappas).items(): - k_str = "=".join(rf"\kappa_{{{k}}}"for k in _kappas) - parts.append(f"{k_str}={v}") - return "$" + sep.join(parts) + "$" diff --git a/hbt/config/triggers.py b/hbt/config/triggers.py deleted file mode 100644 index f39b9eb7..00000000 --- a/hbt/config/triggers.py +++ /dev/null @@ -1,1535 +0,0 @@ -# coding: utf-8 - -""" -Definition of triggers. - -General requirement from the lepton selection: -For cross triggers, the lepton leg (lepton= {"e", "mu"}) must be defined before the tau leg. -An error here would be caught in the lepton selection, but it is better to avoid it. - -Convention for Ids: -- 1xx: single muon triggers -- 2xx: single electron triggers -- 3xx: mu-tau triggers -- 4xx: e-tau triggers -- 5xx: tau-tau triggers -- 6xx: vbf triggers -- 7xx: tau tau jet triggers -- 8xx: quadjet triggers - -Starting from xx = 01 and with a unique name for each path across all years. 
- -Current status: -101 -> HLT_IsoMu22 -102 -> HLT_IsoMu22_eta2p1 -103 -> HLT_IsoTkMu22 -104 -> HLT_IsoTkMu22_eta2p1 -105 -> HLT_IsoMu24 -106 -> HLT_IsoMu27 - -201 -> HLT_Ele25_eta2p1_WPTight_Gsf -202 -> HLT_Ele32_WPTight_Gsf -203 -> HLT_Ele32_WPTight_Gsf_L1DoubleEG -204 -> HLT_Ele35_WPTight_Gsf -205 -> HLT_Ele30_WPTight_Gsf - -301 -> HLT_IsoMu19_eta2p1_LooseIsoPFTau20 -302 -> HLT_IsoMu19_eta2p1_LooseIsoPFTau20_SingleL1 -303 -> HLT_IsoMu20_eta2p1_LooseChargedIsoPFTau27_eta2p1_CrossL1 -304 -> HLT_IsoMu20_eta2p1_LooseDeepTauPFTauHPS27_eta2p1_CrossL1 - -401 -> HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau20_SingleL1 -402 -> HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau20 -403 -> HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau30 -404 -> HLT_Ele24_eta2p1_WPTight_Gsf_LooseChargedIsoPFTau30_eta2p1_CrossL1 -405 -> HLT_Ele24_eta2p1_WPTight_Gsf_LooseDeepTauPFTauHPS30_eta2p1_CrossL1 - -501 -> HLT_DoubleMediumIsoPFTau35_Trk1_eta2p1_Reg -502 -> HLT_DoubleMediumCombinedIsoPFTau35_Trk1_eta2p1_Reg -503 -> HLT_DoubleMediumChargedIsoPFTau35_Trk1_eta2p1_Reg -504 -> HLT_DoubleTightChargedIsoPFTau35_Trk1_TightID_eta2p1_Reg -505 -> HLT_DoubleMediumChargedIsoPFTau40_Trk1_TightID_eta2p1_Reg -506 -> HLT_DoubleTightChargedIsoPFTau40_Trk1_eta2p1_Reg -507 -> HLT_DoubleMediumDeepTauPFTauHPS35_L2NN_eta2p1 -508 -> HLT_DoubleMediumChargedIsoPFTauHPS40_Trk1_eta2p1 -509 -> HLT_DoubleMediumChargedIsoDisplacedPFTauHPS32_Trk1_eta2p1 - -601 -> HLT_VBF_DoubleLooseChargedIsoPFTau20_Trk1_eta2p1_Reg -602 -> HLT_VBF_DoubleMediumDeepTauPFTauHPS20_eta2p1 -603 -> HLT_VBF_DoubleLooseChargedIsoPFTauHPS20_Trk1_eta2p1 -604 -> HLT_DoublePFJets40_Mass500_MediumDeepTauPFTauHPS45_L2NN_MediumDeepTauPFTauHPS20_eta2p1 - -701 -> HLT_DoubleMediumDeepTauPFTauHPS30_L2NN_eta2p1_PFJet60 -702 -> HLT_DoubleMediumDeepTauPFTauHPS30_L2NN_eta2p1_PFJet75 -""" - -from __future__ import annotations - -import functools - -import order as od - -from columnflow.util import DotDict - -from hbt.config.util import Trigger, TriggerLeg, TriggerBits as Bits - - -# use the CCLub names for the trigger bits and improve them when necessary -trigger_bits = DotDict.wrap({ - # for v12: - # checked with https://github.com/cms-sw/cmssw/blob/CMSSW_13_0_X/PhysicsTools/NanoAOD/python/triggerObjects_cff.py - # and in particular https://github.com/cms-sw/cmssw/blob/2defd844e96613d2438b690d10b79c773e02ab57/PhysicsTools/NanoAOD/python/triggerObjects_cff.py # noqa - # for v14: - # from https://github.com/cms-sw/cmssw/tree/f50cf84669608dbe67fd8430660abe651d5b46fd/PhysicsTools/NanoAOD/python/triggerObjects_cff.py # noqa - # last update in https://github.com/cms-sw/cmssw/blob/CMSSW_14_0_X/PhysicsTools/NanoAOD/python/triggerObjects_cff.py - - "e": { - "CaloIdLTrackIdLIsoVL": Bits(v12=1, v14="v12"), - "WPTightTrackIso": Bits(v12=2, v14="v12"), - "WPLooseTrackIso": Bits(v12=4, v14="v12"), - "OverlapFilterPFTau": Bits(v12=8, v14="v12"), - "DiElectron": Bits(v12=16), - "DiElectronLeg1": Bits(v14=16), - "DiElectronLeg2": Bits(v14=32), - "MuEle": Bits(v12=32, v14=64), - "EleTau": Bits(v12=64, v14=128), - "TripleElectron": Bits(v12=128, v14=256), - "SingleMuonDiEle": Bits(v12=256, v14=512), - "DiMuonSingleEle": Bits(v12=512, v14=1024), - "SingleEle_L1DoubleAndSingleEle": Bits(v12=1024, v14=2048), - "SingleEle_CaloIdVT_GsfTrkIdT": Bits(v12=2048, v14=4096), - "SingleEle_PFJet": Bits(v12=4096, v14=8192), - "Photon175_Photon200": Bits(v12=8192, v14=16384), - "DoubleEle_CaloIdL_MW_seeded": Bits(v14=32768), - "DoubleEle_CaloIdL_MW_unseeded": Bits(v14=65536), - "EleTauPNet": Bits(v14=131072), - }, - "mu": { - 
"TrkIsoVVL": Bits(v12=1, v14="v12"), - "Iso": Bits(v12=2, v14="v12"), - "OverlapFilterPFTau": Bits(v12=4, v14="v12"), - "SingleMuon": Bits(v12=8, v14="v12"), - "DiMuon": Bits(v12=16, v14="v12"), - "MuEle": Bits(v12=32, v14="v12"), - "MuTau": Bits(v12=64, v14="v12"), - "TripleMuon": Bits(v12=128, v14="v12"), - "DiMuonSingleEle": Bits(v12=256, v14="v12"), - "SingleMuonDiEle": Bits(v12=512, v14="v12"), - "Mu50": Bits(v12=1024, v14="v12"), - "Mu100": Bits(v12=2048, v14="v12"), - "SingleMuonSinglePhoton": Bits(v12=4096, v14="v12"), - "MuTauPNet": Bits(v14=8192), - }, - "tau": { # general comment: lot of v14 paths contain PNet paths, not available in v12, e.g. OverlapFilterIsoEle - "LooseChargedIso": Bits(v12=1), - "Loose": Bits(v14=1), - "MediumChargedIso": Bits(v12=2), - "Medium": Bits(v14=2), - "TightChargedIso": Bits(v12=4), - "Tight": Bits(v14=4), - "DeepTau": Bits(v12=8, v14="v12"), - "PNet": Bits(v14=16), - "TightOOSCPhotons": Bits(v12=16), - "HPS": Bits(v12=32, v14=268435456), - "ChargedIso": Bits(v14=32), - "ChargedIsoDiTau": Bits(v12=64), - "Dxy": Bits(v14=64), - "DeepTauDiTau": Bits(v12=128, v14=2048 + 8), # manually created bit combinations for v14 - "ETauFilter": Bits(v14=128), - "MuTauFilter": Bits(v14=256), - "OverlapFilterIsoEle": Bits(v12=256, v14=4096), # contains HPS in v14, not in v12 - "OverlapFilterIsoMu": Bits(v12=512, v14=8192), # contains HPS in v14, not in v12 - "SingleTau": Bits(v14=512), - "SingleTauOrTauMet": Bits(v12=1024), # more general paths than SingleTau in v14 - "VBFDiTau": Bits(v14=1024), - "VBFpDoublePFTau_run2": Bits(v12=2048), - "VBFpDoublePFTau_run3": Bits(v12=4096), # warning: this trigger bit expects "ChargedIso" in the filter name, this does not correspond to our actual VBF filter name # noqa - "DiTau": Bits(v14=2048), - "DiPFJetAndDiTau": Bits(v12=8192), - "DiTauAndPFJet": Bits(v12=16384, v14="v12"), - "DisplacedTau": Bits(v12=32768), - "ETauDisplaced": Bits(v14=32768), - "MuTauDisplaced": Bits(v14=65536), - "DiTauDisplaced": Bits(v14=131072), - "Monitoring": Bits(v12=65536, v14=262144), - "MonitoringForVBFIsoTau": Bits(v14=524288), - "MonitoringDiTauAndPFJet": Bits(v14=1048576), - "MonitoringMuTauDisplaced": Bits(v14=2097152), - "MonitoringDiTau": Bits(v14=8388608), - "VBFDoubleTauMonitoring": Bits(v14=33554432), - "OverlapFilter": Bits(v14=16777216), - "RegionalPaths": Bits(v12=131072), - "L1SeededPaths": Bits(v12=262144), - "MatchL1HLT": Bits(v12=262144, v14=134217728), # for v12: alias for v12-v14 compatibility - "1Prong": Bits(v12=524288), - "OneProng": Bits(v14=4194304), # just changed "1" to "One" for v14, still means different filters - "SinglePFTauFilter": Bits(v14=536870912), - "VBFSingleTau": Bits(v14=1073741824), - }, - "jet": { - "4PixelOnlyPFCentralJetTightIDPt20": Bits(v12=1, v14="v12"), - "3PixelOnlyPFCentralJetTightIDPt30": Bits(v12=2, v14="v12"), - "PFJetFilterTwoC30": Bits(v12=4, v14="v12"), - "4PFCentralJetTightIDPt30": Bits(v12=8, v14="v12"), - "4PFCentralJetTightIDPt35": Bits(v12=16, v14="v12"), - "QuadCentralJet30": Bits(v12=32, v14="v12"), - "2PixelOnlyPFCentralJetTightIDPt40": Bits(v12=64, v14="v12"), - "L1sTripleJetVBF_orHTT_orDoubleJet_orSingleJet": Bits(v12=128, v14="v12"), - "3PFCentralJetTightIDPt40": Bits(v12=256, v14="v12"), - "3PFCentralJetTightIDPt45": Bits(v12=512, v14="v12"), - "L1sQuadJetsHT": Bits(v12=1024, v14="v12"), - "BTagCaloDeepCSVp17Double": Bits(v12=2048, v14="v12"), - "PFCentralJetLooseIDQuad30": Bits(v12=4096, v14="v12"), - "1PFCentralJetLooseID75": Bits(v12=8192, v14="v12"), - "2PFCentralJetLooseID60": 
Bits(v12=16384, v14="v12"), - "3PFCentralJetLooseID45": Bits(v12=32768, v14="v12"), - "4PFCentralJetLooseID40": Bits(v12=65536, v14="v12"), - "DoubleTau+Jet": Bits(v12=131072, v14="v12"), # v14 also contains PNet paths - "VBFcrossCleanedDeepTauPFTau": Bits(v12=262144, v14="v12"), # more general VBFDiTauJets in v14 TODO: change name? # noqa - "VBFcrossCleanedUsingDijetCorr": Bits(v12=524288, v14="v12"), # more general VBFSingleTauJets in v14 TODO: change name? # noqa - "MonitoringMuon+Tau+Jet": Bits(v12=1048576, v14="v12"), - "2PFCentralJetTightIDPt50": Bits(v12=2097152, v14="v12"), - "1PixelOnlyPFCentralJetTightIDPt60": Bits(v12=4194304, v14="v12"), - "1PFCentralJetTightIDPt70": Bits(v12=8388608, v14="v12"), - "BTagPFDeepJet1p5Single": Bits(v12=16777216, v14="v12"), - "BTagPFDeepJet4p5Triple": Bits(v12=33554432, v14="v12"), - "2BTagSumOR2BTagMeanPaths": Bits(v12=67108864, v14="v12"), - "2/1PixelOnlyPFCentralJetTightIDPt20/50": Bits(v12=134217728, v14="v12"), - "2PFCentralJetTightIDPt30": Bits(v12=268435456, v14="v12"), - "1PFCentralJetTightIDPt60": Bits(v12=536870912, v14="v12"), - "PF2CentralJetPt30PNet2BTagMean0p50": Bits(v12=1073741824, v14="v12"), - }, -}) - - -def get_bit_sum(nano_version: int, obj_name: str, names: list[str | None]) -> int: - return sum( - trigger_bits[obj_name][name].get(nano_version) - for name in names - if name is not None - ) or None - - -# 2016 triggers as per AN of CMS-HIG-20-010 (AN2018_121_v11-1) -def add_triggers_2016(config: od.Config) -> None: - """ - Adds all triggers to a *config*. For the conversion from filter names to trigger bits, see - https://github.com/cms-sw/cmssw/blob/master/PhysicsTools/NanoAOD/python/triggerObjects_cff.py. - """ - config.x.triggers = od.UniqueObjectIndex(Trigger) - - # - # e tauh - # - # from https://twiki.cern.ch/twiki/bin/view/CMS/TauTrigger#Tau_Triggers_in_NanoAOD_2016 - config.x.triggers.add( - name="HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau20_SingleL1", - id=401, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=26.0, # TODO - # filter names: - # - trigger_bits=None, # TODO - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=22.0, # TODO - # filter names: - # - trigger_bits=None, # TODO - ), - ), - # does not exist for run F on but should only be used until run 276215 -> which era? - # TODO: to be checked - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era <= "E"), - tags={"cross_trigger", "cross_e_tau"}, - ) - config.x.triggers.add( - name="HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau20", - id=402, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=26.0, # TODO - # filter names: - # - trigger_bits=None, # TODO - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=22.0, # TODO - # filter names: - # - trigger_bits=None, # TODO - ), - ), - # does not exist for run F on but should only be used between run 276215 and 278270 -> which eras? - # TODO: to be checked - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data and dataset_inst.x.era <= "E"), - tags={"cross_trigger", "cross_e_tau"}, - ) - config.x.triggers.add( - name="HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau30", - id=403, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=26.0, # TODO - # filter names: - # - trigger_bits=None, # TODO - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=32.0, # TODO - # filter names: - # - trigger_bits=None, # TODO - ), - ), - # does not exist until run E but should only be used after run 278270 -> which era? 
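The `trigger_bits` values attached to the trigger legs further down (e.g. `2 + 64` or `2 + 1024`) are plain sums of the per-filter bits tabulated above, which is also what `get_bit_sum` computes per nano version. A tiny sketch with a few v12 electron bits copied from the table, using a plain dict instead of the `Bits` helper whose API is not shown here:

```python
# v12 electron trigger bits copied from the table above
E_BITS_V12 = {"WPTightTrackIso": 2, "OverlapFilterPFTau": 8, "EleTau": 64}

def bit_sum(names: list[str]) -> int:
    # mirrors the idea of get_bit_sum above for a single nano version
    return sum(E_BITS_V12[name] for name in names)

print(bit_sum(["WPTightTrackIso", "EleTau"]))  # 66, i.e. the "2 + 64" style used for a leg
```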
- # TODO: to be checked - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data and dataset_inst.x.era >= "E"), - tags={"cross_trigger", "cross_e_tau"}, - ) - - # - # mu tauh - # - config.x.triggers.add( - name="HLT_IsoMu19_eta2p1_LooseIsoPFTau20", - id=301, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=22, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=23, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - ), - tags={"cross_trigger", "cross_mu_tau"}, - ) - config.x.triggers.add( - name="HLT_IsoMu19_eta2p1_LooseIsoPFTau20_SingleL1", - id=302, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=22, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=23, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - ), - tags={"cross_trigger", "cross_mu_tau"}, - ) - - # - # tauh tauh - # - config.x.triggers.add( - name="HLT_DoubleMediumIsoPFTau35_Trk1_eta2p1_Reg", - id=501, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=38, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=38, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or ("B" <= dataset_inst.x.era <= "F")), - tags={"cross_trigger", "cross_tau_tau"}, - ) - config.x.triggers.add( - name="HLT_DoubleMediumCombinedIsoPFTau35_Trk1_eta2p1_Reg", - id=502, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=38, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=38, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "H"), - tags={"cross_trigger", "cross_tau_tau"}, - ) - - # - # vbf - # - # none - - if config.campaign.has_tag("preVFP"): - # - # single electron - # - config.x.triggers.add( - name="HLT_Ele25_eta2p1_WPTight_Gsf", - id=201, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=28, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - ), - tags={"single_trigger", "single_e"}, - ) - - # - # single muon - # - config.x.triggers.add( - name="HLT_IsoMu22", - id=101, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=25, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - ), - tags={"single_trigger", "single_mu"}, - ) - config.x.triggers.add( - name="HLT_IsoMu22_eta2p1", - id=102, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=25, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - ), - tags={"single_trigger", "single_mu"}, - ) - config.x.triggers.add( - name="HLT_IsoTkMu22", - id=103, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=25, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - ), - tags={"single_trigger", "single_mu"}, - ) - config.x.triggers.add( - name="HLT_IsoTkMu22_eta2p1", - id=104, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=25, # TODO - # filter names: - # TODO - trigger_bits=None, # TODO - ), - ), - tags={"single_trigger", "single_mu"}, - ) - - -def add_triggers_2017(config: od.Config) -> None: - """ - Adds all triggers to a *config*. For the conversion from filter names to trigger bits, see - https://github.com/cms-sw/cmssw/blob/master/PhysicsTools/NanoAOD/python/triggerObjects_cff.py. 
- """ - config.x.triggers = od.UniqueObjectIndex(Trigger) - - # - # single electron - # - config.x.triggers.add( - name="HLT_Ele32_WPTight_Gsf", - id=202, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=35.0, - # filter names: - # hltEle32WPTightGsfTrackIsoFilter - trigger_bits=2, - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), - tags={"single_trigger", "single_e"}, - ) - config.x.triggers.add( - name="HLT_Ele32_WPTight_Gsf_L1DoubleEG", - id=203, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=35.0, - # filter names: - # hltEle32L1DoubleEGWPTightGsfTrackIsoFilter - # hltEGL1SingleEGOrFilter - trigger_bits=2 + 1024, - ), - ), - tags={"single_trigger", "single_e"}, - ) - config.x.triggers.add( - name="HLT_Ele35_WPTight_Gsf", - id=204, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=38.0, - # filter names: - # hltEle35noerWPTightGsfTrackIsoFilter - trigger_bits=2, - ), - ), - tags={"single_trigger", "single_e"}, - ) - - # - # single muon - # - config.x.triggers.add( - name="HLT_IsoMu24", - id=105, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=26.0, - # filter names: - # hltL3crIsoL1sSingleMu22L1f0L2f10QL3f24QL3trkIsoFiltered0p07 - trigger_bits=2, - ), - ), - tags={"single_trigger", "single_mu"}, - ) - config.x.triggers.add( - name="HLT_IsoMu27", - id=106, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=29.0, - # filter names: - # hltL3crIsoL1sMu22Or25L1f0L2f10QL3f27QL3trkIsoFiltered0p07 - trigger_bits=2, - ), - ), - tags={"single_trigger", "single_mu"}, - ) - - # - # e tauh - # - config.x.triggers.add( - name="HLT_Ele24_eta2p1_WPTight_Gsf_LooseChargedIsoPFTau30_eta2p1_CrossL1", - id=404, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=27.0, - # filter names: - # hltEle24erWPTightGsfTrackIsoFilterForTau - # hltOverlapFilterIsoEle24WPTightGsfLooseIsoPFTau30 - trigger_bits=2 + 64, - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=35.0, - # filter names: - # hltSelectedPFTau30LooseChargedIsolationL1HLTMatched - # hltOverlapFilterIsoEle24WPTightGsfLooseIsoPFTau30 - trigger_bits=1024 + 256, - ), - ), - tags={"cross_trigger", "cross_e_tau"}, - ) - - # - # mu tauh - # - config.x.triggers.add( - name="HLT_IsoMu20_eta2p1_LooseChargedIsoPFTau27_eta2p1_CrossL1", - id=303, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=22.0, - # filter names: - # hltL3crIsoL1sMu18erTau24erIorMu20erTau24erL1f0L2f10QL3f20QL3trkIsoFiltered0p07 - # hltOverlapFilterIsoMu20LooseChargedIsoPFTau27L1Seeded - trigger_bits=2 + 64, - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=32.0, - # filter names: - # hltSelectedPFTau27LooseChargedIsolationAgainstMuonL1HLTMatched or - # hltOverlapFilterIsoMu20LooseChargedIsoPFTau27L1Seeded - trigger_bits=1024 + 512, - ), - ), - tags={"cross_trigger", "cross_mu_tau"}, - ) - - # - # tauh tauh - # - config.x.triggers.add( - name="HLT_DoubleMediumChargedIsoPFTau35_Trk1_eta2p1_Reg", - id=503, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=40.0, - # filter names: - # hltDoublePFTau35TrackPt1MediumChargedIsolationAndTightOOSCPhotonsDz02Reg - trigger_bits=64, - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=40.0, - # filter names: - # hltDoublePFTau35TrackPt1MediumChargedIsolationAndTightOOSCPhotonsDz02Reg - trigger_bits=64, - ), - ), - tags={"cross_trigger", "cross_tau_tau"}, - ) - config.x.triggers.add( - name="HLT_DoubleTightChargedIsoPFTau35_Trk1_TightID_eta2p1_Reg", - id=504, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=40.0, - # filter names: - # 
hltDoublePFTau35TrackPt1TightChargedIsolationAndTightOOSCPhotonsDz02Reg - trigger_bits=64, - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=40.0, - # filter names: - # hltDoublePFTau35TrackPt1TightChargedIsolationAndTightOOSCPhotonsDz02Reg - trigger_bits=64, - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau"}, - ) - config.x.triggers.add( - name="HLT_DoubleMediumChargedIsoPFTau40_Trk1_TightID_eta2p1_Reg", - id=505, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=45.0, - # filter names: - # hltDoublePFTau40TrackPt1MediumChargedIsolationAndTightOOSCPhotonsDz02Reg - trigger_bits=64, - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=45.0, - # filter names: - # hltDoublePFTau40TrackPt1MediumChargedIsolationAndTightOOSCPhotonsDz02Reg - trigger_bits=64, - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau"}, - ) - config.x.triggers.add( - name="HLT_DoubleTightChargedIsoPFTau40_Trk1_eta2p1_Reg", - id=506, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=45.0, - # filter names: - # hltDoublePFTau40TrackPt1TightChargedIsolationDz02Reg - trigger_bits=64, - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=45.0, - # filter names: - # hltDoublePFTau40TrackPt1TightChargedIsolationDz02Reg - trigger_bits=64, - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau"}, - ) - - # - # vbf - # - config.x.triggers.add( - name="HLT_VBF_DoubleLooseChargedIsoPFTau20_Trk1_eta2p1_Reg", - id=601, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=25.0, - # filter names: - # hltDoublePFTau20TrackPt1LooseChargedIsolation - trigger_bits=2048, - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=25.0, - # filter names: - # hltDoublePFTau20TrackPt1LooseChargedIsolation - trigger_bits=2048, - ), - # additional leg infos for vbf jets - # TODO check if vbf legs are needed - vbf1=TriggerLeg( - # min_pt=115.0, - # filter names: - # hltMatchedVBFOnePFJet2CrossCleanedFromDoubleLooseChargedIsoPFTau20 - trigger_bits=1, - ), - vbf2=TriggerLeg( - # min_pt=40.0, - # filter names: - # hltMatchedVBFTwoPFJets2CrossCleanedFromDoubleLooseChargedIsoPFTau20 - trigger_bits=1, - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), - tags={"cross_trigger", "cross_tau_tau_vbf"}, - ) - - -def add_triggers_2018(config: od.Config) -> None: - config.x.triggers = od.UniqueObjectIndex(Trigger) - - # - # single electron - # - config.x.triggers.add( - name="HLT_Ele32_WPTight_Gsf", - id=202, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=35.0, - # filter names: - # hltEle32WPTightGsfTrackIsoFilter - trigger_bits=2, - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), - tags={"single_trigger", "single_e"}, - ) - config.x.triggers.add( - name="HLT_Ele35_WPTight_Gsf", - id=204, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=38.0, - # filter names: - # hltEle35noerWPTightGsfTrackIsoFilter - trigger_bits=2, - ), - ), - tags={"single_trigger", "single_e"}, - ) - - # - # single muon - # - config.x.triggers.add( - name="HLT_IsoMu24", - id=105, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=26.0, - # filter names: - # hltL3crIsoL1sSingleMu22L1f0L2f10QL3f24QL3trkIsoFiltered0p07 - trigger_bits=2, - ), - ), - tags={"single_trigger", "single_mu"}, - ) - config.x.triggers.add( - name="HLT_IsoMu27", - id=106, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # 
min_pt=29.0, - # filter names: - # hltL3crIsoL1sMu22Or25L1f0L2f10QL3f27QL3trkIsoFiltered0p07 - trigger_bits=2, - ), - ), - tags={"single_trigger", "single_mu"}, - ) - - # - # e tauh - # - config.x.triggers.add( - name="HLT_Ele24_eta2p1_WPTight_Gsf_LooseChargedIsoPFTau30_eta2p1_CrossL1", - id=404, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=27.0, - # filter names: - # hltEle24erWPTightGsfTrackIsoFilterForTau - # hltOverlapFilterIsoEle24WPTightGsfLooseIsoPFTau30 - trigger_bits=2 + 64, - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=35.0, - # filter names: - # hltSelectedPFTau30LooseChargedIsolationL1HLTMatched - # hltOverlapFilterIsoEle24WPTightGsfLooseIsoPFTau30 - trigger_bits=1024 + 256, - ), - ), - # the non-HPS path existed only for data and is fully covered in MC below - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_e_tau"}, - ) - - # - # mu tauh - # - config.x.triggers.add( - name="HLT_IsoMu20_eta2p1_LooseChargedIsoPFTau27_eta2p1_CrossL1", - id=303, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=22.0, - # filter names: - # hltL3crIsoL1sMu18erTau24erIorMu20erTau24erL1f0L2f10QL3f20QL3trkIsoFiltered0p07 - # hltOverlapFilterIsoMu20LooseChargedIsoPFTau27L1Seeded - trigger_bits=2 + 64, - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=32.0, - # filter names: - # hltSelectedPFTau27LooseChargedIsolationAgainstMuonL1HLTMatched or - # hltOverlapFilterIsoMu20LooseChargedIsoPFTau27L1Seeded - trigger_bits=1024 + 512, - ), - ), - # the non-HPS path existed only for data and is fully covered in MC below - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_mu_tau"}, - ) - - # - # tauh tauh - # - config.x.triggers.add( - name="HLT_DoubleTightChargedIsoPFTau35_Trk1_TightID_eta2p1_Reg", - id=504, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=40.0, - # filter names: - # hltDoublePFTau35TrackPt1TightChargedIsolationAndTightOOSCPhotonsDz02Reg - trigger_bits=64, - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=40.0, - # filter names: - # hltDoublePFTau35TrackPt1TightChargedIsolationAndTightOOSCPhotonsDz02Reg - trigger_bits=64, - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau"}, - ) - config.x.triggers.add( - name="HLT_DoubleMediumChargedIsoPFTau40_Trk1_TightID_eta2p1_Reg", - id=505, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=45.0, - # filter names: - # hltDoublePFTau40TrackPt1MediumChargedIsolationAndTightOOSCPhotonsDz02Reg - trigger_bits=64, - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=45.0, - # filter names: - # hltDoublePFTau40TrackPt1MediumChargedIsolationAndTightOOSCPhotonsDz02Reg - trigger_bits=64, - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau"}, - ) - config.x.triggers.add( - name="HLT_DoubleTightChargedIsoPFTau40_Trk1_eta2p1_Reg", - id=506, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=45.0, - # filter names: - # hltDoublePFTau40TrackPt1TightChargedIsolationDz02Reg - trigger_bits=64, - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=45.0, - # filter names: - # hltDoublePFTau40TrackPt1TightChargedIsolationDz02Reg - trigger_bits=64, - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data), - tags={"cross_trigger", "cross_tau_tau"}, - ) - - # - # vbf - # - config.x.triggers.add( - name="HLT_VBF_DoubleLooseChargedIsoPFTau20_Trk1_eta2p1_Reg", - id=601, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=25.0, - 
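The hard-coded `trigger_bits` values in the 2017/2018 definitions above (`2`, `64`, `1024 + 512`, ...) are sums of individual NanoAOD filter bits, i.e. `2 + 64` stands for bits 1 and 6 of `TrigObj.filterBits`. Below is a minimal sketch of how such a summed mask can be checked, assuming the "all requested bits must be set" convention; the actual convention is defined by the trigger-matching code elsewhere in this repository.

```python
# Sketch only: assumes a leg matches when all bits of the requested mask are set
# in the trigger object's filterBits word.
def has_all_bits(filter_bits: int, required_mask: int) -> bool:
    return (filter_bits & required_mask) == required_mask


required = 2 + 64  # e.g. the e leg of the e-tauh cross trigger above (bits 1 and 6)
assert has_all_bits(0b1000010, required)      # both bits set -> match
assert not has_all_bits(0b0000010, required)  # bit 6 missing -> no match
```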
# filter names: - # hltDoublePFTau20TrackPt1LooseChargedIsolation - trigger_bits=2048, - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=25.0, - # filter names: - # hltDoublePFTau20TrackPt1LooseChargedIsolation - trigger_bits=2048, - ), - # additional leg infos for vbf jets - vbf1=TriggerLeg( - # min_pt=115.0, - # filter names: - # hltMatchedVBFOnePFJet2CrossCleanedFromDoubleLooseChargedIsoPFTau20 - trigger_bits=1, - ), - vbf2=TriggerLeg( - # min_pt=40.0, - # filter names: - # hltMatchedVBFTwoPFJets2CrossCleanedFromDoubleLooseChargedIsoPFTau20 - trigger_bits=1, - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), - tags={"cross_trigger", "cross_tau_tau_vbf"}, - ) - - -def add_triggers_2022(config: od.Config) -> None: - """ - Adds all triggers to a *config*. For the conversion from filter names to trigger bits, see - https://github.com/cms-sw/cmssw/blob/master/PhysicsTools/NanoAOD/python/triggerObjects_cff.py. - Tau Trigger: https://twiki.cern.ch/twiki/bin/viewauth/CMS/TauTrigger#Trigger_Table_for_2022 - Electron Trigger: https://twiki.cern.ch/twiki/bin/view/CMS/EgHLTRunIIISummary - Muon Trigger: https://twiki.cern.ch/twiki/bin/view/CMS/MuonHLT2022 - """ - # get the nano version - nano_version = config.campaign.x.version - get_bit_sum_v = functools.partial(get_bit_sum, nano_version) - - config.x.triggers = od.UniqueObjectIndex(Trigger) - - # - # single electron - # - config.x.triggers.add( - name="HLT_Ele30_WPTight_Gsf", - id=205, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltEle30WPTightGsfTrackIsoFilter - trigger_bits=get_bit_sum_v("e", [ - "WPTightTrackIso", - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: ( - dataset_inst.is_mc or - dataset_inst.has_tag("etau") or - dataset_inst.has_tag("ee") or - dataset_inst.has_tag("emu_from_e") or - dataset_inst.has_tag("emu_from_mu") - )), - tags={"single_trigger", "single_e"}, - ) - - # - # single muon - # - config.x.triggers.add( - name="HLT_IsoMu24", - id=105, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltL3crIsoL1sSingleMu22L1f0L2f10QL3f24QL3trkIsoFiltered0p08 - trigger_bits=get_bit_sum_v("mu", [ - "Iso", - "SingleMuon", - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: ( - dataset_inst.is_mc or - dataset_inst.has_tag("mutau") or - dataset_inst.has_tag("emu_from_e") or - dataset_inst.has_tag("emu_from_mu") or - dataset_inst.has_tag("mumu") - )), - tags={"single_trigger", "single_mu"}, - ) - - # - # e tauh - # - config.x.triggers.add( - name="HLT_Ele24_eta2p1_WPTight_Gsf_LooseDeepTauPFTauHPS30_eta2p1_CrossL1", - id=405, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsOverlapFilterIsoEle24WPTightGsfLooseETauWPDeepTauPFTau30 - trigger_bits=get_bit_sum_v("e", [ - "OverlapFilterPFTau", - "EleTau", - ]), - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsOverlapFilterIsoEle24WPTightGsfLooseETauWPDeepTauPFTau30 - trigger_bits=get_bit_sum_v("tau", [ - "DeepTau", - "HPS", - "OverlapFilterIsoEle", - "ETauFilter" if nano_version == 14 else None, - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("etau")), - tags={"cross_trigger", "cross_e_tau"}, - ) - - # - # mu tauh - # - config.x.triggers.add( - 
name="HLT_IsoMu20_eta2p1_LooseDeepTauPFTauHPS27_eta2p1_CrossL1", - id=304, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsOverlapFilterIsoMu20LooseMuTauWPDeepTauPFTau27L1Seeded - trigger_bits=get_bit_sum_v("mu", [ - "OverlapFilterPFTau", - "MuTau", - ]), - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsOverlapFilterIsoMu20LooseMuTauWPDeepTauPFTau27L1Seeded - trigger_bits=get_bit_sum_v("tau", [ - "DeepTau", - "HPS", - "OverlapFilterIsoMu", - "MuTauFilter" if nano_version == 14 else None, - "MatchL1HLT", - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("mutau")), - tags={"cross_trigger", "cross_mu_tau"}, - ) - - # - # tauh tauh - # - config.x.triggers.add( - name="HLT_DoubleMediumDeepTauPFTauHPS35_L2NN_eta2p1", - id=507, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsDoublePFTau35MediumDitauWPDeepTauL1HLTMatched - trigger_bits=get_bit_sum_v("tau", [ - "DeepTauDiTau", - "HPS", - "Medium" if nano_version == 14 else None, - ]), - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsDoublePFTau35MediumDitauWPDeepTauL1HLTMatched - trigger_bits=get_bit_sum_v("tau", [ - "DeepTauDiTau", - "HPS", - "Medium" if nano_version == 14 else None, - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau"}, - ) - - # - # vbf - # - # TODO: remove check when fully switched to v14 - if nano_version >= 14: - config.x.triggers.add( - name="HLT_VBF_DoubleMediumDeepTauPFTauHPS20_eta2p1", - id=602, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsDoublePFTau20TrackDeepTauDitauWPForVBFIsoTau - # HPS and DeepTau actually redundant for v14 but needed for v12 - # as there is nothing else matching due to wrong VBFpDoublePFTau_run3 bit - trigger_bits=get_bit_sum_v("tau", [ - "VBFDiTau" if nano_version == 14 else None, - "HPS", - "DeepTau", - ]), - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsDoublePFTau20TrackDeepTauDitauWPForVBFIsoTau - trigger_bits=get_bit_sum_v("tau", [ - "VBFDiTau" if nano_version == 14 else None, - "HPS", - "DeepTau", - ]), - ), - # TODO: check if vbf legs are needed - # additional leg infos for vbf jets - vbf1=TriggerLeg( - pdg_id=1, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltMatchedVBFTwoPFJets2CrossCleanedFromDoubleMediumDeepTauDitauWPPFTauHPS20? - trigger_bits=get_bit_sum_v("jet", [ - "VBFcrossCleanedDeepTauPFTau" if nano_version == 14 else None, - ]), - ), - vbf2=TriggerLeg( - pdg_id=1, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltMatchedVBFTwoPFJets2CrossCleanedFromDoubleMediumDeepTauDitauWPPFTauHPS20? 
- trigger_bits=get_bit_sum_v("jet", [ - "VBFcrossCleanedDeepTauPFTau" if nano_version == 14 else None, - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau_vbf"}, - ) - - # Currently disabled since it may not be needed - # config.x.triggers.add( - # name="HLT_DoublePFJets40_Mass500_MediumDeepTauPFTauHPS45_L2NN_MediumDeepTauPFTauHPS20_eta2p1", - # id=604, - # legs=dict( - # TriggerLeg( - # pdg_id=15, - # # min_pt=25.0, - # trigger_bits=None, - # ), - # TriggerLeg( - # pdg_id=15, - # # min_pt=25.0, - # # filter names: - # trigger_bits=None, - # ) - # ], - # ) - - # - # tau tau jet - # - config.x.triggers.add( - name="HLT_DoubleMediumDeepTauPFTauHPS30_L2NN_eta2p1_PFJet60", - id=701, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsOverlapFilterDeepTauDoublePFTau30PFJet60 - trigger_bits=get_bit_sum_v("tau", [ - "DiTauAndPFJet", - ]), - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsOverlapFilterDeepTauDoublePFTau30PFJet60 - trigger_bits=get_bit_sum_v("tau", [ - "DiTauAndPFJet", - ]), - ), - jet=TriggerLeg( - pdg_id=1, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsOverlapFilterDeepTauDoublePFTau30PFJet60 - trigger_bits=get_bit_sum_v("jet", [ - "DoubleTau+Jet", - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau_jet"}, - ) - - -def add_triggers_2023(config: od.Config) -> None: - """ - Adds all triggers to a *config*. For the conversion from filter names to trigger bits, see - https://github.com/cms-sw/cmssw/blob/master/PhysicsTools/NanoAOD/python/triggerObjects_cff.py. 
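For the Run 3 (2022/2023) configurations the bits are no longer hard-coded but assembled from human-readable filter-bit names by `get_bit_sum`, bound to the campaign's nano version through `functools.partial`; `None` entries mark names that only exist in some NanoAOD versions and are skipped. The helper and its per-version bit tables are defined elsewhere in the repository, so the sketch below only illustrates the assumed behaviour with a made-up table.

```python
import functools

# made-up bit positions purely for illustration; the real per-version tables live in
# this repository's trigger configuration
EXAMPLE_BITS = {14: {("tau", "HPS"): 5, ("tau", "DeepTauDiTau"): 6}}


def get_bit_sum_sketch(nano_version, obj_name, names):
    # sum 2**position for every requested name, skipping None placeholders
    table = EXAMPLE_BITS[nano_version]
    return sum(1 << table[(obj_name, name)] for name in names if name is not None)


# bind the nano version once, as done with functools.partial in the config above
get_bit_sum_v = functools.partial(get_bit_sum_sketch, 14)
assert get_bit_sum_v("tau", ["DeepTauDiTau", "HPS", None]) == (1 << 6) + (1 << 5)
```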
- """ - # get trigger bits for the requested nano version - nano_version = config.campaign.x.version - get_bit_sum_v = functools.partial(get_bit_sum, nano_version) - - config.x.triggers = od.UniqueObjectIndex(Trigger) - - # - # single electron - # - config.x.triggers.add( - name="HLT_Ele30_WPTight_Gsf", - id=205, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # WPTightTrackIso - trigger_bits=get_bit_sum_v("e", [ - "WPTightTrackIso", - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: ( - dataset_inst.is_mc or - dataset_inst.has_tag("etau") or - dataset_inst.has_tag("ee") or - dataset_inst.has_tag("emu_from_e") or - dataset_inst.has_tag("emu_from_mu") - )), - tags={"single_trigger", "single_e"}, - ) - - # - # single muon - # - config.x.triggers.add( - name="HLT_IsoMu24", - id=105, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltL3crIsoL1sSingleMu22L1f0L2f10QL3f24QL3trkIsoFiltered0p08 (1mu + Iso) - trigger_bits=get_bit_sum_v("mu", [ - "Iso", - "SingleMuon", - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: ( - dataset_inst.is_mc or - dataset_inst.has_tag("mutau") or - dataset_inst.has_tag("emu_from_e") or - dataset_inst.has_tag("emu_from_mu") or - dataset_inst.has_tag("mumu") - )), - tags={"single_trigger", "single_mu"}, - ) - - # - # e tauh - # - config.x.triggers.add( - name="HLT_Ele24_eta2p1_WPTight_Gsf_LooseDeepTauPFTauHPS30_eta2p1_CrossL1", - id=405, - legs=dict( - e=TriggerLeg( - pdg_id=11, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsOverlapFilterIsoEle24WPTightGsfLooseETauWPDeepTauPFTau30 (OverlapFilter) - trigger_bits=get_bit_sum_v("e", [ - "OverlapFilterPFTau", - "EleTau", - ]), - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsOverlapFilterIsoEle24WPTightGsfLooseETauWPDeepTauPFTau30 - trigger_bits=get_bit_sum_v("tau", [ - "DeepTau", - "HPS", - "OverlapFilterIsoEle", - "ETauFilter" if nano_version == 14 else None, - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("etau")), - tags={"cross_trigger", "cross_e_tau"}, - ) - - # - # mu tauh - # - config.x.triggers.add( - name="HLT_IsoMu20_eta2p1_LooseDeepTauPFTauHPS27_eta2p1_CrossL1", - id=304, - legs=dict( - mu=TriggerLeg( - pdg_id=13, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsOverlapFilterIsoMu20LooseMuTauWPDeepTauPFTau27L1Seeded (OverlapFilter PFTau) - trigger_bits=get_bit_sum_v("mu", [ - "OverlapFilterPFTau", - "MuTau", - ]), - ), - tau=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsSelectedPFTau27LooseMuTauWPDeepTauVsJetsAgainstMuonL1HLTMatched (DeepTau + HPS) - trigger_bits=get_bit_sum_v("tau", [ - "DeepTau", - "HPS", - "OverlapFilterIsoMu", - "MuTauFilter" if nano_version == 14 else None, - "MatchL1HLT", - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("mutau")), - tags={"cross_trigger", "cross_mu_tau"}, - ) - - # - # tauh tauh - # - config.x.triggers.add( - name="HLT_DoubleMediumDeepTauPFTauHPS35_L2NN_eta2p1", - id=507, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsDoublePFTau35MediumDitauWPDeepTauL1HLTMatched (Deeptau + HPS) - trigger_bits=get_bit_sum_v("tau", [ - "DeepTauDiTau", - "HPS", - "Medium" if 
nano_version == 14 else None, - ]), - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsDoublePFTau35MediumDitauWPDeepTauL1HLTMatched (Deeptau + HPS) - trigger_bits=get_bit_sum_v("tau", [ - "DeepTauDiTau", - "HPS", - "Medium" if nano_version == 14 else None, - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau"}, - ) - - # - # vbf - # - config.x.triggers.add( - name="HLT_VBF_DoubleMediumDeepTauPFTauHPS20_eta2p1", - id=602, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsDoublePFTau20TrackDeepTauDitauWPForVBFIsoTau - trigger_bits=get_bit_sum_v("tau", [ - "VBFDiTau" if nano_version == 14 else None, - "HPS", - "DeepTau", - ]), - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsDoublePFTau20TrackDeepTauDitauWPForVBFIsoTau - trigger_bits=get_bit_sum_v("tau", [ - "VBFDiTau" if nano_version == 14 else None, - "HPS", - "DeepTau", - ]), - ), - # TODO: check if vbf legs are needed - # additional leg infos for vbf jets - vbf1=TriggerLeg( - pdg_id=1, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltMatchedVBFTwoPFJets2CrossCleanedFromDoubleMediumDeepTauDitauWPPFTauHPS20? - trigger_bits=get_bit_sum_v("jet", [ - "VBFcrossCleanedDeepTauPFTau" if nano_version == 14 else None, - ]), - ), - vbf2=TriggerLeg( - pdg_id=1, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltMatchedVBFTwoPFJets2CrossCleanedFromDoubleMediumDeepTauDitauWPPFTauHPS20? - trigger_bits=get_bit_sum_v("jet", [ - "VBFcrossCleanedDeepTauPFTau" if nano_version == 14 else None, - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau_vbf"}, - ) - - # - # tau tau jet - # - config.x.triggers.add( - name="HLT_DoubleMediumDeepTauPFTauHPS30_L2NN_eta2p1_PFJet60", - id=701, - legs=dict( - tau1=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsDoublePFTau30MediumDitauWPDeepTauL1HLTMatchedDoubleTauJet - trigger_bits=get_bit_sum_v("tau", [ - "DiTauAndPFJet", - ]), - ), - tau2=TriggerLeg( - pdg_id=15, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsDoublePFTau30MediumDitauWPDeepTauL1HLTMatchedDoubleTauJet - trigger_bits=get_bit_sum_v("tau", [ - "DiTauAndPFJet", - ]), - ), - jet=TriggerLeg( - pdg_id=1, - # min_pt=None, # cut on reco objects, not TrigObj - # filter names: - # hltHpsOverlapFilterDeepTauDoublePFTau30PFJet60 - trigger_bits=get_bit_sum_v("jet", [ - "DoubleTau+Jet", - ]), - ), - ), - applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.has_tag("tautau")), - tags={"cross_trigger", "cross_tau_tau_jet"}, - ) diff --git a/hbt/inference/default.py b/hbt/inference/default.py deleted file mode 100644 index ff99dd21..00000000 --- a/hbt/inference/default.py +++ /dev/null @@ -1,228 +0,0 @@ -# coding: utf-8 - -""" -Default inference model. 
-""" - -import functools - -import law - -from columnflow.inference import inference_model, ParameterType, FlowStrategy -from columnflow.config_util import get_datasets_from_process - - -logger = law.logger.get_logger(__name__) - - -@inference_model -def default(self): - # gather config and campaign info - year2 = self.config_inst.campaign.x.year % 100 - campaign_suffix = "" - if self.config_inst.campaign.has_tag({"preEE", "preBPix"}): - campaign_suffix = "pre" - elif self.config_inst.campaign.has_tag({"postEE", "postBPix"}): - campaign_suffix = "post" - campaign_key = f"{year2}{campaign_suffix}" - - # helper - find_datasets = functools.partial(get_datasets_from_process, self.config_inst, strategy="all") - - # mapping between names of processes in the config and how combine datacards should see them - proc_map = dict([ - *[ - (f"hh_ggf_hbb_htt_kl{kl}_kt1", f"ggHH_kl_{kl}_kt_1_13p6TeV_hbbhtt") - for kl in ["0", "1", "2p45", "5"] - ], - ("tt", "ttbar"), - ("ttv", "ttbarV"), - ("ttvv", "ttbarVV"), - ("st", "singlet"), - ("dy", "DY"), - # ("z", "EWK"), # currently not used - ("w", "W"), - ("vv", "VV"), - ("vvv", "VVV"), - ("wh", "WH_htt"), - ("zh", "ZH_hbb"), - ("h_ggf", "ggH_htt"), - ("h_vbf", "qqH_htt"), - ("tth", "ttH_hbb"), - ("qcd", "QCD"), - ]) - - # - # categories - # - - for ch in ["etau", "mutau", "tautau"]: - for cat in ["res1b", "res2b", "boosted"]: - self.add_category( - f"cat_{campaign_key}_{ch}_{cat}", - config_category=f"{ch}__{cat}__os__iso", - config_variable="res_dnn_hh_fine", - config_data_datasets=["data_*"], - data_from_processes=[ - combine_name for proc_name, combine_name in proc_map.items() - if ( - not self.config_inst.get_process(proc_name).has_tag("nonresonant_signal") and - proc_name != "qcd" - ) - ], - mc_stats=10.0, - flow_strategy=FlowStrategy.move, - ) - - # - # processes - # - - for proc_name, combine_name in proc_map.items(): - proc_inst = self.config_inst.get_process(proc_name) - is_dynamic = proc_name == "qcd" - dataset_names = [] - if not is_dynamic: - dataset_names = [dataset.name for dataset in find_datasets(proc_name)] - if not dataset_names: - logger.debug( - f"skipping process {proc_name} in inference model {self.cls_name}, no matching " - f"datasets found in config {self.config_inst.name}", - ) - continue - self.add_process( - name=combine_name, - config_process=proc_name, - config_mc_datasets=dataset_names, - is_signal=proc_inst.has_tag("nonresonant_signal"), - is_dynamic=is_dynamic, - ) - - # - # parameters - # - - # general groups - self.add_parameter_group("experiment") - self.add_parameter_group("theory") - - # groups that contain parameters that solely affect the signal cross section and/or br - self.add_parameter_group("signal_norm_xs") - self.add_parameter_group("signal_norm_xsbr") - - # parameter that is added by the HH physics model, representing kl-dependent QCDscale + mtop - # uncertainties on the ggHH cross section - self.add_parameter_to_group("THU_HH", "theory") - self.add_parameter_to_group("THU_HH", "signal_norm_xs") - self.add_parameter_to_group("THU_HH", "signal_norm_xsbr") - - # theory uncertainties - self.add_parameter( - "BR_hbb", - type=ParameterType.rate_gauss, - process=["*_hbb", "*_hbbhtt"], - effect=(0.9874, 1.0124), - group=["theory", "signal_norm_xsbr"], - ) - self.add_parameter( - "BR_htt", - type=ParameterType.rate_gauss, - process=["*_htt", "*_hbbhtt"], - effect=(0.9837, 1.0165), - group=["theory", "signal_norm_xsbr"], - ) - self.add_parameter( - "BR_htt", - type=ParameterType.rate_gauss, - process=["tt*"], - 
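The naming scheme used by the removed inference model is easiest to see in isolation: the kl scan points are renamed to the combine convention `ggHH_kl_<kl>_kt_1_13p6TeV_hbbhtt`, and categories are keyed by a short campaign tag built from the two-digit year plus a pre/post suffix. A standalone sketch of both pieces (plain Python, no columnflow needed):

```python
def build_proc_map():
    # config process name -> datacard process name, as in the removed model
    proc_map = {
        f"hh_ggf_hbb_htt_kl{kl}_kt1": f"ggHH_kl_{kl}_kt_1_13p6TeV_hbbhtt"
        for kl in ("0", "1", "2p45", "5")
    }
    proc_map.update({"tt": "ttbar", "dy": "DY", "st": "singlet"})
    return proc_map


def campaign_key(year, campaign_tags):
    # e.g. 2022 + preEE -> "22pre", 2023 + postBPix -> "23post"
    suffix = ""
    if campaign_tags & {"preEE", "preBPix"}:
        suffix = "pre"
    elif campaign_tags & {"postEE", "postBPix"}:
        suffix = "post"
    return f"{year % 100}{suffix}"


assert campaign_key(2022, {"preEE"}) == "22pre"
assert build_proc_map()["hh_ggf_hbb_htt_kl2p45_kt1"] == "ggHH_kl_2p45_kt_1_13p6TeV_hbbhtt"
```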
effect=(0.9, 1.1), - group=["theory", "signal_norm_xsbr"], - ) - self.add_parameter( - "pdf_gg", # contains alpha_s - type=ParameterType.rate_gauss, - process="TT", - effect=1.042, - group=["theory"], - ) - self.add_parameter( - "pdf_Higgs_ggHH", # contains alpha_s - type=ParameterType.rate_gauss, - process="ggHH_*", - effect=1.023, - group=["theory", "signal_norm_xs", "signal_norm_xsbr"], - ) - # self.add_parameter( - # "pdf_Higgs_qqHH", # contains alpha_s - # type=ParameterType.rate_gauss, - # process="qqHH_*", - # effect=1.027, - # group=["theory", "signal_norm_xs", "signal_norm_xsbr"], - # ) - self.add_parameter( - "QCDscale_ttbar", - type=ParameterType.rate_gauss, - process="TT", - effect=(0.965, 1.024), - group=["theory"], - ) - # self.add_parameter( - # "QCDscale_qqHH", - # type=ParameterType.rate_gauss, - # process="qqHH_*", - # effect=(0.9997, 1.0005), - # group=["theory", "signal_norm_xs", "signal_norm_xsbr"], - # ) - - # lumi - lumi = self.config_inst.x.luminosity - for unc_name in lumi.uncertainties: - self.add_parameter( - unc_name, - type=ParameterType.rate_gauss, - effect=lumi.get(names=unc_name, direction=("down", "up"), factor=True), - group="experiment", - ) - - # btag - # for name in self.config_inst.x.btag_unc_names: - # self.add_parameter( - # f"CMS_btag_{name}", - # type=ParameterType.shape, - # config_shift_source=f"btag_{name}", - # group="experiment", - # ) - - # pileup - # self.add_parameter( - # "CMS_pileup_2022", - # type=ParameterType.shape, - # config_shift_source="minbias_xs", - # group="experiment", - # ) - - # - # cleanup - # - - self.cleanup(keep_parameters="THU_HH") - - -@inference_model -def default_no_shifts(self): - # same initialization as "default" above - default.init_func.__get__(self, self.__class__)() - - # - # remove all parameters that require a shift source other than nominal - # - - for category_name, process_name, parameter in self.iter_parameters(): - if parameter.type.is_shape or any(trafo.from_shape for trafo in parameter.transformations): - self.remove_parameter(parameter.name, process=process_name, category=category_name) - - # - # cleanup - # - - self.cleanup(keep_parameters="THU_HH") diff --git a/hbt/production/hh_mass.py b/hbt/production/hh_mass.py deleted file mode 100644 index 6759b1ec..00000000 --- a/hbt/production/hh_mass.py +++ /dev/null @@ -1,66 +0,0 @@ -import functools -from columnflow.production import Producer, producer -from columnflow.util import maybe_import -from columnflow.columnar_util import EMPTY_FLOAT, set_ak_column -from columnflow.production.util import attach_coffea_behavior - -np = maybe_import("numpy") -ak = maybe_import("awkward") - -set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32) - - -@producer( - uses=( - "Electron.*", "Tau.*", "Jet.*", "HHBJet.*", - attach_coffea_behavior, - ), - produces={ - "hh.*", "diTau.*", "diBJet.*", - }, -) -def hh_mass(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - events = self[attach_coffea_behavior]( - events, - collections={"HHBJet": {"type_name": "Jet"}}, - **kwargs, - ) - - # total number of objects per event - n_bjets = ak.num(events.HHBJet, axis=1) - n_taus = ak.num(events.Tau, axis=1) - # mask to select events with exactly 2 taus - ditau_mask = (n_taus == 2) - diBjet_mask = (n_bjets == 2) - dihh_mask = ditau_mask & diBjet_mask - - # four-vector sum of first two elements of each object collection (possibly fewer) - diBJet = events.HHBJet.sum(axis=1) - diTau = events.Tau[:, :2].sum(axis=1) - hh = diBJet + diTau - - def 
save_interesting_properties( - source: ak.Array, - target_column: str, - column_values: ak.Array, - mask: ak.Array[bool], - ): - return set_ak_column_f32( - source, - target_column, - ak.where(mask, column_values, EMPTY_FLOAT), - ) - - # write out variables to the corresponding events array, applying certain masks - events = save_interesting_properties(events, "diBJet.mass", diBJet.mass, diBjet_mask) - events = save_interesting_properties(events, "diBJet.eta", diBJet.eta, diBjet_mask) - events = save_interesting_properties(events, "diBJet.pt", diBJet.pt, diBjet_mask) - events = save_interesting_properties(events, "diTau.mass", diTau.mass, ditau_mask) - events = save_interesting_properties(events, "diTau.eta", diTau.eta, ditau_mask) - events = save_interesting_properties(events, "diTau.pt", diTau.pt, ditau_mask) - events = save_interesting_properties(events, "hh.mass", hh.mass, dihh_mask) - events = save_interesting_properties(events, "hh.eta", hh.eta, dihh_mask) - events = save_interesting_properties(events, "hh.pt", hh.pt, dihh_mask) - - # return the events - return events diff --git a/hbt/production/weights.py b/hbt/production/weights.py deleted file mode 100644 index 2e333fbf..00000000 --- a/hbt/production/weights.py +++ /dev/null @@ -1,233 +0,0 @@ -# coding: utf-8 - -""" -Column production methods related to generic event weights. -""" - -from columnflow.production import Producer, producer -from columnflow.production.cms.pileup import pu_weight -from columnflow.production.cms.pdf import pdf_weights -from columnflow.util import maybe_import, safe_div, InsertableDict -from columnflow.columnar_util import set_ak_column - - -ak = maybe_import("awkward") -np = maybe_import("numpy") - - -@producer( - uses={ - pu_weight.PRODUCES, - # custom columns created upstream, probably by a producer - "process_id", - }, - # only run on mc - mc_only=True, -) -def normalized_pu_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - for route in self[pu_weight].produced_columns: - weight_name = str(route) - if not weight_name.startswith("pu_weight"): - continue - - # if there are postfixes to veto (i.e. 
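The `hh_mass` producer above follows a common columnar pattern: compute a quantity for every event, then keep it only where the event satisfies a multiplicity requirement and write a sentinel elsewhere. A plain-numpy sketch of that pattern follows; the sentinel value here is an assumption, the real constant is `EMPTY_FLOAT` from `columnflow.columnar_util`.

```python
import numpy as np

EMPTY_FLOAT = -99999.0  # placeholder value; the real constant comes from columnflow

# toy per-event inputs: number of taus and the di-tau mass computed for all events
n_taus = np.array([2, 1, 2, 3])
ditau_mass = np.array([91.2, 45.0, 120.5, 60.0])

# keep the mass only for events with exactly two taus
ditau_mask = n_taus == 2
masked_mass = np.where(ditau_mask, ditau_mass, EMPTY_FLOAT)
# -> [91.2, -99999.0, 120.5, -99999.0]
```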
we are not in the nominal case) - # skip the weight if it has a vetoed postfix - if any(weight_name.endswith(postfix) for postfix in self.veto_postfix): - continue - - # create a weight vector starting with ones - norm_weight_per_pid = np.ones(len(events), dtype=np.float32) - - # fill weights with a new mask per unique process id (mostly just one) - for pid in self.unique_process_ids: - pid_mask = events.process_id == pid - norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid] - - # multiply with actual weight - norm_weight_per_pid = norm_weight_per_pid * events[weight_name] - - # store it - norm_weight_per_pid = ak.values_astype(norm_weight_per_pid, np.float32) - events = set_ak_column(events, f"normalized_{weight_name}", norm_weight_per_pid, value_type=np.float32) - - return events - - -@normalized_pu_weight.init -def normalized_pu_weight_init(self: Producer) -> None: - self.veto_postfix = [] - if getattr(self, "global_shift_inst", None): - if not self.global_shift_inst.is_nominal: - self.veto_postfix.extend(("up", "down")) - self.produces |= { - f"normalized_{weight_name}" - for weight_name in (str(route) for route in self[pu_weight].produced_columns) - if weight_name.startswith("pu_weight") - if not any(weight_name.endswith(postfix) for postfix in self.veto_postfix) - } - - -@normalized_pu_weight.requires -def normalized_pu_weight_requires(self: Producer, reqs: dict) -> None: - from columnflow.tasks.selection import MergeSelectionStats - reqs["selection_stats"] = MergeSelectionStats.req_different_branching( - self.task, - branch=-1 if self.task.is_workflow() else 0, - ) - - -@normalized_pu_weight.setup -def normalized_pu_weight_setup( - self: Producer, - reqs: dict, - inputs: dict, - reader_targets: InsertableDict, -) -> None: - # load the selection stats - selection_stats = self.task.cached_value( - key="selection_stats", - func=lambda: inputs["selection_stats"]["stats"].load(formatter="json"), - ) - - # get the unique process ids in that dataset - key = "sum_mc_weight_pu_weight_per_process" - self.unique_process_ids = list(map(int, selection_stats[key].keys())) - - # helper to get numerators and denominators - def numerator_per_pid(pid): - key = "sum_mc_weight_per_process" - return selection_stats[key].get(str(pid), 0.0) - - def denominator_per_pid(weight_name, pid): - key = f"sum_mc_weight_{weight_name}_per_process" - return selection_stats[key].get(str(pid), 0.0) - - # extract the ratio per weight and pid - self.ratio_per_pid = { - weight_name: { - pid: safe_div(numerator_per_pid(pid), denominator_per_pid(weight_name, pid)) - for pid in self.unique_process_ids - } - for weight_name in (str(route) for route in self[pu_weight].produced_columns) - if weight_name.startswith("pu_weight") - } - - -@producer( - # only run on mc - mc_only=True, -) -def normalized_pdf_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - for postfix in self.postfixes: - # create the normalized weight - avg = self.average_pdf_weights[postfix] - normalized_weight = events[f"pdf_weight{postfix}"] / avg - - # store it - events = set_ak_column(events, f"normalized_pdf_weight{postfix}", normalized_weight, value_type=np.float32) - - return events - - -@normalized_pdf_weight.init -def normalized_pdf_weight_init(self: Producer) -> None: - - self.postfixes = [""] - if getattr(self, "global_shift_inst", None): - if self.global_shift_inst.is_nominal: - self.postfixes.extend(("_up", "_down")) - columns = {f"pdf_weight{postfix}" for postfix in self.postfixes} - - self.uses |= columns - 
self.produces |= {f"normalized_{column}" for column in columns} - - -@normalized_pdf_weight.requires -def normalized_pdf_weight_requires(self: Producer, reqs: dict) -> None: - from columnflow.tasks.selection import MergeSelectionStats - reqs["selection_stats"] = MergeSelectionStats.req_different_branching( - self.task, - branch=-1 if self.task.is_workflow() else 0, - ) - - -@normalized_pdf_weight.setup -def normalized_pdf_weight_setup( - self: Producer, - reqs: dict, - inputs: dict, - reader_targets: InsertableDict, -) -> None: - # load the selection stats - selection_stats = self.task.cached_value( - key="selection_stats", - func=lambda: inputs["selection_stats"]["stats"].load(formatter="json"), - ) - - # save average weights - self.average_pdf_weights = { - postfix: safe_div(selection_stats[f"sum_pdf_weight{postfix}"], selection_stats["num_events"]) - for postfix in self.postfixes - } - - -# variation of the pdf weights producer that does not store up and down shifted weights -# but that stores all available pdf weights for the full treatment based on histograms -all_pdf_weights = pdf_weights.derive("all_pdf_weights", cls_dict={"store_all_weights": True}) - - -@producer( - # only run on mc - mc_only=True, -) -def normalized_murmuf_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array: - for postfix in self.postfixes: - # create the normalized weight - avg = self.average_murmuf_weights[postfix] - normalized_weight = events[f"murmuf_weight{postfix}"] / avg - - # store it - events = set_ak_column(events, f"normalized_murmuf_weight{postfix}", normalized_weight, value_type=np.float32) - - return events - - -@normalized_murmuf_weight.init -def normalized_murmuf_weight_init(self: Producer) -> None: - self.postfixes = [""] - if getattr(self, "global_shift_inst", None): - if self.global_shift_inst.is_nominal: - self.postfixes.extend(("_up", "_down")) - columns = {f"murmuf_weight{postfix}" for postfix in self.postfixes} - - self.uses |= columns - self.produces |= {f"normalized_{column}" for column in columns} - - -@normalized_murmuf_weight.requires -def normalized_murmuf_weight_requires(self: Producer, reqs: dict) -> None: - from columnflow.tasks.selection import MergeSelectionStats - reqs["selection_stats"] = MergeSelectionStats.req_different_branching( - self.task, - branch=-1 if self.task.is_workflow() else 0, - ) - - -@normalized_murmuf_weight.setup -def normalized_murmuf_weight_setup( - self: Producer, - reqs: dict, - inputs: dict, - reader_targets: InsertableDict, -) -> None: - # load the selection stats - selection_stats = self.task.cached_value( - key="selection_stats", - func=lambda: inputs["selection_stats"]["stats"].load(formatter="json"), - ) - - # save average weights - self.average_murmuf_weights = { - postfix: safe_div(selection_stats[f"sum_murmuf_weight{postfix}"], selection_stats["num_events"]) - for postfix in self.postfixes - } diff --git a/hbt/selection/lepton.py b/hbt/selection/lepton.py deleted file mode 100644 index 9206fa79..00000000 --- a/hbt/selection/lepton.py +++ /dev/null @@ -1,922 +0,0 @@ -# coding: utf-8 - -""" -Lepton selection methods. 
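The normalized-weight producers above all share the same idea: from the merged selection stats, form the ratio of the summed MC weights without and with the weight in question (per process id in the pileup case), then multiply the event weight by that ratio so its average becomes one. A small numpy sketch with made-up stats:

```python
import numpy as np


def safe_div(num, den):
    return num / den if den else 0.0


# toy selection stats for two process ids (numbers are invented)
stats = {
    "sum_mc_weight_per_process": {"4": 1000.0, "7": 500.0},
    "sum_mc_weight_pu_weight_per_process": {"4": 980.0, "7": 510.0},
}
ratio_per_pid = {
    pid: safe_div(
        stats["sum_mc_weight_per_process"][pid],
        stats["sum_mc_weight_pu_weight_per_process"][pid],
    )
    for pid in ("4", "7")
}

# per-event application: look up the ratio for each event's process id
process_id = np.array([4, 7, 4])
pu_weight = np.array([0.9, 1.1, 1.0])
normalized_pu_weight = np.array([ratio_per_pid[str(p)] for p in process_id]) * pu_weight
```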
-""" - -from __future__ import annotations - -import law - -from operator import or_ -from functools import reduce - -from columnflow.selection import Selector, SelectionResult, selector -from columnflow.columnar_util import ( - set_ak_column, sorted_indices_from_mask, flat_np_view, full_like, -) -from columnflow.util import maybe_import - -from hbt.util import IF_NANO_V9, IF_NANO_GE_V10 -from hbt.config.util import Trigger - -np = maybe_import("numpy") -ak = maybe_import("awkward") - - -logger = law.logger.get_logger(__name__) - - -def trigger_object_matching( - vectors1: ak.Array, - vectors2: ak.Array, - /, - *, - threshold: float = 0.5, - axis: int = 2, - event_mask: ak.Array | type(Ellipsis) | None = None, -) -> ak.Array: - """ - Helper to check per object in *vectors1* if there is at least one object in *vectors2* that - leads to a delta R metric below *threshold*. The final reduction is applied over *axis* of the - resulting metric table containing the full combinatorics. If an *event_mask* is given, the - the matching is performed only for those events, but a full object mask with the same shape as - that of *vectors1* is returned, which all objects set to *False* where not matching was done. - """ - # handle event masks - used_event_mask = event_mask is not None and event_mask is not Ellipsis - event_mask = Ellipsis if event_mask is None else event_mask - - # delta_r for all combinations - dr = vectors1[event_mask].metric_table(vectors2[event_mask]) - - # check per element in vectors1 if there is at least one matching element in vectors2 - any_match = ak.any(dr < threshold, axis=axis) - - # expand to original shape if an event mask was given - if used_event_mask: - full_any_match = full_like(vectors1.pt, False, dtype=bool) - flat_full_any_match = flat_np_view(full_any_match) - flat_full_any_match[flat_np_view(full_any_match | event_mask)] = flat_np_view(any_match) - any_match = full_any_match - - return any_match - - -def update_channel_ids( - events: ak.Array, - previous_channel_ids: ak.Array, - correct_channel_id: int, - channel_mask: ak.Array, -) -> ak.Array: - """ - Check if the events in the is_mask can be inside the given channel - or have already been sorted in another channel before. - """ - events_not_in_channel = (previous_channel_ids != 0) & (previous_channel_ids != correct_channel_id) - channel_id_overwrite = events_not_in_channel & channel_mask - if ak.any(channel_id_overwrite): - raise ValueError( - "The channel_ids of some events are being set to two different values. " - "The first event of this chunk concerned has index", - ak.where(channel_id_overwrite)[0], - ) - return ak.where(channel_mask, correct_channel_id, previous_channel_ids) - - -@selector( - uses={ - "Electron.{pt,eta,phi,dxy,dz,pfRelIso03_all,seediEtaOriX,seediPhiOriY}", - IF_NANO_V9("Electron.mvaFall17V2{Iso_WP80,Iso_WP90}"), - IF_NANO_GE_V10("Electron.{mvaIso_WP80,mvaIso_WP90}"), - }, - exposed=False, -) -def electron_selection( - self: Selector, - events: ak.Array, - trigger: Trigger, - **kwargs, -) -> tuple[ak.Array | None, ak.Array]: - """ - Electron selection returning two sets of masks for default and veto electrons. 
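`trigger_object_matching` above reduces a full delta-R table between offline objects and trigger objects: an offline object counts as matched if at least one trigger object lies within the threshold. A plain-numpy sketch for a single toy event (the real code uses awkward's `metric_table` per event):

```python
import numpy as np


def delta_r(eta1, phi1, eta2, phi2):
    # delta phi wrapped into [-pi, pi)
    dphi = (phi1 - phi2 + np.pi) % (2 * np.pi) - np.pi
    return np.sqrt((eta1 - eta2) ** 2 + dphi ** 2)


# offline electrons and trigger objects of one toy event (eta, phi values invented)
ele_eta, ele_phi = np.array([1.2, -0.4]), np.array([0.3, 2.9])
trig_eta, trig_phi = np.array([1.25, 0.0]), np.array([0.28, -1.0])

# all offline x trigger combinations, then reduce over the trigger-object axis
dr = delta_r(ele_eta[:, None], ele_phi[:, None], trig_eta[None, :], trig_phi[None, :])
matched = np.any(dr < 0.5, axis=1)  # -> [True, False]
```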
- See https://twiki.cern.ch/twiki/bin/view/CMS/EgammaNanoAOD?rev=4 - """ - is_2016 = self.config_inst.campaign.x.year == 2016 - is_2022_post = ( - self.config_inst.campaign.x.year == 2022 and - self.config_inst.campaign.has_tag("postEE") - ) - is_single = trigger.has_tag("single_e") - is_cross = trigger.has_tag("cross_e_tau") - - # obtain mva flags, which might be located at different routes, depending on the nano version - if "mvaIso_WP80" in events.Electron.fields: - # >= nano v10 - # beware that the available Iso should be mvaFall17V2 for run2 files, not Winter22V1, - # check this in original root files if necessary - mva_iso_wp80 = events.Electron.mvaIso_WP80 - mva_iso_wp90 = events.Electron.mvaIso_WP90 - else: - # <= nano v9 - mva_iso_wp80 = events.Electron.mvaFall17V2Iso_WP80 - mva_iso_wp90 = events.Electron.mvaFall17V2Iso_WP90 - - # default electron mask - analysis_mask = None - control_mask = None - if is_single or is_cross: - min_pt = 26.0 if is_2016 else (31.0 if is_single else 25.0) - max_eta = 2.5 if is_single else 2.1 - default_mask = ( - (mva_iso_wp80 == 1) & - (abs(events.Electron.eta) < max_eta) & - (abs(events.Electron.dxy) < 0.045) & - (abs(events.Electron.dz) < 0.2) - ) - - # additional cut in 2022 post-EE - # see https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmVRun3Analysis?rev=162#From_ECAL_and_EGM - if is_2022_post: - default_mask = default_mask & ~( - (events.Electron.eta > 1.556) & - (events.Electron.seediEtaOriX < 45) & - (events.Electron.seediPhiOriY > 72) - ) - - # control mask for the electron selection - control_mask = default_mask & (events.Electron.pt > 24) - analysis_mask = default_mask & (events.Electron.pt > min_pt) - - # veto electron mask (must be trigger independent!) - veto_mask = ( - (mva_iso_wp90 == 1) & - (abs(events.Electron.eta) < 2.5) & - (abs(events.Electron.dxy) < 0.045) & - (abs(events.Electron.dz) < 0.2) & - (events.Electron.pt > 10.0) - ) - - return analysis_mask, control_mask, veto_mask - - -@electron_selection.init -def electron_selection_init(self) -> None: - if self.config_inst.campaign.x.run == 3 and self.config_inst.campaign.x.year == 2022: - self.shifts |= { - shift_inst.name for shift_inst in self.config_inst.shifts - if shift_inst.has_tag(("ees", "eer")) - } - - -@selector( - uses={"{Electron,TrigObj}.{pt,eta,phi}"}, - exposed=False, -) -def electron_trigger_matching( - self: Selector, - events: ak.Array, - trigger: Trigger, - trigger_fired: ak.Array, - leg_masks: dict[str, ak.Array], - **kwargs, -) -> tuple[ak.Array]: - """ - Electron trigger matching. - """ - is_single = trigger.has_tag("single_e") - is_cross = trigger.has_tag("cross_e_tau") - - # catch config errors - assert is_single or is_cross - assert trigger.n_legs == len(leg_masks) == (1 if is_single else 2) - assert abs(trigger.legs["e"].pdg_id) == 11 - - return trigger_object_matching( - events.Electron, - events.TrigObj[leg_masks["e"]], - event_mask=trigger_fired, - ) - - -@selector( - uses={"Muon.{pt,eta,phi,mediumId,tightId,pfRelIso04_all,dxy,dz}"}, - exposed=False, -) -def muon_selection( - self: Selector, - events: ak.Array, - trigger: Trigger, - **kwargs, -) -> tuple[ak.Array | None, ak.Array]: - """ - Muon selection returning two sets of masks for default and veto muons. 
- - References: - - - Isolation working point: https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideMuonIdRun2?rev=59 - - ID und ISO : https://twiki.cern.ch/twiki/bin/view/CMS/MuonUL2017?rev=15 - """ - is_2016 = self.config_inst.campaign.x.year == 2016 - is_single = trigger.has_tag("single_mu") - is_cross = trigger.has_tag("cross_mu_tau") - - # default muon mask - analysis_mask = None - control_mask = None - if is_single or is_cross: - if is_2016: - min_pt = 23.0 if is_single else 20.0 - else: - min_pt = 26.0 if is_single else 22.0 - default_mask = ( - (events.Muon.tightId == 1) & - (abs(events.Muon.eta) < 2.4) & - (abs(events.Muon.dxy) < 0.045) & - (abs(events.Muon.dz) < 0.2) & - (events.Muon.pfRelIso04_all < 0.15) - ) - control_mask = default_mask & (events.Muon.pt > 20) - analysis_mask = default_mask & (events.Muon.pt > min_pt) - - # veto muon mask (must be trigger independent!) - veto_mask = ( - ((events.Muon.mediumId == 1) | (events.Muon.tightId == 1)) & - (abs(events.Muon.eta) < 2.4) & - (abs(events.Muon.dxy) < 0.045) & - (abs(events.Muon.dz) < 0.2) & - (events.Muon.pfRelIso04_all < 0.3) & - (events.Muon.pt > 10) - ) - - return analysis_mask, control_mask, veto_mask - - -@selector( - uses={"{Muon,TrigObj}.{pt,eta,phi}"}, - exposed=False, -) -def muon_trigger_matching( - self: Selector, - events: ak.Array, - trigger: Trigger, - trigger_fired: ak.Array, - leg_masks: dict[str, ak.Array], - **kwargs, -) -> tuple[ak.Array]: - """ - Muon trigger matching. - """ - is_single = trigger.has_tag("single_mu") - is_cross = trigger.has_tag("cross_mu_tau") - - # catch config errors - assert is_single or is_cross - assert trigger.n_legs == len(leg_masks) == (1 if is_single else 2) - assert abs(trigger.legs["mu"].pdg_id) == 13 - - return trigger_object_matching( - events.Muon, - events.TrigObj[leg_masks["mu"]], - event_mask=trigger_fired, - ) - - -@selector( - uses={ - "Tau.{pt,eta,phi,dz,decayMode}", - "{Electron,Muon,TrigObj}.{pt,eta,phi}", - }, - # shifts are declared dynamically below in tau_selection_init - exposed=False, -) -def tau_selection( - self: Selector, - events: ak.Array, - trigger: Trigger, - electron_mask: ak.Array | None, - muon_mask: ak.Array | None, - **kwargs, -) -> tuple[ak.Array, ak.Array]: - """ - Tau selection returning a masks for taus that are at least VVLoose isolated (vs jet) - and a second mask to select isolated ones, eventually to separate normal and iso inverted taus - for QCD estimations. 
- """ - # return empty mask if no tagged taus exists in the chunk - if ak.all(ak.num(events.Tau) == 0): - logger.info("no taus found in event chunk") - false_mask = full_like(events.Tau.pt, False, dtype=bool) - return false_mask, false_mask - - is_single_e = trigger.has_tag("single_e") - is_single_mu = trigger.has_tag("single_mu") - is_cross_e = trigger.has_tag("cross_e_tau") - is_cross_mu = trigger.has_tag("cross_mu_tau") - is_cross_tau = trigger.has_tag("cross_tau_tau") - is_cross_tau_vbf = trigger.has_tag("cross_tau_tau_vbf") - is_cross_tau_jet = trigger.has_tag("cross_tau_tau_jet") - is_2016 = self.config_inst.campaign.x.year == 2016 - is_run3 = self.config_inst.campaign.x.run == 3 - get_tau_tagger = lambda tag: f"id{self.config_inst.x.tau_tagger}VS{tag}" - wp_config = self.config_inst.x.tau_id_working_points - - # determine minimum pt and maximum eta - max_eta = 2.5 - if is_single_e or is_single_mu: - min_pt = 20.0 - elif is_cross_e: - # only existing after 2016 - min_pt = 0.0 if is_2016 else 35.0 - elif is_cross_mu: - min_pt = 25.0 if is_2016 else 32.0 - elif is_cross_tau: - min_pt = 40.0 - elif is_cross_tau_vbf: - # only existing after 2016 - min_pt = 0.0 if is_2016 else 25.0 - elif is_cross_tau_jet: - min_pt = None if not is_run3 else 35.0 - - # base tau mask for default and qcd sideband tau - base_mask = ( - (abs(events.Tau.eta) < max_eta) & - (events.Tau.pt > min_pt) & - (abs(events.Tau.dz) < 0.2) & - reduce(or_, [events.Tau.decayMode == mode for mode in (0, 1, 10, 11)]) & - (events.Tau[get_tau_tagger("jet")] >= wp_config.tau_vs_jet.vvvloose) - # vs e and mu cuts are channel dependent and thus applied in the overall lepton selection - ) - - # remove taus with too close spatial separation to previously selected leptons - if electron_mask is not None: - base_mask = base_mask & ak.all(events.Tau.metric_table(events.Electron[electron_mask]) > 0.5, axis=2) - if muon_mask is not None: - base_mask = base_mask & ak.all(events.Tau.metric_table(events.Muon[muon_mask]) > 0.5, axis=2) - - # compute the isolation mask separately as it is used to defined (qcd) categories later on - iso_mask = events.Tau[get_tau_tagger("jet")] >= wp_config.tau_vs_jet.medium - - return base_mask, iso_mask - - -@tau_selection.init -def tau_selection_init(self: Selector) -> None: - # register tec shifts - self.shifts |= { - shift_inst.name - for shift_inst in self.config_inst.shifts - if shift_inst.has_tag("tec") - } - - # Add columns for the right tau tagger - self.uses |= { - f"Tau.id{self.config_inst.x.tau_tagger}VS{tag}" - for tag in ("e", "mu", "jet") - } - - -@selector( - uses={"{Tau,TrigObj}.{pt,eta,phi}"}, - # shifts are declared dynamically below in tau_selection_init - exposed=False, -) -def tau_trigger_matching( - self: Selector, - events: ak.Array, - trigger: Trigger, - trigger_fired: ak.Array, - leg_masks: dict[str, ak.Array], - **kwargs, -) -> tuple[ak.Array]: - """ - Tau trigger matching. 
- """ - if ak.all(ak.num(events.Tau) == 0): - logger.info("no taus found in event chunk") - return full_like(events.Tau.pt, False, dtype=bool) - - is_cross_e = trigger.has_tag("cross_e_tau") - is_cross_mu = trigger.has_tag("cross_mu_tau") - is_cross_tau = trigger.has_tag("cross_tau_tau") - is_cross_tau_vbf = trigger.has_tag("cross_tau_tau_vbf") - is_cross_tau_jet = trigger.has_tag("cross_tau_tau_jet") - is_any_cross_tau = is_cross_tau or is_cross_tau_vbf or is_cross_tau_jet - assert is_cross_e or is_cross_mu or is_any_cross_tau - - # start per-tau mask with trigger object matching per leg - if is_cross_e or is_cross_mu: - # catch config errors - assert trigger.n_legs == len(leg_masks) == 2 - assert abs(trigger.legs["tau"].pdg_id) == 15 - # match leg 1 - return trigger_object_matching( - events.Tau, - events.TrigObj[leg_masks["tau"]], - event_mask=trigger_fired, - ) - - # is_any_cross_tau - # catch config errors - assert trigger.n_legs == len(leg_masks) >= 2 - assert abs(trigger.legs["tau1"].pdg_id) == 15 - assert abs(trigger.legs["tau2"].pdg_id) == 15 - - # match both legs - matches_leg0 = trigger_object_matching( - events.Tau, - events.TrigObj[leg_masks["tau1"]], - event_mask=trigger_fired, - ) - matches_leg1 = trigger_object_matching( - events.Tau, - events.TrigObj[leg_masks["tau2"]], - event_mask=trigger_fired, - ) - - # taus need to be matched to at least one leg, but as a side condition - # each leg has to have at least one match to a tau - matches = ( - (matches_leg0 | matches_leg1) & - ak.any(matches_leg0, axis=1) & - ak.any(matches_leg1, axis=1) - ) - - return matches - - -@selector( - uses={ - electron_selection, electron_trigger_matching, muon_selection, muon_trigger_matching, - tau_selection, tau_trigger_matching, - "event", "{Electron,Muon,Tau}.{charge,mass}", - }, - produces={ - electron_selection, electron_trigger_matching, muon_selection, muon_trigger_matching, - tau_selection, tau_trigger_matching, - # new columns - "channel_id", "leptons_os", "tau2_isolated", "single_triggered", "cross_triggered", - }, -) -def lepton_selection( - self: Selector, - events: ak.Array, - trigger_results: SelectionResult, - **kwargs, -) -> tuple[ak.Array, SelectionResult]: - """ - Combined lepton selection. - """ - wp_config = self.config_inst.x.tau_id_working_points - get_tau_tagger = lambda tag: f"id{self.config_inst.x.tau_tagger}VS{tag}" - - # get channels from the config - ch_etau = self.config_inst.get_channel("etau") - ch_mutau = self.config_inst.get_channel("mutau") - ch_tautau = self.config_inst.get_channel("tautau") - ch_ee = self.config_inst.get_channel("ee") - ch_mumu = self.config_inst.get_channel("mumu") - ch_emu = self.config_inst.get_channel("emu") - - # prepare vectors for output vectors - false_mask = (abs(events.event) < 0) - channel_id = np.uint8(1) * false_mask - tau2_isolated = false_mask - leptons_os = false_mask - single_triggered = false_mask - cross_triggered = false_mask - sel_electron_mask = full_like(events.Electron.pt, False, dtype=bool) - sel_muon_mask = full_like(events.Muon.pt, False, dtype=bool) - sel_tau_mask = full_like(events.Tau.pt, False, dtype=bool) - leading_taus = events.Tau[:, :0] - - # indices for sorting taus first by isolation, then by pt - # for this, combine iso and pt values, e.g. 
iso 255 and pt 32.3 -> 2550032.3 - f = 10**(np.ceil(np.log10(ak.max(events.Tau.pt))) + 2) - tau_sorting_key = events.Tau[f"raw{self.config_inst.x.tau_tagger}VSjet"] * f + events.Tau.pt - tau_sorting_indices = ak.argsort(tau_sorting_key, axis=-1, ascending=False) - - # perform each lepton election step separately per trigger, avoid caching - sel_kwargs = {**kwargs, "call_force": True} - for trigger, trigger_fired, leg_masks in trigger_results.x.trigger_data: - is_single = trigger.has_tag("single_trigger") - is_cross = trigger.has_tag("cross_trigger") - - # electron selection - electron_mask, electron_control_mask, electron_veto_mask = self[electron_selection]( - events, - trigger, - **sel_kwargs, - ) - - # muon selection - muon_mask, muon_control_mask, muon_veto_mask = self[muon_selection]( - events, - trigger, - **sel_kwargs, - ) - - # tau selection - tau_mask, tau_iso_mask = self[tau_selection]( - events, - trigger, - electron_mask, - muon_mask, - **sel_kwargs, - ) - - # conditions potentially leading to etau channel - if trigger.has_tag({"single_e", "cross_e_tau"}) and ( - self.dataset_inst.is_mc or - self.dataset_inst.has_tag("etau") - ): - # channel dependent deeptau cuts vs e and mu - ch_tau_mask = ( - tau_mask & - (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & - (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) - ) - - # fold trigger matching into the selection - trig_electron_mask = ( - electron_mask & - self[electron_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) - ) - trig_tau_mask = ch_tau_mask - if trigger.has_tag("cross_e_tau"): - trig_tau_mask = ( - trig_tau_mask & - self[tau_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) - ) - - # check if the most isolated tau among the selected ones is matched - first_tau_matched = ak.fill_none( - ak.firsts(trig_tau_mask[tau_sorting_indices[ch_tau_mask[tau_sorting_indices]]], axis=1), - False, - ) - - # expect 1 electron, 1 veto electron (the same one), 0 veto muons, and at least one tau - # without and with trigger matching on the default objects - is_etau = ( - trigger_fired & - (ak.sum(electron_mask, axis=1) == 1) & - (ak.sum(trig_electron_mask, axis=1) == 1) & - (ak.sum(electron_veto_mask, axis=1) == 1) & - (ak.sum(muon_veto_mask, axis=1) == 0) & - first_tau_matched - ) - - # get selected taus and sort them - # (this will be correct for events for which is_etau is actually True) - sorted_sel_taus = events.Tau[tau_sorting_indices][trig_tau_mask[tau_sorting_indices]] - # determine the relative charge and tau2 isolation - e_charge = ak.firsts(events.Electron[trig_electron_mask].charge, axis=1) - tau_charge = ak.firsts(sorted_sel_taus.charge, axis=1) - is_os = e_charge == -tau_charge - is_iso = ak.sum(tau_iso_mask[trig_tau_mask], axis=1) >= 1 - # store global variables - channel_id = update_channel_ids(events, channel_id, ch_etau.id, is_etau) - tau2_isolated = ak.where(is_etau, is_iso, tau2_isolated) - leptons_os = ak.where(is_etau, is_os, leptons_os) - single_triggered = ak.where(is_etau & is_single, True, single_triggered) - cross_triggered = ak.where(is_etau & is_cross, True, cross_triggered) - sel_electron_mask = ak.where(is_etau, trig_electron_mask, sel_electron_mask) - sel_tau_mask = ak.where(is_etau, trig_tau_mask, sel_tau_mask) - leading_taus = ak.where(is_etau, sorted_sel_taus[:, :1], leading_taus) - - # mutau channel - if trigger.has_tag({"single_mu", "cross_mu_tau"}) and ( - self.dataset_inst.is_mc or - self.dataset_inst.has_tag("mutau") - ): 
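The tau ordering above packs two sort criteria into one number: the raw isolation score is scaled by a power of ten larger than any tau pt in the chunk and the pt is added, so sorting the sum in descending order ranks taus first by isolation and breaks ties by pt. A quick numpy check of that construction:

```python
import numpy as np

tau_pt = np.array([32.3, 180.0, 45.0])
tau_raw_iso = np.array([255.0, 17.0, 255.0])

f = 10 ** (np.ceil(np.log10(tau_pt.max())) + 2)  # 1e5 here, since the largest pt < 1000
key = tau_raw_iso * f + tau_pt                   # e.g. iso 255, pt 32.3 -> 25500032.3
order = np.argsort(-key)                         # descending: most isolated first
# -> order == [2, 0, 1]: the two iso-255 taus lead, the higher-pt one of them first
```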
- # channel dependent deeptau cuts vs e and mu - ch_tau_mask = ( - tau_mask & - (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vvloose) & - (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) - ) - - # fold trigger matching into the selection - trig_muon_mask = ( - muon_mask & - self[muon_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) - ) - trig_tau_mask = ch_tau_mask - if trigger.has_tag("cross_e_tau"): - trig_tau_mask = ( - trig_tau_mask & - self[tau_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) - ) - - # check if the most isolated tau among the selected ones is matched - first_tau_matched = ak.fill_none( - ak.firsts(trig_tau_mask[tau_sorting_indices[ch_tau_mask[tau_sorting_indices]]], axis=1), - False, - ) - - # expect 1 muon, 1 veto muon (the same one), 0 veto electrons, and at least one tau - # without and with trigger matching on the default objects - is_mutau = ( - trigger_fired & - (ak.sum(muon_mask, axis=1) == 1) & - (ak.sum(trig_muon_mask, axis=1) == 1) & - (ak.sum(muon_veto_mask, axis=1) == 1) & - (ak.sum(electron_veto_mask, axis=1) == 0) & - first_tau_matched - ) - - # get selected, sorted taus to obtain quantities - # (this will be correct for events for which is_mutau is actually True) - sorted_sel_taus = events.Tau[tau_sorting_indices][trig_tau_mask[tau_sorting_indices]] - # determine the relative charge and tau2 isolation - mu_charge = ak.firsts(events.Muon[trig_muon_mask].charge, axis=1) - tau_charge = ak.firsts(sorted_sel_taus.charge, axis=1) - is_os = mu_charge == -tau_charge - is_iso = ak.sum(tau_iso_mask[trig_tau_mask], axis=1) >= 1 - # store global variables - channel_id = update_channel_ids(events, channel_id, ch_mutau.id, is_mutau) - tau2_isolated = ak.where(is_mutau, is_iso, tau2_isolated) - leptons_os = ak.where(is_mutau, is_os, leptons_os) - single_triggered = ak.where(is_mutau & is_single, True, single_triggered) - cross_triggered = ak.where(is_mutau & is_cross, True, cross_triggered) - sel_muon_mask = ak.where(is_mutau, trig_muon_mask, sel_muon_mask) - sel_tau_mask = ak.where(is_mutau, trig_tau_mask, sel_tau_mask) - leading_taus = ak.where(is_mutau, sorted_sel_taus[:, :1], leading_taus) - - # tautau channel - if ( - trigger.has_tag({"cross_tau_tau", "cross_tau_tau_vbf", "cross_tau_tau_jet"}) and - (self.dataset_inst.is_mc or self.dataset_inst.has_tag("tautau")) - ): - # channel dependent deeptau cuts vs e and mu - ch_tau_mask = ( - tau_mask & - (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vvloose) & - (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.vloose) - ) - - # fold trigger matching into the selection - trig_tau_mask = ( - ch_tau_mask & - self[tau_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) - ) - - # check if the two leading (most isolated) taus are matched - leading_taus_matched = ak.fill_none( - ak.firsts(trig_tau_mask[tau_sorting_indices[ch_tau_mask[tau_sorting_indices]]], axis=1) & - ak.firsts(trig_tau_mask[tau_sorting_indices[ch_tau_mask[tau_sorting_indices]]][:, 1:], axis=1), - False, - ) - - # expect 0 veto electrons, 0 veto muons and at least two taus of which one is isolated - is_tautau = ( - trigger_fired & - (ak.sum(electron_veto_mask, axis=1) == 0) & - (ak.sum(muon_veto_mask, axis=1) == 0) & - leading_taus_matched - ) - - # get selected, sorted taus to obtain quantities - # (this will be correct for events for which is_tautau is actually True) - sorted_sel_taus = 
events.Tau[tau_sorting_indices][trig_tau_mask[tau_sorting_indices]] - # determine the relative charge and tau2 isolation - tau1_charge = ak.firsts(sorted_sel_taus.charge, axis=1) - tau2_charge = ak.firsts(sorted_sel_taus.charge[:, 1:], axis=1) - is_os = tau1_charge == -tau2_charge - is_iso = ak.sum(tau_iso_mask[trig_tau_mask], axis=1) >= 2 - # store global variables - channel_id = update_channel_ids(events, channel_id, ch_tautau.id, is_tautau) - tau2_isolated = ak.where(is_tautau, is_iso, tau2_isolated) - leptons_os = ak.where(is_tautau, is_os, leptons_os) - single_triggered = ak.where(is_tautau & is_single, True, single_triggered) - cross_triggered = ak.where(is_tautau & is_cross, True, cross_triggered) - sel_tau_mask = ak.where(is_tautau, trig_tau_mask, sel_tau_mask) - leading_taus = ak.where(is_tautau, sorted_sel_taus[:, :2], leading_taus) - - # ee channel - if trigger.has_tag("single_e") and ( - self.dataset_inst.is_mc or - self.dataset_inst.has_tag("ee") - ): - # fold trigger matching into the selection - trig_electron_mask = ( - electron_mask & - self[electron_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) - ) - - # check if the first (hardest) electron matched - electron_sorting_indices = ak.argsort(events.Electron.pt, axis=1, ascending=False) - leading_electron_matched = ak.fill_none( - ak.firsts(trig_electron_mask[electron_sorting_indices[electron_mask[electron_sorting_indices]]], axis=1), # noqa: E501 - False, - ) - - # expect 2 electrons, 2 veto electrons, 0 veto muons, and ignore the taus - is_ee = ( - trigger_fired & - (ak.sum(electron_mask, axis=1) >= 1) & - (ak.sum(electron_control_mask, axis=1) == 2) & - leading_electron_matched & - (ak.sum(electron_veto_mask, axis=1) == 2) & - (ak.sum(muon_veto_mask, axis=1) == 0) - ) - - # get selected, sorted electrons to obtain quantities - # (this will be correct for events for which is_ee is actually True) - sorted_sel_electrons = events.Electron[electron_sorting_indices][electron_control_mask[electron_sorting_indices]] # noqa - # determine the relative charge - e1_charge = ak.firsts(sorted_sel_electrons.charge, axis=1) - e2_charge = ak.firsts(sorted_sel_electrons.charge[:, 1:], axis=1) - is_os = e1_charge == -e2_charge - # store global variables - channel_id = update_channel_ids(events, channel_id, ch_ee.id, is_ee) - leptons_os = ak.where(is_ee, is_os, leptons_os) - single_triggered = ak.where(is_ee & is_single, True, single_triggered) - cross_triggered = ak.where(is_ee & is_cross, True, cross_triggered) - sel_electron_mask = ak.where(is_ee, electron_mask, sel_electron_mask) - - # mumu channel - if trigger.has_tag("single_mu") and ( - self.dataset_inst.is_mc or - self.dataset_inst.has_tag("mumu") - ): - # fold trigger matching into the selection - trig_muon_mask = ( - muon_mask & - self[muon_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) - ) - - # check if the first (hardest) muon matched - muon_sorting_indices = ak.argsort(events.Muon.pt, axis=1, ascending=False) - leading_muon_matched = ak.fill_none( - ak.firsts(trig_muon_mask[muon_sorting_indices[muon_mask[muon_sorting_indices]]], axis=1), - False, - ) - - # expect 2 muons, 2 veto muons, 0 veto electrons, and ignore the taus - is_mumu = ( - trigger_fired & - (ak.sum(muon_mask, axis=1) >= 1) & - (ak.sum(muon_control_mask, axis=1) == 2) & - leading_muon_matched & - (ak.sum(muon_veto_mask, axis=1) == 2) & - (ak.sum(electron_veto_mask, axis=1) == 0) - ) - - # get selected, sorted muons to obtain quantities - # (this will be 
correct for events for which is_mumu is actually True) - sorted_sel_muons = events.Muon[muon_sorting_indices][muon_control_mask[muon_sorting_indices]] - # determine the relative charge - mu1_charge = ak.firsts(sorted_sel_muons.charge, axis=1) - mu2_charge = ak.firsts(sorted_sel_muons.charge[:, 1:], axis=1) - is_os = mu1_charge == -mu2_charge - # store global variables - channel_id = update_channel_ids(events, channel_id, ch_mumu.id, is_mumu) - leptons_os = ak.where(is_mumu, is_os, leptons_os) - single_triggered = ak.where(is_mumu & is_single, True, single_triggered) - cross_triggered = ak.where(is_mumu & is_cross, True, cross_triggered) - sel_muon_mask = ak.where(is_mumu, muon_mask, sel_muon_mask) - - # emu channel - if ( - (emu_from_e := ( - trigger.has_tag("single_e") and - (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_e")) - )) or ( - trigger.has_tag("single_mu") and - (self.dataset_inst.is_mc or self.dataset_inst.has_tag("emu_from_mu")) - ) - ): - if emu_from_e: - emu_electron_mask = electron_mask - # fold trigger matching into the selection - trig_electron_mask = ( - electron_mask & - self[electron_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) - ) - # for muons, loop over triggers, find single triggers and make sure none of them - # fired in order to avoid double counting - emu_muon_mask = False - mu_trig_fired = full_like(events.event, False, dtype=bool) - for _trigger, _trigger_fired, _ in trigger_results.x.trigger_data: - if not _trigger.has_tag("single_mu"): - continue - # evaluate the muon selection once (it is the same for all single triggers) - if emu_muon_mask is False: - # the correct muon mask is the control muon mask with min pt 20 - _, emu_muon_mask, _ = self[muon_selection](events, _trigger, **sel_kwargs) - # store the trigger decision - mu_trig_fired = mu_trig_fired | _trigger_fired - # muons obey the trigger rules if no single trigger fired - trig_muon_mask = emu_muon_mask & ~mu_trig_fired - - else: - emu_muon_mask = muon_mask - # fold trigger matching into the selection - trig_muon_mask = ( - muon_mask & - self[muon_trigger_matching](events, trigger, trigger_fired, leg_masks, **sel_kwargs) - ) - # for electrons, loop over triggers, find single triggers and check the matching - # only in case a trigger fired - emu_electron_mask = False - e_trig_fired = full_like(events.event, False, dtype=bool) - e_match_mask = full_like(events.Electron.pt, False, dtype=bool) - for _trigger, _trigger_fired, _leg_masks in trigger_results.x.trigger_data: - if not _trigger.has_tag("single_e"): - continue - # evaluate the electron selection once (it is the same for all single triggers) - if emu_electron_mask is False: - emu_electron_mask_if_triggered, emu_electron_control_mask, _ = self[electron_selection](events, _trigger, **sel_kwargs) # noqa - # store the trigger decision - e_trig_fired = e_trig_fired | _trigger_fired - # evaluate the matching - e_match_mask = e_match_mask | ( - self[electron_trigger_matching](events, _trigger, _trigger_fired, _leg_masks, **sel_kwargs) & - _trigger_fired - ) - - # the correct electron mask is the control electron mask where the trigger did not fire - # and the electron_mask_triggered where the trigger did fire - emu_electron_mask = ak.where(e_trig_fired, emu_electron_mask_if_triggered, emu_electron_control_mask) - # for events in which no single e trigger fired, consider the matching as successful - e_match_mask = e_match_mask | ~e_trig_fired - trig_electron_mask = emu_electron_mask & e_match_mask - - # 
expect 1 electron, 1 muon, 1 veto electron, 1 veto muon, and ignore taus - is_emu = ( - trigger_fired & - (ak.sum(emu_electron_mask, axis=1) == 1) & - (ak.sum(trig_electron_mask, axis=1) == 1) & - (ak.sum(electron_veto_mask, axis=1) == 1) & - (ak.sum(emu_muon_mask, axis=1) == 1) & - (ak.sum(trig_muon_mask, axis=1) == 1) & - (ak.sum(muon_veto_mask, axis=1) == 1) - ) - - # determine the relative charge - e_charge = ak.firsts(events.Electron[trig_electron_mask].charge, axis=1) - mu_charge = ak.firsts(events.Muon[trig_muon_mask].charge, axis=1) - is_os = e_charge == -mu_charge - # store global variables - channel_id = update_channel_ids(events, channel_id, ch_emu.id, is_emu) - leptons_os = ak.where(is_emu, is_os, leptons_os) - single_triggered = ak.where(is_emu & is_single, True, single_triggered) - cross_triggered = ak.where(is_emu & is_cross, True, cross_triggered) - sel_electron_mask = ak.where(is_emu, trig_electron_mask, sel_electron_mask) - sel_muon_mask = ak.where(is_emu, trig_muon_mask, sel_muon_mask) - - # some final type conversions - channel_id = ak.values_astype(channel_id, np.uint8) - leptons_os = ak.fill_none(leptons_os, False) - - # save new columns - events = set_ak_column(events, "channel_id", channel_id) - events = set_ak_column(events, "leptons_os", leptons_os) - events = set_ak_column(events, "tau2_isolated", tau2_isolated) - events = set_ak_column(events, "single_triggered", single_triggered) - events = set_ak_column(events, "cross_triggered", cross_triggered) - - # convert lepton masks to sorted indices (pt for e/mu, iso for tau) - sel_electron_indices = sorted_indices_from_mask(sel_electron_mask, events.Electron.pt, ascending=False) - sel_muon_indices = sorted_indices_from_mask(sel_muon_mask, events.Muon.pt, ascending=False) - sel_tau_indices = sorted_indices_from_mask(sel_tau_mask, tau_sorting_key, ascending=False) - - return events, SelectionResult( - steps={ - "lepton": channel_id != 0, - }, - objects={ - "Electron": { - "Electron": sel_electron_indices, - }, - "Muon": { - "Muon": sel_muon_indices, - }, - "Tau": { - "Tau": sel_tau_indices, - }, - }, - aux={ - # save the selected lepton pair for the duration of the selection - # multiplication of a coffea particle with 1 yields the lorentz vector - "lepton_pair": ak.concatenate( - [ - events.Electron[sel_electron_indices] * 1, - events.Muon[sel_muon_indices] * 1, - events.Tau[sel_tau_indices] * 1, - ], - axis=1, - )[:, :2], - - # save the leading taus for the duration of the selection - # exactly 1 for etau/mutau and exactly 2 for tautau - "leading_taus": leading_taus, - }, - ) - - -@lepton_selection.init -def lepton_selection_init(self: Selector) -> None: - # add column to load the raw tau tagger score - self.uses.add(f"Tau.raw{self.config_inst.x.tau_tagger}VSjet") diff --git a/hbt/tasks/__init__.py b/hbt/tasks/__init__.py deleted file mode 100644 index 2ec46037..00000000 --- a/hbt/tasks/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# coding: utf-8 -# flake8: noqa - -# provisioning imports -import hbt.tasks.base -import hbt.tasks.stats -import hbt.tasks.studies -import hbt.tasks.sync diff --git a/hbt/tasks/base.py b/hbt/tasks/base.py deleted file mode 100644 index 14a410bf..00000000 --- a/hbt/tasks/base.py +++ /dev/null @@ -1,12 +0,0 @@ -# coding: utf-8 - -""" -Custom base tasks. 
-""" - -from columnflow.tasks.framework.base import BaseTask - - -class HBTTask(BaseTask): - - task_namespace = "hbt" diff --git a/hbt/tasks/sync.py b/hbt/tasks/sync.py deleted file mode 100644 index 499befe8..00000000 --- a/hbt/tasks/sync.py +++ /dev/null @@ -1,370 +0,0 @@ -# coding: utf-8 - -""" -Tasks that create files for synchronization efforts with other frameworks. -""" - -from __future__ import annotations - -from functools import reduce -from operator import or_ - -import luigi -import law - -from columnflow.tasks.framework.base import Requirements, DatasetTask -from columnflow.tasks.framework.mixins import ProducersMixin, MLModelsMixin, ChunkedIOMixin -from columnflow.tasks.framework.remote import RemoteWorkflow -from columnflow.tasks.external import GetDatasetLFNs -from columnflow.tasks.reduction import ReducedEventsUser -from columnflow.tasks.production import ProduceColumns -from columnflow.tasks.ml import MLEvaluation -from columnflow.util import dev_sandbox - -from hbt.tasks.base import HBTTask -from hbt.util import hash_events - - -class CheckExternalLFNOverlap( - HBTTask, - DatasetTask, -): - - lfn = luigi.Parameter( - description="local path to an external LFN to check for overlap with the dataset", - # fetched via nanogen's FetchLFN - default="/pnfs/desy.de/cms/tier2/store/user/bwieders/nanogen_store/FetchLFN/store/mc/Run3Summer22NanoAODv12/GluGlutoHHto2B2Tau_kl-1p00_kt-1p00_c2-0p00_LHEweights_TuneCP5_13p6TeV_powheg-pythia8/NANOAODSIM/130X_mcRun3_2022_realistic_v5-v2/50000/992697da-4a10-4435-b63a-413f6d33517e.root", # noqa - ) - - # no versioning required - version = None - - # default sandbox, might be overwritten by calibrator function - sandbox = dev_sandbox(law.config.get("analysis", "default_columnar_sandbox")) - - # upstream requirements - reqs = Requirements( - GetDatasetLFNs=GetDatasetLFNs, - ) - - def requires(self): - return self.reqs.GetDatasetLFNs.req(self) - - def output(self): - return { - "overlap": self.target("lfn_overlap.json"), - "unique_identifiers": self.target("unique_identifiers.parquet"), - } - - def run(self): - import awkward as ak - import numpy as np - - # load the index columns of the reference lfn - output = self.output() - - with self.publish_step("loading reference ids"): - ref_hashes = hash_events(self.load_nano_index(law.LocalFileTarget(self.lfn))) - - # loop over all lfns in the dataset - n_files = self.dataset_inst.n_files - lfns_task = self.requires() - relative_overlap = {"rel_to_file": {}, "rel_to_ref": {}} - overlapping_identifier = [] - - for i, lfn_target in lfns_task.iter_nano_files(self, lfn_indices=list(range(n_files))): - with self.publish_step(f"loading ids of file {i}"): - file_arr = self.load_nano_index(lfn_target) - file_hashes = hash_events(file_arr) - # find unique hashes in the reference and the file - # faster than np. 
- overlapping_mask = np.isin( - file_hashes, - ref_hashes, - assume_unique=True, - kind="sort", - ) - - num_overlapping = np.sum(overlapping_mask) - if num_overlapping: - # calculate the relative overlaps - # relative to file indicates how many events of the reference are within the files - relative_overlap["rel_to_file"][str(i)] = np.sum(num_overlapping) / len(file_hashes) - relative_overlap["rel_to_ref"][str(i)] = np.sum(num_overlapping) / len(ref_hashes) - # apply mask and store the overlapping identifiers - overlapping_identifier.extend(file_arr[overlapping_mask]) - - # sanity checks - assert relative_overlap["rel_to_file"], "no overlap between reference and files found" - - reference_sum = np.sum([ref_value for ref_value in relative_overlap["rel_to_ref"].values()]) - assert reference_sum.item() == 1, f"reference sum is not 1 but {reference_sum.item()}" - - output["overlap"].dump( - relative_overlap, - formatter="json", - ) - - output["unique_identifiers"].dump( - ak.Array(overlapping_identifier), - formatter="awkward", - ) - - @classmethod - def load_nano_index(cls, lfn_target: law.FileSystemFileTarget) -> set[int]: - fields = ["event", "luminosityBlock", "run"] - arr = lfn_target.load(formatter="uproot")["Events"].arrays(fields) - return arr - - -class CreateSyncFiles( - HBTTask, - ReducedEventsUser, - ChunkedIOMixin, - ProducersMixin, - MLModelsMixin, - law.LocalWorkflow, - RemoteWorkflow, -): - filter_file = luigi.Parameter( - description="local path to a file containing event unique identifier to filter them out", - default="", - ) - - sandbox = dev_sandbox(law.config.get("analysis", "default_columnar_sandbox")) - - # upstream requirements - reqs = Requirements( - ReducedEventsUser.reqs, - RemoteWorkflow.reqs, - ProduceColumns=ProduceColumns, - MLEvaluation=MLEvaluation, - ) - - def workflow_requires(self): - reqs = super().workflow_requires() - - # require the full merge forest - reqs["events"] = self.reqs.ProvideReducedEvents.req(self) - - if not self.pilot: - if self.producer_insts: - reqs["producers"] = [ - self.reqs.ProduceColumns.req(self, producer=producer_inst.cls_name) - for producer_inst in self.producer_insts - if producer_inst.produced_columns - ] - if self.ml_model_insts: - reqs["ml"] = [ - self.reqs.MLEvaluation.req(self, ml_model=m) - for m in self.ml_models - ] - - return reqs - - def requires(self): - reqs = {"events": self.reqs.ProvideReducedEvents.req(self)} - - if self.producer_insts: - reqs["producers"] = [ - self.reqs.ProduceColumns.req(self, producer=producer_inst.cls_name) - for producer_inst in self.producer_insts - if producer_inst.produced_columns - ] - if self.ml_model_insts: - reqs["ml"] = [ - self.reqs.MLEvaluation.req(self, ml_model=m) - for m in self.ml_models - ] - - return reqs - - workflow_condition = ReducedEventsUser.workflow_condition.copy() - - @workflow_condition.output - def output(self): - return self.target(f"sync_{self.dataset_inst.name}_{self.branch}.csv") - - @law.decorator.log - @law.decorator.localize - def run(self): - import awkward as ak - import numpy as np - from columnflow.columnar_util import EMPTY_FLOAT, EMPTY_INT, update_ak_array, set_ak_column - - # prepare inputs and outputs - inputs = self.input() - output = self.output() - - # iterate over chunks of events and diffs - files = [inputs["events"]["events"].abspath] - if self.producer_insts: - files.extend([inp["columns"].abspath for inp in inputs["producers"]]) - if self.ml_model_insts: - files.extend([inp["mlcolumns"].abspath for inp in inputs["ml"]]) - - # helper to 
replace our internal empty placeholders with a custom ones - # dtype -> (our, custom) - empty = { - np.float32: (EMPTY_FLOAT, EMPTY_FLOAT), - np.float64: (EMPTY_FLOAT, EMPTY_FLOAT), - np.int32: (EMPTY_INT, EMPTY_INT), - np.int64: (EMPTY_INT, EMPTY_INT), - np.uint64: (EMPTY_INT, EMPTY_INT), - } - def replace_empty(arr, dtype=np.float32): - default, custom = empty[dtype] - if custom != default: - arr = ak.where(arr == default, custom, arr) - return arr - - # helper to pad nested fields with an empty float if missing - def pad_nested(arr, n, *, axis=1, dtype=np.float32): - padded = ak.pad_none(arr, n, axis=axis) - return ak.values_astype(ak.fill_none(padded, empty[dtype][1]), dtype) - - # helper to pad and select the last element on the first inner axis - def select(arr, idx, dtype=np.float32): - padded = pad_nested(arr, idx + 1, axis=-1, dtype=dtype) - return replace_empty((padded if arr.ndim == 1 else padded[:, idx]), dtype=dtype) - - # helper to select leptons - def select_leptons(events: ak.Array, common_fields: dict[str, int | float]) -> ak.Array: - # ensure all lepton arrays have the same common fields - leptons = [events.Electron, events.Muon, events.Tau] - for i in range(len(leptons)): - lepton = leptons[i] - for field, default in common_fields.items(): - if field not in lepton.fields: - lepton = set_ak_column(lepton, field, default) - leptons[i] = lepton - # concatenate (first event any lepton, second alsways tau) and add to events - return set_ak_column(events, "Lepton", ak.concatenate(leptons, axis=1)) - - def uint64_to_str(array: ak.Array) -> ak.Array: - # -99999 casted to uint64 - empty_uint64_str = str(np.iinfo(np.uint64).max + empty[np.uint64][0] + 1) - array = np.asarray(array, dtype=np.str_) - empty_mask = array == empty_uint64_str - array = np.where(empty_mask, str(empty[np.uint64][0]), array) - return array - - # event chunk loop - # optional filter to get only the events that overlap with given external LFN - if self.filter_file: - with self.publish_step("loading reference ids"): - events_to_filter = law.LocalFileTarget(self.filter_file).load(formatter="awkward") - filter_events = hash_events(events_to_filter) - - for (events, *columns), pos in self.iter_chunked_io( - files, - source_type=len(files) * ["awkward_parquet"], - pool_size=1, - ): - # optional check for overlapping inputs - if self.check_overlapping_inputs: - self.raise_if_overlapping([events] + list(columns)) - - # add additional columns - events = update_ak_array(events, *columns) - # apply mask if optional filter is given - # calculate mask by using 1D hash values - if self.filter_file: - mask = np.isin( - hash_events(events), - filter_events, - assume_unique=True, - kind="sort", - ) - - # apply mask - events = events[mask] - - if len(events) == 0: - raise ValueError( - """ - No events left after filtering. 
- "Check if correct overlap file is used - """, - ) - - # insert leptons - events = select_leptons(events, {"rawDeepTau2018v2p5VSjet": empty[np.float32][1]}) - # project into dataframe - met_name = self.config_inst.x.met_name - df = ak.to_dataframe({ - # index variables - "event": events.event, - "run": events.run, - "lumi": events.luminosityBlock, - # high-level events variables - "channel_id": events.channel_id, - "os": events.leptons_os * 1, - "iso": events.tau2_isolated * 1, - "deterministic_seed": uint64_to_str(events.deterministic_seed), - # jets - **reduce(or_, ( - { - f"jet{i + 1}_pt": select(events.Jet.pt, i), - f"jet{i + 1}_eta": select(events.Jet.eta, i), - f"jet{i + 1}_phi": select(events.Jet.phi, i), - f"jet{i + 1}_mass": select(events.Jet.mass, i), - f"jet{i + 1}_deterministic_seed": uint64_to_str( - select(events.Jet.deterministic_seed, i, np.uint64), - ), - } - for i in range(2) - )), - # electron specific variables - "e1_deterministic_seed": uint64_to_str( - select(events.Electron.deterministic_seed, 0, np.uint64), - ), - "e2_deterministic_seed": uint64_to_str( - select(events.Electron.deterministic_seed, 1, np.uint64), - ), - # combined leptons - **reduce(or_, ( - { - f"lep{i + 1}_pt": select(events.Lepton.pt, i), - f"lep{i + 1}_phi": select(events.Lepton.phi, i), - f"lep{i + 1}_eta": select(events.Lepton.eta, i), - f"lep{i + 1}_charge": select(events.Lepton.charge, i), - f"lep{i + 1}_deeptauvsjet": select(events.Lepton.rawDeepTau2018v2p5VSjet, i), - } - for i in range(2) - )), - # met - "met_pt": events[met_name].pt, - "met_phi": events[met_name].phi, - **({} if self.config_inst.campaign.x.version < 14 else { - "met_significance": select(events[met_name].significance, 0), - "met_covXX": select(events[met_name].covXX, 0), - "met_covXY": select(events[met_name].covXY, 0), - "met_covYY": select(events[met_name].covYY, 0), - }), - # fatjets - **reduce(or_, ( - { - f"fatjet{i + 1}_pt": select(events.FatJet.pt, i), - f"fatjet{i + 1}_eta": select(events.FatJet.eta, i), - f"fatjet{i + 1}_phi": select(events.FatJet.phi, i), - f"fatjet{i + 1}_mass": select(events.FatJet.mass, i), - } - for i in range(2) - )), - }) - # save as csv in output, append if necessary - output.dump( - df, - formatter="pandas", - index=False, - header=pos.index == 0, - mode="w" if pos.index == 0 else "a", - ) - - -check_overlap_tasks = law.config.get_expanded("analysis", "check_overlapping_inputs", [], split_csv=True) -CreateSyncFiles.check_overlapping_inputs = ChunkedIOMixin.check_overlapping_inputs.copy( - default=CreateSyncFiles.task_family in check_overlap_tasks, - add_default_to_description=True, -) diff --git a/hbt/util.py b/hbt/util.py deleted file mode 100644 index d947fbf9..00000000 --- a/hbt/util.py +++ /dev/null @@ -1,120 +0,0 @@ -# coding: utf-8 - -""" -Helpful utils. 
-""" - -from __future__ import annotations - -__all__ = [] - -from columnflow.types import Any -from columnflow.columnar_util import ArrayFunction, deferred_column -from columnflow.util import maybe_import - -np = maybe_import("numpy") - - -@deferred_column -def IF_NANO_V9(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: - return self.get() if func.config_inst.campaign.x.version == 9 else None - - -@deferred_column -def IF_NANO_V11(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: - return self.get() if func.config_inst.campaign.x.version == 11 else None - - -@deferred_column -def IF_NANO_V12(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: - return self.get() if func.config_inst.campaign.x.version == 12 else None - - -@deferred_column -def IF_NANO_V14(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: - return self.get() if func.config_inst.campaign.x.version == 14 else None - - -@deferred_column -def IF_NANO_GE_V10(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: - return self.get() if func.config_inst.campaign.x.version >= 10 else None - - -@deferred_column -def IF_RUN_2(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: - return self.get() if func.config_inst.campaign.x.run == 2 else None - - -@deferred_column -def IF_RUN_3(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: - return self.get() if func.config_inst.campaign.x.run == 3 else None - - -@deferred_column -def IF_RUN_3_2022(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: - return self.get() if (func.config_inst.campaign.x.run == 3 and func.config_inst.campaign.x.year == 2022) else None - - -@deferred_column -def IF_DATASET_HAS_LHE_WEIGHTS( - self: ArrayFunction.DeferredColumn, - func: ArrayFunction, -) -> Any | set[Any]: - if getattr(func, "dataset_inst", None) is None: - return self.get() - - return self.get() if not func.dataset_inst.has_tag("no_lhe_weights") else None - - -@deferred_column -def IF_DATASET_IS_DY( - self: ArrayFunction.DeferredColumn, - func: ArrayFunction, -) -> Any | set[Any]: - if getattr(func, "dataset_inst", None) is None: - return self.get() - - return self.get() if func.dataset_inst.has_tag("dy") else None - - -@deferred_column -def IF_DATASET_IS_W_LNU( - self: ArrayFunction.DeferredColumn, - func: ArrayFunction, -) -> Any | set[Any]: - if getattr(func, "dataset_inst", None) is None: - return self.get() - - return self.get() if func.dataset_inst.has_tag("w_lnu") else None - - -def hash_events(arr: np.ndarray) -> np.ndarray: - """ - Helper function to create a hash value from the event, run and luminosityBlock columns. - The values are padded to specific lengths and concatenated to a single integer. - """ - import awkward as ak - - def assert_value(arr: np.ndarray, field: str, max_value: int) -> None: - """ - Helper function to check if a column does not exceed a maximum value. 
- """ - digits = len(str(arr[field].to_numpy().max())) - assert digits <= max_value, f"{field} digit count is {digits} and exceed max value {max_value}" - - max_digits_run = 6 - max_digits_luminosityBlock = 6 - max_digits_event = 8 - assert_value(arr, "run", max_digits_run) - assert_value(arr, "luminosityBlock", max_digits_luminosityBlock) - assert_value(arr, "event", max_digits_event) - - max_digits_hash = max_digits_event + max_digits_luminosityBlock + max_digits_run - assert max_digits_hash <= 20, "sum of digits exceeds int64" - - # upcast to int64 to avoid overflow - return ( - ak.values_astype(arr.run, np.int64) * 10**(max_digits_luminosityBlock + max_digits_event) + - ak.values_astype(arr.luminosityBlock, np.int64) * 10**max_digits_event + - ak.values_astype(arr.event, np.int64) - ) diff --git a/hbt/weight/default.py b/hbt/weight/default.py deleted file mode 100644 index 363d0419..00000000 --- a/hbt/weight/default.py +++ /dev/null @@ -1,78 +0,0 @@ -# coding: utf-8 - -""" -Default event weight definitions. -""" - -from columnflow.weight import WeightProducer, weight_producer -from columnflow.columnar_util import Route -from columnflow.util import maybe_import, pattern_matcher - -ak = maybe_import("awkward") -np = maybe_import("numpy") - - -@weight_producer( - # both produced columns and dependent shifts are defined in init below - # only run on mc - mc_only=True, - # options to keep or drop specific weights - keep_weights=None, - drop_weights={"normalization_weight_inclusive"}, -) -def default(self: WeightProducer, events: ak.Array, **kwargs) -> ak.Array: - # build the full event weight - weight = ak.Array(np.ones(len(events), dtype=np.float32)) - for column in self.weight_columns: - weight = weight * Route(column).apply(events) - - return events, weight - - -@default.init -def default_init(self: WeightProducer) -> None: - dataset_inst = getattr(self, "dataset_inst", None) - - # use the config's auxiliary event_weights, drop some of them based on drop_weights, and on this - # weight producer instance, store weight_columns, used columns, and shifts - self.weight_columns = [] - - # helpers to match to kept or dropped weights - do_keep = pattern_matcher(self.keep_weights) if self.keep_weights else (lambda _, /: True) - do_drop = pattern_matcher(self.drop_weights) if self.drop_weights else (lambda _, /: False) - - # collect all possible weight columns and affected shifts - all_weights = self.config_inst.x.event_weights - if dataset_inst: - all_weights.update(dataset_inst.x("event_weights", {})) - for weight_name, shift_insts in all_weights.items(): - if not do_keep(weight_name) or do_drop(weight_name): - continue - - # manually skip pdf and scale weights for samples that do not have lhe info - if dataset_inst: - is_lhe_weight = any(shift_inst.has_tag("lhe_weight") for shift_inst in shift_insts) - if is_lhe_weight and self.dataset_inst.has_tag("no_lhe_weights"): - continue - - self.weight_columns.append(weight_name) - self.uses.add(weight_name) - self.shifts |= {shift_inst.name for shift_inst in shift_insts} - - -normalization_inclusive = default.derive( - "normalization_inclusive", - cls_dict={"drop_weights": {"normalization_weight"}}, -) - - -normalization_only = default.derive( - "normalization_only", - cls_dict={"keep_weights": {"normalization_weight"}}, -) - - -normalization_inclusive_only = default.derive( - "normalization_inclusive_only", - cls_dict={"keep_weights": {"normalization_weight_inclusive"}, "drop_weights": None}, -) diff --git a/img.png b/img.png new file mode 100644 
index 00000000..f94be392 --- /dev/null +++ b/img.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c49a1b8b59fb81c903de06b4fc534850ebda6e12c7b9adeadd50fa18d0609035 +size 70907 diff --git a/law.cfg b/law.cfg index 3e459686..0972f228 100644 --- a/law.cfg +++ b/law.cfg @@ -1,18 +1,15 @@ -# +#=============================== # law settings -# - +#=============================== [core] # inherit from the columnflow configuration file inherit: $CF_BASE/law.cfg -extend: $HBT_BASE/law_fs.cfg, $HBT_BASE/law_outputs.cfg, $HBT_BASE/law_user.cfg - +extend: $MULTILEPTON_BASE/law_fs.cfg, $MULTILEPTON_BASE/law_outputs.cfg, $MULTILEPTON_BASE/law_user.cfg [modules] columnflow.tasks.cms.inference columnflow.tasks.cms.external -hbt.tasks - +multilepton.tasks [logging] law: INFO @@ -21,49 +18,47 @@ gfal2: WARNING columnflow.columnar_util-perf: INFO law.sandbox.base: INFO - [notifications] -mattermost_user: HH → bb𝛕𝛕 - +mattermost_user: HH → Multilepton [job] job_file_dir: $CF_JOB_BASE job_file_dir_cleanup: False job_file_dir_mkdtemp: sub_{{task_id}}_XXX -crab_sandbox_name: CMSSW_14_2_1::arch=el9_amd64_gcc12 - +#crab_sandbox_name: CMSSW_14_2_1::arch=el9_amd64_gcc21 +crab_sandbox_name: CMSSW_10_6_18::arch=slc7_amd64_gcc700 +crab_storage_element: $CF_CRAB_STORAGE_ELEMENT -# +#=============================== # analysis specific settings -# - +#=============================== [analysis] - -default_analysis: hbt.config.analysis_hbt.analysis_hbt -default_config: 22pre_v14 -default_dataset: hh_ggf_hbb_htt_kl1_kt1_powheg - -calibration_modules: columnflow.calibration.cms.{jets,met,tau}, hbt.calibration.{default,fake_triggers} -selection_modules: columnflow.selection.cms.{json_filter,met_filters}, hbt.selection.{default,lepton,trigger} -production_modules: columnflow.production.{categories,normalization,processes}, columnflow.production.cms.{btag,electron,mc_weight,muon,pdf,pileup,scale,seeds,gen_top_decay}, hbt.production.{default,weights,features,btag,tau,minimal,hh_mass,res_networks,patches} -categorization_modules: hbt.categorization.default -weight_production_modules: columnflow.weight.{empty,all_weights}, hbt.weight.default -ml_modules: hbt.ml.test -inference_modules: hbt.inference.{default} +default_analysis: multilepton.config.analysis_multilepton.analysis_multilepton +default_config: 22preEE_v14_private +default_dataset: hh_ggf_htt_hvv_kl0_kt1_powheg +default_columnar_sandbox: bash::$MULTILEPTON_BASE/sandboxes/venv_multilepton.sh +default_remote_claw_sandbox: venv_multilepton +default_create_selection_hists: True # wether or not the ensure_proxy decorator should be skipped, even if used by task's run methods skip_ensure_proxy: False -# do not write hists in selection -default_create_selection_hists: False +calibration_modules: columnflow.calibration.cms.{jets,met,tau}, multilepton.calibration.{default,fake_triggers} +selection_modules: columnflow.selection.cms.{json_filter,met_filters}, multilepton.selection.{default,lepton,trigger} +reduction_modules: columnflow.reduction.default, multilepton.reduction.default +production_modules: columnflow.production.{categories,normalization,processes}, columnflow.production.cms.{btag,electron,mc_weight,muon,pdf,pileup,scale,seeds,parton_shower,gen_top_decay}, multilepton.production.{default,weights,features,btag,tau,minimal,res_networks,patches} +categorization_modules: multilepton.categorization.default +hist_production_modules: multilepton.histogramming.default +ml_modules: multilepton.ml.test +inference_modules: 
multilepton.inference.{default,ete} # some remote workflow parameter defaults # (resources like memory and disk can also be set in [resources] with more granularity) htcondor_flavor: $CF_HTCONDOR_FLAVOR htcondor_share_software: True -# 2GB -> short "lite" queue, otherwise long "bide" queue on the naf htcondor_memory: 2GB htcondor_disk: 5GB + slurm_flavor: $CF_SLURM_FLAVOR slurm_partition: $CF_SLURM_PARTITION @@ -72,24 +67,24 @@ chunked_io_chunk_size: 50000 chunked_io_pool_size: 1 chunked_io_debug: False +# string representation settings +repr_max_len: -1 +repr_max_count: 3 +repr_hash_len: 8 -# +#=============================== # task resources -# - +#=============================== [resources] +task_cf.{SelectEvents,ReduceEvents__pilot_True}__sel_default: htcondor_memory=6.25GB, crab_memory=5000MB +task_cf.{ProduceColumns,UniteColumns__pilot_True,CreateHistograms__pilot_True}__prod_res_dnn: htcondor_memory=3.00GB, crab_memory=3000MB +#task_cf.{CalibrateEvents,ReduceEvents,SelectEvents}: slurm_partition='io' -# default selection with hhbtag requires more memory -cf.{Select,Reduce}Events__sel_default: htcondor_memory=6GB, crab_memory=5000MB - - -# +#=============================== # luigi configs # (in addition to those in the inherited file) -# - +#=============================== [luigi_resources] - naf_riegerma: 5000 naf_pkeicher: 5000 naf_alvesand: 5000 diff --git a/law_fs.cfg b/law_fs.cfg index 6370e718..41e85dfd 100644 --- a/law_fs.cfg +++ b/law_fs.cfg @@ -1,224 +1,69 @@ -# +#============================================= # file system configurations -# - +#============================================= [local_fs] base: / - -# +#============================================= # file systems specific to the current user -# - -[local_fs_desy_dcache] -base: /pnfs/desy.de/cms/tier2 - - -[wlcg_fs] -base: &::wlcg_fs_desy::base -base_mkdir_rec: &::wlcg_fs_desy::gsiftp_base -create_file_dir: True -use_cache: $CF_WLCG_USE_CACHE -cache_root: $CF_WLCG_CACHE_ROOT -cache_cleanup: $CF_WLCG_CACHE_CLEANUP -cache_max_size: 50GB - - -[wlcg_fs_desy] -webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/$CF_CERN_USER/$CF_STORE_NAME -gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/$CF_CERN_USER/$CF_STORE_NAME -xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/$CF_CERN_USER/$CF_STORE_NAME -base: &::webdav_base -base_filecopy: &::webdav_base -base_stat: &::webdav_base - - -[wlcg_fs_cernbox] -xrootd_base: root://eosuser.cern.ch/eos/user/$CF_CERN_USER_FIRSTCHAR/$CF_CERN_USER/$CF_STORE_NAME +#============================================= +[wlcg_fs_manivald] +#xrootd_base: root://eosuser.cern.ch/eos/user/$CF_CERN_USER_FIRSTCHAR/$CF_CERN_USER/cat/$CF_STORE_NAME +#base: &::xrootd_base +#base: &::wlcg_fs_desy::base +#base_mkdir_rec: &::wlcg_fs_desy::gsiftp_base +default_dir_perm: 0o775 +default_file_perm: 0o775 +xrootd_base: root://xrootd.hep.kbfi.ee:1094/store/user/$CF_CERN_USER/HHMultilepton +webdav_base: davs://xrootd.hep.kbfi.ee:1094/store/user/$CF_CERN_USER/HHMultilepton base: &::xrootd_base - - -# -# file systems for specific users -# - -[local_fs_dust_mrieger] -base: /data/dust/user/riegerma/hh2bbtautau/hbt_store -default_dir_perm: 0o770 -default_file_perm: 0o660 -[local_fs_desy_mrieger] -base: /pnfs/desy.de/cms/tier2/store/user/mrieger/hbt_store -local_root_depth: 3 -[wlcg_fs_desy_mrieger] -webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/mrieger/hbt_store 
-gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/mrieger/hbt_store -xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/mrieger/hbt_store -base: &::webdav_base -base_mkdir_rec: &::webdav_base -create_file_dir: True -use_cache: $CF_WLCG_USE_CACHE -cache_root: $CF_WLCG_CACHE_ROOT -cache_cleanup: $CF_WLCG_CACHE_CLEANUP -cache_max_size: 50GB - - -[local_fs_dust_pkeicher] -base: /data/dust/user/pkeicher/hh2bbtautau/hbt_store -default_dir_perm: 0o770 -default_file_perm: 0o660 -[local_fs_desy_pkeicher] -base: /pnfs/desy.de/cms/tier2/store/user/pkeicher/hbt_store -local_root_depth: 3 -[wlcg_fs_desy_pkeicher] -webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/pkeicher/hbt_store -gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/pkeicher/hbt_store -xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/pkeicher/hbt_store -base: &::webdav_base -base_mkdir_rec: &::webdav_base -create_file_dir: True -use_cache: $CF_WLCG_USE_CACHE -cache_root: $CF_WLCG_CACHE_ROOT -cache_cleanup: $CF_WLCG_CACHE_CLEANUP -cache_max_size: 50GB - - -[local_fs_dust_bwieders] -base: /data/dust/user/wiedersb/hh2bbtautau/hbt_store -default_dir_perm: 0o770 -default_file_perm: 0o660 -[local_fs_desy_bwieders] -base: /pnfs/desy.de/cms/tier2/store/user/bwieders/hbt_store -local_root_depth: 3 -[wlcg_fs_desy_bwieders] -webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/bwieders/hbt_store -gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/bwieders/hbt_store -xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/bwieders/hbt_store -base: &::webdav_base -base_mkdir_rec: &::webdav_base create_file_dir: True use_cache: $CF_WLCG_USE_CACHE cache_root: $CF_WLCG_CACHE_ROOT cache_cleanup: $CF_WLCG_CACHE_CLEANUP cache_max_size: 50GB - -[local_fs_dust_nprouvos] -base: /data/dust/user/prouvost/hh2bbtautau/hbt_store -default_dir_perm: 0o770 -default_file_perm: 0o660 -[local_fs_desy_nprouvos] -base: /pnfs/desy.de/cms/tier2/store/user/nprouvos/hbt_store -local_root_depth: 3 -[wlcg_fs_desy_nprouvos] -webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/nprouvos/hbt_store -gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/nprouvos/hbt_store -xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/nprouvos/hbt_store -base: &::webdav_base -base_mkdir_rec: &::webdav_base -create_file_dir: True -use_cache: $CF_WLCG_USE_CACHE -cache_root: $CF_WLCG_CACHE_ROOT -cache_cleanup: $CF_WLCG_CACHE_CLEANUP -cache_max_size: 50GB - - -[local_fs_dust_aalvesan] -base: /data/dust/user/alvesand/hh2bbtautau/hbt_store -default_dir_perm: 0o770 -default_file_perm: 0o660 -[local_fs_desy_aalvesan] -base: /pnfs/desy.de/cms/tier2/store/user/aalvesan/hbt_store -local_root_depth: 3 -[wlcg_fs_desy_aalvesan] -webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/aalvesan/hbt_store -gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/aalvesan/hbt_store -xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/aalvesan/hbt_store -base: &::webdav_base -base_mkdir_rec: &::webdav_base +[wlcg_fs] +base: &::wlcg_fs_manivald::base +base_mkdir_rec: &::wlcg_fs_desy::webdav_base create_file_dir: True use_cache: $CF_WLCG_USE_CACHE cache_root: 
$CF_WLCG_CACHE_ROOT cache_cleanup: $CF_WLCG_CACHE_CLEANUP cache_max_size: 50GB - -[local_fs_desy_anhaddad] -base: /pnfs/desy.de/cms/tier2/store/user/anhaddad/hbt_store -local_root_depth: 3 -[wlcg_fs_desy_anhaddad] -webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/anhaddad/hbt_store -gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/anhaddad/hbt_store -xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/anhaddad/hbt_store -base: &::webdav_base -base_mkdir_rec: &::webdav_base +[wlcg_fs_cernbox] +webdav_base: davs://eoscms.cern.ch:443/eos/user/${CF_CERN_USER_FIRSTCHAR}/${CF_CERN_USER}/cat/${CF_STORE_NAME} +gsiftp_base: gsiftp://eoscmsftp.cern.ch:2811/eos/user/${CF_CERN_USER_FIRSTCHAR}/${CF_CERN_USER}/cat/${CF_STORE_NAME} +xrootd_base: root://eosuser.cern.ch/eos/user/$CF_CERN_USER_FIRSTCHAR/$CF_CERN_USER/cat/$CF_STORE_NAME +base: &::xrootd_base create_file_dir: True use_cache: $CF_WLCG_USE_CACHE cache_root: $CF_WLCG_CACHE_ROOT cache_cleanup: $CF_WLCG_CACHE_CLEANUP cache_max_size: 50GB - -[local_fs_dust_roward] -base: /data/dust/user/wardrobe/hh2bbtautau/hbt_store -default_dir_perm: 0o770 -default_file_perm: 0o660 -[local_fs_desy_roward] -base: /pnfs/desy.de/cms/tier2/store/user/roward/hbt_store -local_root_depth: 3 -[wlcg_fs_desy_roward] -webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/roward/hbt_store -gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/roward/hbt_store -xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/roward/hbt_store +[wlcg_fs_desy] +webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/$CF_CERN_USER/$CF_STORE_NAME +gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/$CF_CERN_USER/$CF_STORE_NAME +xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/$CF_CERN_USER/$CF_STORE_NAME base: &::webdav_base -base_mkdir_rec: &::webdav_base -create_file_dir: True -use_cache: $CF_WLCG_USE_CACHE -cache_root: $CF_WLCG_CACHE_ROOT -cache_cleanup: $CF_WLCG_CACHE_CLEANUP -cache_max_size: 50GB - +base_filecopy: &::webdav_base +base_stat: &::webdav_base -[local_fs_dust_pgadow] -base: /data/dust/user/pgadow/hh2bbtautau/hbt_store -default_dir_perm: 0o770 -default_file_perm: 0o660 -[local_fs_desy_pgadow] -base: /pnfs/desy.de/cms/tier2/store/user/pgadow/hbt_store +[local_eos_user] +base: /eos/user/$CF_CERN_USER_FIRSTCHAR/$CF_CERN_USER/HHMultilepton_Run3/local local_root_depth: 3 -[wlcg_fs_desy_pgadow] -webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/pgadow/hbt_store -gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/pgadow/hbt_store -xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/pgadow/hbt_store -base: &::webdav_base -base_mkdir_rec: &::webdav_base -create_file_dir: True -use_cache: $CF_WLCG_USE_CACHE -cache_root: $CF_WLCG_CACHE_ROOT -cache_cleanup: $CF_WLCG_CACHE_CLEANUP -cache_max_size: 50GB - - -# -# file systems for custom LFNs -# - -# remove when moved to v14 -[local_fs_run3_2022_preEE_nano_uhh_v12] -base: file:///pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v12/prod3 -[wlcg_fs_run3_2022_preEE_nano_uhh_v12] -webdav_base: 
davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v12/prod3 -gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v12/prod3 -xrootd_base: root://dcache-cms-xrootd.desy.de:1094/pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v12/prod3 -base: &::xrootd_base -use_cache: $CF_WLCG_USE_CACHE -cache_root: $CF_WLCG_CACHE_ROOT -cache_cleanup: $CF_WLCG_CACHE_CLEANUP -cache_max_size: 15GB -cache_global_lock: True +[local_fs_manivald_user] +base: /cms/store/user/$CF_CERN_USER/HHMultilepton +#============================================= +# file systems for custom NanoAOD (uhh) +#============================================= [local_fs_run3_2022_preEE_nano_uhh_v14] base: file:///pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v14/prod1 + [wlcg_fs_run3_2022_preEE_nano_uhh_v14] webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v14/prod1 gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/nprouvos/nanogen_store/MergeNano/config_22pre_v14/prod1 @@ -230,9 +75,9 @@ cache_cleanup: $CF_WLCG_CACHE_CLEANUP cache_max_size: 15GB cache_global_lock: True - [local_fs_run3_2022_postEE_nano_uhh_v14] base: file:///pnfs/desy.de/cms/tier2/store/user/aalvesan/nanogen_store/MergeNano/config_22post_v14/prod1 + [wlcg_fs_run3_2022_postEE_nano_uhh_v14] webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/aalvesan/nanogen_store/MergeNano/config_22post_v14/prod1 gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/aalvesan/nanogen_store/MergeNano/config_22post_v14/prod1 @@ -244,9 +89,9 @@ cache_cleanup: $CF_WLCG_CACHE_CLEANUP cache_max_size: 15GB cache_global_lock: True - [local_fs_run3_2023_preBPix_nano_uhh_v14] base: file:///pnfs/desy.de/cms/tier2/store/user/bwieders/nanogen_store/MergeNano/config_23pre_v14/prod1 + [wlcg_fs_run3_2023_preBPix_nano_uhh_v14] webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/bwieders/nanogen_store/MergeNano/config_23pre_v14/prod1 gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/bwieders/nanogen_store/MergeNano/config_23pre_v14/prod1 @@ -258,9 +103,9 @@ cache_cleanup: $CF_WLCG_CACHE_CLEANUP cache_max_size: 15GB cache_global_lock: True - [local_fs_run3_2023_postBPix_nano_uhh_v14] -base: file:///pnfs/desy.de/cms/tier2/store/user/roward/nanogen_store/MergeNano/config_23post_v14/prod1 +base: file:///pnfs/desy.de/cms/tier2/store/user/roward/nanogen_store/MergeNano/config_23post_v14/prod1/prod1 + [wlcg_fs_run3_2023_postBPix_nano_uhh_v14] webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2/store/user/roward/nanogen_store/MergeNano/config_23post_v14/prod1 gsiftp_base: gsiftp://dcache-cms-gridftp.desy.de/pnfs/desy.de/cms/tier2/store/user/roward/nanogen_store/MergeNano/config_23post_v14/prod1 @@ -272,11 +117,9 @@ cache_cleanup: $CF_WLCG_CACHE_CLEANUP cache_max_size: 15GB cache_global_lock: True - -# -# file systems for central LFNs -# - +#============================================= +# file systems for central NanoAOD +#============================================= [wlcg_fs_desy_store] webdav_base: davs://dcache-cms-webdav-wan.desy.de:2880/pnfs/desy.de/cms/tier2 gsiftp_base: 
gsiftp://dcache-door-cms04.desy.de:2811/pnfs/desy.de/cms/tier2 @@ -289,7 +132,6 @@ cache_max_size: 15GB cache_global_lock: True cache_mtime_patience: -1 - [wlcg_fs_infn_redirector] base: root://xrootd-cms.infn.it/ use_cache: $CF_WLCG_USE_CACHE @@ -299,7 +141,6 @@ cache_max_size: 15GB cache_global_lock: True cache_mtime_patience: -1 - [wlcg_fs_us_redirector] base: root://cmsxrootd.fnal.gov/ use_cache: $CF_WLCG_USE_CACHE @@ -309,7 +150,6 @@ cache_max_size: 15GB cache_global_lock: True cache_mtime_patience: -1 - [wlcg_fs_global_redirector] base: root://cms-xrd-global.cern.ch/ use_cache: $CF_WLCG_USE_CACHE diff --git a/law_outputs.cfg b/law_outputs.cfg index 8bec1576..ec78482c 100644 --- a/law_outputs.cfg +++ b/law_outputs.cfg @@ -1,11 +1,10 @@ -# +#====================================== # output location and version settings -# +#====================================== [outputs] - # list of all used file systems -wlcg_file_systems: wlcg_fs, wlcg_fs_desy, wlcg_fs_cernbox, wlcg_fs_desy_store, wlcg_fs_infn_redirector, wlcg_fs_global_redirector +wlcg_file_systems: wlcg_fs, local_fs_manivald_user, wlcg_fs_desy_store, wlcg_fs_infn_redirector, wlcg_fs_global_redirector, wlcg_fs_manivald, local_eos_user, wlcg_fs_cernbox # list of file systems used by columnflow.tasks.external.GetDatasetLFNs.iter_nano_files to # look for the correct fs per nano input file (in that order) @@ -18,83 +17,94 @@ lfn_sources: wlcg_fs_desy_store, wlcg_fs_infn_redirector, wlcg_fs_global_redirec # the "store_parts_modifiers" can be the name of a function in the "store_parts_modifiers" aux dict # of the analysis instance, which is called with an output's store parts of an output to modify them # specific locations + +# at CERN: +# ???_fs: wlcg_mirrored, local_eos_user, wlcg_fs_cernbox + ; 22pre -22pre_fs_dcache: wlcg_mirrored, local_fs_desy_nprouvos, wlcg_fs_desy_nprouvos -22pre_fs_local: local, local_fs_dust_nprouvos -22pre_v1?__cf.{Calibrate,Select,Reduce}Events: &::22pre_fs_dcache -22pre_v1?__cf.MergeReducedEvents: &::22pre_fs_local -22pre_v1?__cf.Merge{Reduction,Selection}Stats: &::22pre_fs_local -22pre_v1?__cf.{Produce,Unite}Columns: &::22pre_fs_local -22pre_v1?__cf.*ML*: &::22pre_fs_local -22pre_v1?__cf.{Create,Merge,MergeShifted}Histograms: &::22pre_fs_local -22pre_v1?__cf.CreateDatacards: &::22pre_fs_local +22pre_fs: wlcg, wlcg_fs_manivald +cfg_22pre_v1?__task_cf.{Calibrate,Select,Reduce}Events: &::22pre_fs +cfg_22pre_v1?__task_cf.MergeReducedEvents: &::22pre_fs +cfg_22pre_v1?__task_cf.Merge{Reduction,Selection}Stats: &::22pre_fs +cfg_22pre_v1?__task_cf.{Produce,Unite}Columns: &::22pre_fs +cfg_22pre_v1?__task_cf.*ML*: &::22pre_fs +cfg_22pre_v1?__task_cf.{Create,Merge,MergeShifted}Histograms: &::22pre_fs +cfg_22pre_v1?__task_cf.CreateDatacards: &::22pre_fs + ; 22post -22post_fs_dcache: wlcg_mirrored, local_fs_desy_aalvesan, wlcg_fs_desy_aalvesan -22post_fs_local: local, local_fs_dust_aalvesan -22post_v1?__cf.{Calibrate,Select,Reduce}Events: &::22post_fs_dcache -22post_v1?__cf.MergeReducedEvents: &::22post_fs_local -22post_v1?__cf.Merge{Reduction,Selection}Stats: &::22post_fs_local -22post_v1?__cf.{Produce,Unite}Columns: &::22post_fs_local -22post_v1?__cf.*ML*: &::22post_fs_local -22post_v1?__cf.{Create,Merge,MergeShifted}Histograms: &::22post_fs_local -22post_v1?__cf.CreateDatacards: &::22post_fs_local +22post_fs: wlcg, wlcg_fs_manivald +cfg_22post_v1?__task_cf.{Calibrate,Select,Reduce}Events: &::22post_fs +cfg_22post_v1?__task_cf.MergeReducedEvents: &::22post_fs +cfg_22post_v1?__task_cf.Merge{Reduction,Selection}Stats: 
&::22post_fs +cfg_22post_v1?__task_cf.{Produce,Unite}Columns: &::22post_fs +cfg_22post_v1?__task_cf.*ML*: &::22post_fs +cfg_22post_v1?__task_cf.{Create,Merge,MergeShifted}Histograms: &::22post_fs +cfg_22post_v1?__task_cf.CreateDatacards: &::22post_fs + ; 23pre -23pre_fs_dcache: wlcg_mirrored, local_fs_desy_bwieders, wlcg_fs_desy_bwieders -23pre_fs_local: local, local_fs_dust_bwieders -23pre_v1?__cf.{Calibrate,Select,Reduce}Events: &::23pre_fs_dcache -23pre_v1?__cf.MergeReducedEvents: &::23pre_fs_local -23pre_v1?__cf.Merge{Reduction,Selection}Stats: &::23pre_fs_local -23pre_v1?__cf.{Produce,Unite}Columns: &::23pre_fs_local -23pre_v1?__cf.*ML*: &::23pre_fs_local -23pre_v1?__cf.{Create,Merge,MergeShifted}Histograms: &::23pre_fs_local -23pre_v1?__cf.CreateDatacards: &::23pre_fs_local +23pre_fs: wlcg, wlcg_fs_manivald +cfg_23pre_v1?__task_cf.{Calibrate,Select,Reduce}Events: &::23pre_fs +cfg_23pre_v1?__task_cf.MergeReducedEvents: &::23pre_fs +cfg_23pre_v1?__task_cf.Merge{Reduction,Selection}Stats: &::23pre_fs +cfg_23pre_v1?__task_cf.{Produce,Unite}Columns: &::23pre_fs +cfg_23pre_v1?__task_cf.*ML*: &::23pre_fs +cfg_23pre_v1?__task_cf.{Create,Merge,MergeShifted}Histograms: &::23pre_fs +cfg_23pre_v1?__task_cf.CreateDatacards: &::23pre_fs + ; 23post -23post_fs_dcache: wlcg_mirrored, local_fs_desy_roward, wlcg_fs_desy_roward -23post_fs_local: local, local_fs_dust_mrieger -23post_v1?__cf.{Calibrate,Select,Reduce}Events: &::23post_fs_dcache -23post_v1?__cf.MergeReducedEvents: &::23post_fs_local -23post_v1?__cf.Merge{Reduction,Selection}Stats: &::23post_fs_local -23post_v1?__cf.{Produce,Unite}Columns: &::23post_fs_local -23post_v1?__cf.*ML*: &::23post_fs_local -23post_v1?__cf.{Create,Merge,MergeShifted}Histograms: &::23post_fs_local -23post_v1?__cf.CreateDatacards: &::23post_fs_local +23post_fs: wlcg, wlcg_fs_manivald +cfg_23post_v1?__task_cf.{Calibrate,Select,Reduce}Events: &::23post_fs +cfg_23post_v1?__task_cf.MergeReducedEvents: &::23post_fs +cfg_23post_v1?__task_cf.Merge{Reduction,Selection}Stats: &::23post_fs +cfg_23post_v1?__task_cf.{Produce,Unite}Columns: &::23post_fs +cfg_23post_v1?__task_cf.*ML*: &::23post_fs +cfg_23post_v1?__task_cf.{Create,Merge,MergeShifted}Histograms: &::23post_fs +cfg_23post_v1?__task_cf.CreateDatacards: &::23post_fs -; syncronization -22pre_v12_sync: local, local_fs_dust_bwieders +; 24 +24_fs: wlcg, wlcg_fs_manivald +cfg_24_v1?__task_cf.{Calibrate,Select,Reduce}Events: &::24_fs +cfg_24_v1?__task_cf.MergeReducedEvents: &::24_fs +cfg_24_v1?__task_cf.Merge{Reduction,Selection}Stats: &::24_fs +cfg_24_v1?__task_cf.{Produce,Unite}Columns: &::24_fs +cfg_24_v1?__task_cf.*ML*: &::24_fs +cfg_24_v1?__task_cf.{Create,Merge,MergeShifted}Histograms: &::24_fs +cfg_24_v1?__task_cf.CreateDatacards: &::24_fs # fallbacks -cf.BundleRepo: wlcg -cf.BundleSoftware: wlcg -cf.BundleBashSandbox: wlcg -cf.BundleCMSSWSandbox: wlcg -cf.BundleExternalFiles: wlcg -cf.GetDatasetLFNs: wlcg -cf.CalibrateEvents: wlcg -cf.SelectEvents: wlcg -cf.MergeSelectionStats: wlcg -cf.MergeSelectionMasks: wlcg -cf.ReduceEvents: wlcg -cf.MergeReductionStats: wlcg -cf.MergeReducedEvents: wlcg -cf.ProduceColumns: wlcg -cf.CreatePileupWeights: wlcg -cf.PrepareMLEvents: wlcg -cf.MergeMLEvents: wlcg -cf.MLTraining: local -cf.MLEvaluation: wlcg -cf.UniteColumns: wlcg -cf.CreateSyncFile: local +base_out: wlcg, wlcg_fs_manivald +task_cf.BundleRepo: wlcg, wlcg_fs_manivald +task_cf.BundleSoftware: wlcg, wlcg_fs_manivald +task_cf.BundleBashSandbox: wlcg, wlcg_fs_manivald +task_cf.BundleCMSSWSandbox: wlcg, wlcg_fs_manivald
+task_cf.BundleExternalFiles: wlcg, wlcg_fs_manivald +task_cf.GetDatasetLFNs: wlcg, wlcg_fs_manivald +task_cf.CalibrateEvents: &::base_out +task_cf.SelectEvents: &::base_out +task_cf.MergeSelectionStats: &::base_out +task_cf.MergeSelectionMasks: &::base_out +task_cf.ReduceEvents: &::base_out +task_cf.MergeReductionStats: &::base_out +task_cf.MergeReducedEvents: &::base_out +task_cf.ProduceColumns: &::base_out +task_cf.CreatePileupWeights: &::base_out +task_cf.PrepareMLEvents: &::base_out +task_cf.MergeMLEvents: &::base_out +task_cf.MLTraining: &::base_out +task_cf.MLEvaluation: &::base_out +task_cf.UniteColumns: &::base_out +task_cf.CreateSyncFile: &::base_out +task_cf.PlotVariables1D : &::base_out [versions] - # updated categorization in default producer -{22,23}{pre,post}_v14__cf.ProduceColumns__prod_default: prod6 +# {22,23}{pre,post}_v14__cf.ProduceColumns__prod_default: prod6 # added eec and slightly adjusted selection -{22,23}{pre,post}_v14__cf.CalibrateEvents: prod5 -{22,23}{pre,post}_v14__cf.{SelectEvents,MergeSelectionStats,ReduceEvents,MergeReductionStats,ProvideReducedEvents}: prod5 -{22,23}{pre,post}_v14__cf.ProduceColumns: prod5 +# {22,23}{pre,post}_v14__cf.CalibrateEvents: prod5 +# {22,23}{pre,post}_v14__cf.{SelectEvents,MergeSelectionStats,ReduceEvents,MergeReductionStats,ProvideReducedEvents}: prod5 +# {22,23}{pre,post}_v14__cf.ProduceColumns: prod5 # for first plots (16.1.2025) # 22pre_v14__cf.CalibrateEvents: prod4 diff --git a/modules/cmsdb b/modules/cmsdb index 495f4957..aa0d2f25 160000 --- a/modules/cmsdb +++ b/modules/cmsdb @@ -1 +1 @@ -Subproject commit 495f4957e97b21fcea29454d16fdf7b95ab1e1ad +Subproject commit aa0d2f253fdf64b95b2bdbd3fa89d36f2956e2b1 diff --git a/modules/columnflow b/modules/columnflow index ce7fddd1..9257b49a 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit ce7fddd1d3a42342b7f30f0badcb563b538db964 +Subproject commit 9257b49a51e99e842ae9dd4569bc32a2daae7ed6 diff --git a/hbt/__init__.py b/multilepton/__init__.py similarity index 63% rename from hbt/__init__.py rename to multilepton/__init__.py index 9e393f0c..cbcb0def 100644 --- a/hbt/__init__.py +++ b/multilepton/__init__.py @@ -2,10 +2,8 @@ import law -from hbt.columnflow_patches import patch_all - +from multilepton.columnflow_patches import patch_all law.contrib.load("pandas") - # apply cf patches once patch_all() diff --git a/hbt/calibration/__init__.py b/multilepton/calibration/__init__.py similarity index 100% rename from hbt/calibration/__init__.py rename to multilepton/calibration/__init__.py diff --git a/multilepton/calibration/default.py b/multilepton/calibration/default.py new file mode 100644 index 00000000..ffa8df68 --- /dev/null +++ b/multilepton/calibration/default.py @@ -0,0 +1,196 @@ +# coding: utf-8 + +""" +Calibration methods. 
+""" + +from __future__ import annotations + +from columnflow.calibration import Calibrator, calibrator +from columnflow.calibration.cms.met import met_phi_run2, met_phi +from columnflow.calibration.cms.jets import jec, jer +from columnflow.calibration.cms.tau import tec +from columnflow.calibration.cms.egamma import electron_scale_smear +from columnflow.production.cms.mc_weight import mc_weight +from columnflow.production.cms.electron import electron_sceta +from columnflow.production.cms.seeds import ( + deterministic_event_seeds, deterministic_jet_seeds, deterministic_electron_seeds, +) +from columnflow.util import maybe_import + +from multilepton.util import IF_RUN_3, IF_DATA, IF_MC + +ak = maybe_import("awkward") + + +# custom seed producer skipping GenPart fields +custom_deterministic_event_seeds_mc = deterministic_event_seeds.derive( + "custom_deterministic_event_seeds_mc", + cls_dict={ + "object_count_columns": [ + route for route in deterministic_event_seeds.object_count_columns + if not str(route).startswith(("GenPart.", "Photon.")) + ], + }, +) +custom_deterministic_event_seeds_data = custom_deterministic_event_seeds_mc.derive( + "custom_deterministic_event_seeds_data", + cls_dict={ + "event_columns": [ + route for route in custom_deterministic_event_seeds_mc.event_columns + if not str(route).startswith("Pileup.nPU") + ], + "object_count_columns": [ + route for route in custom_deterministic_event_seeds_mc.object_count_columns + if not str(route).startswith("GenJet.") + ], + }, +) + + +@calibrator( + uses={ + IF_MC(mc_weight, custom_deterministic_event_seeds_mc), + IF_DATA(custom_deterministic_event_seeds_data), + deterministic_jet_seeds, deterministic_electron_seeds, electron_sceta, + }, + produces={ + IF_MC(mc_weight, custom_deterministic_event_seeds_mc), + IF_DATA(custom_deterministic_event_seeds_data), + deterministic_jet_seeds, deterministic_electron_seeds, + }, +) +def default(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: + task = kwargs["task"] + + if self.dataset_inst.is_mc: + events = self[mc_weight](events, **kwargs) + + # seed producers + # !! as this is the first step, the object collections should still be pt-sorted, + # !! so no manual sorting needed here (but necessary if, e.g., jec is applied before) + if self.dataset_inst.is_mc: + events = self[custom_deterministic_event_seeds_mc](events, **kwargs) + else: + events = self[custom_deterministic_event_seeds_data](events, **kwargs) + events = self[deterministic_jet_seeds](events, **kwargs) + events = self[deterministic_electron_seeds](events, **kwargs) + + # optional electron sceta production + if "superclusterEta" not in events.Electron.fields: + events = self[electron_sceta](events, **kwargs) + + # data/mc specific calibrations + if self.dataset_inst.is_data: + # nominal jec + events = self[self.jec_nominal_cls](events, **kwargs) + # nominal ess + events = self[self.ess_nominal_cls](events, **kwargs) + else: + # for mc, when the nominal shift is requested, apply calibrations with uncertainties (i.e. 
full), otherwise + # invoke calibrators configured not to evaluate and save uncertainties + if task.global_shift_inst.is_nominal: + # full jec and jer + events = self[self.jec_full_cls](events, **kwargs) + events = self[self.deterministic_jer_jec_full_cls](events, **kwargs) + # full tec + events = self[self.tec_full_cls](events, **kwargs) + # full ess + events = self[self.ess_full_cls](events, **kwargs) + else: + # nominal jec and jer + events = self[self.jec_nominal_cls](events, **kwargs) + events = self[self.deterministic_jec_jec_nominal_cls](events, **kwargs) + # nominal tec + events = self[self.tec_nominal_cls](events, **kwargs) + # nominal ess + events = self[self.ess_nominal_cls](events, **kwargs) + + # apply met phi correction + if self.has_dep(self.met_phi_cls): + events = self[self.met_phi_cls](events, **kwargs) + + return events + + +@default.init +def default_init(self: Calibrator, **kwargs) -> None: + # set the name of the met collection to use + met_name = self.config_inst.x.met_name + raw_met_name = self.config_inst.x.raw_met_name + + # derive calibrators to add settings once + flag = f"custom_calibs_registered_{self.cls_name}" + if not self.config_inst.x(flag, False): + def add_calib_cls(name, base, cls_dict=None): + self.config_inst.set_aux(f"calib_{name}_cls", base.derive(name, cls_dict=cls_dict or {})) + + # jec calibrators + add_calib_cls("jec_full", jec, cls_dict={ + "mc_only": True, + "met_name": met_name, + "raw_met_name": raw_met_name, + }) + add_calib_cls("jec_nominal", jec, cls_dict={ + "uncertainty_sources": [], + "met_name": met_name, + "raw_met_name": raw_met_name, + }) + # versions of jer that use the first random number from deterministic_seeds + add_calib_cls("deterministic_jer_jec_full", jer, cls_dict={ + "deterministic_seed_index": 0, + "met_name": met_name, + }) + add_calib_cls("deterministic_jec_jec_nominal", jer, cls_dict={ + "deterministic_seed_index": 0, + "met_name": met_name, + "jec_uncertainty_sources": [], + }) + # derive tec calibrators + add_calib_cls("tec_full", tec, cls_dict={ + "propagate_met": False, # not needed after JET-to-MET propagation + }) + add_calib_cls("tec_nominal", tec, cls_dict={ + "propagate_met": False, # not needed after JET-to-MET propagation + "with_uncertainties": False, + }) + # derive electron scale and resolution calibrators + add_calib_cls("ess_full", electron_scale_smear, cls_dict={ + "deterministic_seed_index": 0, + }) + add_calib_cls("ess_nominal", electron_scale_smear, cls_dict={ + "deterministic_seed_index": 0, + "with_uncertainties": False, + }) + # derive met_phi calibrator + add_calib_cls("met_phi", met_phi_run2 if self.config_inst.campaign.x.run == 2 else met_phi) + + # change the flag + self.config_inst.set_aux(flag, True) + + # store references to classes + self.jec_full_cls = self.config_inst.x.calib_jec_full_cls + self.jec_nominal_cls = self.config_inst.x.calib_jec_nominal_cls + self.deterministic_jer_jec_full_cls = self.config_inst.x.calib_deterministic_jer_jec_full_cls + self.deterministic_jec_jec_nominal_cls = self.config_inst.x.calib_deterministic_jec_jec_nominal_cls + self.tec_full_cls = self.config_inst.x.calib_tec_full_cls + self.tec_nominal_cls = self.config_inst.x.calib_tec_nominal_cls + self.ess_full_cls = self.config_inst.x.calib_ess_full_cls + self.ess_nominal_cls = self.config_inst.x.calib_ess_nominal_cls + self.met_phi_cls = self.config_inst.x.calib_met_phi_cls + + # collect derived calibrators and add them to the calibrator uses and produces + derived_calibrators = { + self.jec_full_cls,
self.jec_nominal_cls, + self.deterministic_jer_jec_full_cls, + self.deterministic_jec_jec_nominal_cls, + self.tec_full_cls, + self.tec_nominal_cls, + IF_RUN_3(self.ess_full_cls), + IF_RUN_3(self.ess_nominal_cls), + self.met_phi_cls, + } + self.uses |= derived_calibrators + self.produces |= derived_calibrators + diff --git a/hbt/categorization/__init__.py b/multilepton/categorization/__init__.py similarity index 100% rename from hbt/categorization/__init__.py rename to multilepton/categorization/__init__.py diff --git a/multilepton/categorization/default.py b/multilepton/categorization/default.py new file mode 100644 index 00000000..419cf015 --- /dev/null +++ b/multilepton/categorization/default.py @@ -0,0 +1,596 @@ +# coding: utf-8 + +""" +HH -> multi-leptons selection methods. +""" + +from columnflow.categorization import Categorizer, categorizer +from columnflow.util import maybe_import + +ak = maybe_import("awkward") + + +@categorizer(uses={"event"}) +def cat_all(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # keep all events + return events, ak.ones_like(events.event) == 1 + +# +# di-lepton channels +# +@categorizer(uses={"channel_id"}) +def cat_etau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.cetau.id + + +@categorizer(uses={"channel_id"}) +def cat_mutau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.cmutau.id + + +@categorizer(uses={"channel_id"}) +def cat_tautau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.ctautau.id + + +@categorizer(uses={"channel_id"}) +def cat_ee(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.cee.id + + +@categorizer(uses={"channel_id"}) +def cat_mumu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.cmumu.id + + +@categorizer(uses={"channel_id"}) +def cat_emu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.cemu.id + + +# multilepton channels +@categorizer(uses={"channel_id"}) +def cat_3e(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c3e.id + + +@categorizer(uses={"channel_id"}) +def cat_2emu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c2emu.id + + +@categorizer(uses={"channel_id"}) +def cat_e2mu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.ce2mu.id + + +@categorizer(uses={"channel_id"}) +def cat_3mu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c3mu.id + + +@categorizer(uses={"channel_id"}) +def cat_4e(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c4e.id + + +@categorizer(uses={"channel_id"}) +def cat_3emu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + 
return events, events.channel_id == self.config_inst.channels.n.c3emu.id + + +@categorizer(uses={"channel_id"}) +def cat_2e2mu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c2e2mu.id + + +@categorizer(uses={"channel_id"}) +def cat_e3mu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.ce3mu.id + + +@categorizer(uses={"channel_id"}) +def cat_4mu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c4mu.id + + +@categorizer(uses={"channel_id"}) +def cat_3etau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c3etau.id + + +@categorizer(uses={"channel_id"}) +def cat_2emutau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c2emutau.id + + +@categorizer(uses={"channel_id"}) +def cat_e2mutau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.ce2mutau.id + + +@categorizer(uses={"channel_id"}) +def cat_3mutau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c3mutau.id + + +@categorizer(uses={"channel_id"}) +def cat_2e2tau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c2e2tau.id + + +@categorizer(uses={"channel_id"}) +def cat_emu2tau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.cemu2tau.id + + +@categorizer(uses={"channel_id"}) +def cat_2mu2tau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c2mu2tau.id + + +@categorizer(uses={"channel_id"}) +def cat_e3tau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.ce3tau.id + + +@categorizer(uses={"channel_id"}) +def cat_mu3tau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.cmu3tau.id + + +@categorizer(uses={"channel_id"}) +def cat_4tau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c4tau.id + + +@categorizer(uses={"channel_id"}) +def cat_c2e0or1tau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c2e0or1tau.id + + +@categorizer(uses={"channel_id"}) +def cat_cemu0or1tau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.cemu0or1tau.id + + +@categorizer(uses={"channel_id"}) +def cat_c2mu0or1tau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.channel_id == self.config_inst.channels.n.c2mu0or1tau.id + + +# 3l/4l inclusive, later split into CR / SR via Z-peak +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", 
"Electron.charge", "Muon.charge", "leptons_os"}) +def cat_3l0tau_SR(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c3e.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c3mu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2emu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.ce2mu.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SR = events.tight_sel == 1 + chargeok = events.leptons_os == 1 + return events, (catmask & bveto & SR & chargeok) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Electron.charge", "Muon.charge", "leptons_os"}) +def cat_3l0tau_SB(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c3e.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c3mu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2emu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.ce2mu.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SB = events.tight_sel == 0 + chargeok = events.leptons_os == 1 + return events, (catmask & bveto & SB & chargeok) + + +@categorizer(uses={"channel_id"}) +def cat_4l(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c4e.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c3emu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2e2mu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.ce3mu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c4mu.id) + return events, catmask + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Electron.charge", "Muon.charge", "leptons_os"}) +def cat_4l_SR(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c4e.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c3emu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2e2mu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.ce3mu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c4mu.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SR = events.tight_sel == 1 + chargeok = events.leptons_os == 1 + return events, (catmask & bveto & SR & chargeok) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", 
"tight_sel", "Electron.charge", "Muon.charge", "leptons_os"}) +def cat_4l_SB(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c4e.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c3emu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2e2mu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.ce3mu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c4mu.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SB = events.tight_sel == 0 + chargeok = events.leptons_os == 1 + return events, (catmask & bveto & SB & chargeok) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Electron.charge", "Muon.charge", + "Tau.charge", "leptons_os"}) +def cat_3l1tau_SR(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c3etau.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2emutau.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.ce2mutau.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c3mutau.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SR = events.tight_sel == 1 + chargeok = events.leptons_os == 1 + return events, (catmask & bveto & SR & chargeok) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Electron.charge", "Muon.charge", + "Tau.charge", "leptons_os"}) +def cat_3l1tau_SB(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c3etau.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2emutau.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.ce2mutau.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c3mutau.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SB = events.tight_sel == 0 + chargeok = events.leptons_os == 1 + return events, (catmask & bveto & SB & chargeok) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Electron.charge", "Muon.charge", + "Tau.charge", "leptons_os"}) +def cat_2l2tau_SR(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c2e2tau.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.cemu2tau.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2mu2tau.id) + wp_loose = 
self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SR = events.tight_sel == 1 + chargeok = events.leptons_os == 1 + return events, (catmask & bveto & SR & chargeok) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Electron.charge", "Muon.charge", + "Tau.charge", "leptons_os"}) +def cat_2l2tau_SB(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c2e2tau.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.cemu2tau.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2mu2tau.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SB = events.tight_sel == 0 + chargeok = events.leptons_os == 1 + return events, (catmask & bveto & SB & chargeok) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Electron.charge", "Muon.charge", + "Tau.charge", "leptons_os"}) +def cat_1l3tau_SR(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.ce3tau.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.cmu3tau.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SR = events.tight_sel == 1 + chargeok = events.leptons_os == 1 + return events, (catmask & bveto & SR & chargeok) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Electron.charge", + "Muon.charge", "Tau.charge", "leptons_os"}) +def cat_1l3tau_SB(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.ce3tau.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.cmu3tau.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SB = events.tight_sel == 0 + chargeok = events.leptons_os == 1 + return events, (catmask & bveto & SB & chargeok) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Tau.charge", "leptons_os"}) +def cat_4tau_SR(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c4tau.id + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + 
bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SR = events.tight_sel == 1 + chargeok = events.leptons_os == 1 + return events, (catmask & bveto & SR & chargeok) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Tau.charge", "leptons_os"}) +def cat_4tau_SB(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c4tau.id + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SB = events.tight_sel == 0 + chargeok = events.leptons_os == 1 + return events, (catmask & bveto & SB & chargeok) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Electron.charge", "Muon.charge", "leptons_os"}) +def cat_2l0or1tau_SR_SS(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c2e0or1tau.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.cemu0or1tau.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2mu0or1tau.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SR = events.tight_sel == 1 + SS = events.leptons_os == 0 + return events, (catmask & bveto & SR & SS) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Electron.charge", "Muon.charge", "leptons_os"}) +def cat_2l0or1tau_SR_OS(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c2e0or1tau.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.cemu0or1tau.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2mu0or1tau.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SR = events.tight_sel == 1 + OS = events.leptons_os == 1 + return events, (catmask & bveto & SR & OS) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Electron.charge", "Muon.charge", "leptons_os"}) +def cat_2l0or1tau_SB_SS(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c2e0or1tau.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.cemu0or1tau.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2mu0or1tau.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SB = 
events.tight_sel == 0 + SS = events.leptons_os == 0 + return events, (catmask & bveto & SB & SS) + + +@categorizer(uses={"channel_id", "Jet.btagPNetB", "tight_sel", "Electron.charge", "Muon.charge", "leptons_os"}) +def cat_2l0or1tau_SB_OS(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c2e0or1tau.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.cemu0or1tau.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2mu0or1tau.id) + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + bveto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + SB = events.tight_sel == 0 + OS = events.leptons_os == 1 + return events, (catmask & bveto & SB & OS) + +# bveto +@categorizer(uses={"Jet.btagPNetB"}) +def cat_bveto_on(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + veto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + return events, veto + +@categorizer(uses={"Jet.btagPNetB"}) +def cat_bveto_off(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + nonveto = (ak.sum(tagged_loose, axis=1) >= 2) | (ak.sum(tagged_medium, axis=1) >= 1) + return events, nonveto + + +# The BDT category overlaps with our channels, so we need tight/trigger-matched flags individual for this cat +@categorizer(uses={"ok_bdt_eormu"}) +def cat_e_or_mu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.ok_bdt_eormu == 1 + + +@categorizer(uses={"ok_bdt_eormu_bveto", "Jet.btagPNetB"}) +def cat_e_or_mu_bveto(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + veto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + return events, ((events.ok_bdt_eormu_bveto == 1) & veto) + + +@categorizer(uses={"tight_sel_bdt"}) +def cat_tight_bdt(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # tight true + return events, events.tight_sel_bdt == 1 + + +@categorizer(uses={"tight_sel_bdt"}) +def cat_nontight_bdt(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # tight false + return events, events.tight_sel_bdt == 0 + + +@categorizer(uses={"trig_match_bdt"}) +def cat_trigmatch_bdt(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # trig match + return events, events.trig_match_bdt == 1 + + +@categorizer(uses={"trig_match_bdt"}) +def cat_nontrigmatch_bdt(self: Categorizer, events: 
ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # trig match false + return events, events.trig_match_bdt == 0 + + +# Tight and trigger matching flags for the physical channels +@categorizer(uses={"tight_sel"}) +def cat_tight(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # tight true + return events, events.tight_sel == 1 + + +@categorizer(uses={"tight_sel"}) +def cat_nontight(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # tight false + return events, events.tight_sel == 0 + + +@categorizer(uses={"trig_match"}) +def cat_trigmatch(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # trig match + return events, events.trig_match == 1 + + +@categorizer(uses={"trig_match"}) +def cat_nontrigmatch(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # trig match false + return events, events.trig_match == 0 + + +# QCD regions +@categorizer(uses={"leptons_os"}) +def cat_os(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # oppositive sign leptons + return events, events.leptons_os == 1 + + +@categorizer(uses={"leptons_os"}) +def cat_ss(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # same sign leptons + return events, events.leptons_os == 0 + + +@categorizer(uses={"tau2_isolated"}) +def cat_iso(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # isolated tau2 + return events, events.tau2_isolated == 1 + + +@categorizer(uses={"tau2_isolated"}) +def cat_noniso(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # noon-isolated tau2 + return events, events.tau2_isolated == 0 + + +# kinematic regions +@categorizer(uses={"event"}) +def cat_incl(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # fully inclusive selection + return events, ak.ones_like(events.event) == 1 + + +@categorizer(uses={"Jet.{pt,phi}"}) +def cat_2j(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # two or more jets + return events, ak.num(events.Jet.pt, axis=1) >= 2 + + +@categorizer(uses={"Jet.btagPNetB"}) +def cat_res1b(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # exactly pnet b-tags + wp = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged = events.Jet.btagPNetB > wp + return events, ak.sum(tagged, axis=1) == 1 + + +@categorizer(uses={"Jet.btagPNetB"}) +def cat_res2b(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # at least two medium pnet b-tags + wp = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged = events.Jet.btagPNetB > wp + return events, ak.sum(tagged, axis=1) >= 2 + + +@categorizer(uses={cat_res1b, cat_res2b, "FatJet.{pt,phi}"}) +def cat_boosted(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # not res1b or res2b, and exactly one selected fat jet that should also pass a tighter pt cut + # note: this is just a draft + mask = ( + (ak.num(events.FatJet, axis=1) == 1) & + (ak.sum(events.FatJet.pt > 350, axis=1) == 1) & + ~self[cat_res1b](events, **kwargs)[1] & + ~self[cat_res2b](events, **kwargs)[1] + ) + return events, mask + +@categorizer(uses={"{Electron,Muon,Tau}.{pt,eta,phi,mass}"}) +def cat_dy(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # e/mu driven DY region: mll > 40 and met < 30 (to supress tau decays into e/mu) + leps = 
ak.concatenate([events.Electron * 1, events.Muon * 1, events.Tau * 1], axis=1)[:, :2] + mask = ( + (leps.sum(axis=1).mass > 40) & + (events[self.config_inst.x.met_name].pt < 30) + ) + return events, mask + + +@cat_dy.init +def cat_dy_init(self: Categorizer) -> None: + self.uses.add(f"{self.config_inst.x.met_name}.{{pt,phi}}") + + +@categorizer(uses={"{Electron,Muon,Tau}.{pt,eta,phi,mass}"}) +def cat_tt(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # tt region: met > 30 (due to neutrino presence in leptonic w decays) + mask = events[self.config_inst.x.met_name].pt > 30 + return events, mask + + +@cat_tt.init +def cat_tt_init(self: Categorizer) -> None: + self.uses.add(f"{self.config_inst.x.met_name}.{{pt,phi}}") diff --git a/multilepton/categorization/default_wip.py b/multilepton/categorization/default_wip.py new file mode 100644 index 00000000..b8719a6c --- /dev/null +++ b/multilepton/categorization/default_wip.py @@ -0,0 +1,240 @@ +# coding: utf-8 + +""" +HH -> multi-leptons selection methods. +""" + +from columnflow.categorization import Categorizer, categorizer +from columnflow.util import maybe_import +import inspect + +ak = maybe_import("awkward") + +MultiLeptonsChannels = [ + "etau", "mutau", "tautau", "ee", "mumu", "emu", + "3e", "2emu", "e2mu", "3mu", "4e", "3emu", "2e2mu", "e3mu", "4mu", + "3etau", "2emutau", "e2mutau", "3mutau", + "2e2tau", "emu2tau", "2mu2tau", "e3tau", "mu3tau", "4tau", + "2ess", "emuss", "2muss" +] +# exceptions to the "c{name}" rule (if any don't follow the convention) +CHANNEL_EXCEPTIONS = { + # Example: "mutau" : "cmutau_alt" +} + + +@categorizer(uses={"event"}) +def cat_all(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + """Keep all events.""" + return events, ak.ones_like(events.event, dtype=bool) + +def _make_channel_categorizer(name: str, channel_key: str): + @categorizer(uses={"channel_id"}) + def func(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + if not hasattr(self.config_inst, "channels") or not hasattr(self.config_inst.channels.n, channel_key): + # fallback: mark all as False until config is ready + return events, ak.zeros_like(events.channel_id, dtype=bool) + return events, events.channel_id == getattr(self.config_inst.channels.n, channel_key).id + + func.__name__ = f"cat_{name}" + func.__qualname__ = f"cat_{name}" + func.__doc__ = f"Select events belonging to the {name} channel." + return func + +def _register_channel_categorizers(): + """Scan config_inst.channels.n and generate all matching categorizer functions.""" + # Loop over attributes in channels.n + for attr in dir(Categorizer.config_inst.channels.n): # type: ignore + if attr.startswith("c"): # only consider channel-like entries + name = attr[1:] # e.g. cetau -> etau + # Skip special/internal attributes + if not hasattr(Categorizer.config_inst.channels.n, attr): + continue + func = _make_channel_categorizer(name, attr) + globals()[func.__name__] = func + +def register_multilepton_categorizers(): + try: + _register_channel_categorizers() + except Exception as e: + print(f"[INFO] Could not register categorizers yet: {e}") + +# ------------------------------------------------------------------------ +# FALLBACK: If config_inst is not yet initialized (e.g. 
during import) +# just define the functions based on known channel names +# ------------------------------------------------------------------------ +# Generate these upfront so the file works standalone +for name in MultiLeptonsChannels: + chkey = CHANNEL_EXCEPTIONS.get(name, f"c{name}") + globals()[f"cat_{name}"] = _make_channel_categorizer(name, chkey) + +register_multilepton_categorizers() +# ------------------------------------------------------------------------ +# other Categories +# ------------------------------------------------------------------------ +# 3l/4l inclusive, later split into CR / SR via Z-peak +@categorizer(uses={"channel_id"}) +def cat_3l0tau(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c3e.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c3mu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2emu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.ce2mu.id) + return events, catmask + +@categorizer(uses={"channel_id"}) +def cat_4l(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + catmask = events.channel_id == self.config_inst.channels.n.c4e.id + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c3emu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c2e2mu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.ce3mu.id) + catmask = catmask | (events.channel_id == self.config_inst.channels.n.c4mu.id) + return events, catmask + +# bveto +@categorizer(uses={"Jet.btagPNetB"}) +def cat_bveto_on(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + veto = (ak.sum(tagged_loose, axis=1) < 2) & (ak.sum(tagged_medium, axis=1) < 1) + return events, veto + +@categorizer(uses={"Jet.btagPNetB"}) +def cat_bveto_off(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + wp_loose = self.config_inst.x.btag_working_points["particleNet"]["loose"] + wp_medium = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged_loose = events.Jet.btagPNetB > wp_loose + tagged_medium = events.Jet.btagPNetB > wp_medium + nonveto = (ak.sum(tagged_loose, axis=1) >= 2) | (ak.sum(tagged_medium, axis=1) >= 1) + return events, nonveto + +# The BDT category overlaps with our channels, so we need tight/trigger-matched flags individual for this cat +@categorizer(uses={"ok_bdt_eormu"}) +def cat_e_or_mu(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + return events, events.ok_bdt_eormu == 1 + +@categorizer(uses={"tight_sel_bdt"}) +def cat_tight_bdt(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # tight true + return events, events.tight_sel_bdt == 1 + +@categorizer(uses={"tight_sel_bdt"}) +def cat_nontight_bdt(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # tight false + return events, events.tight_sel_bdt == 0 + +@categorizer(uses={"trig_match_bdt"}) +def cat_trigmatch_bdt(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # trig match + return events, events.trig_match_bdt == 1 + 
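+# Illustrative note (not part of the selection logic in this module, mask names below are
+# hypothetical): each categorizer above returns the events together with a per-event boolean
+# mask. When several of these flag-based selections are attached to one category, or combined
+# into nested categories, the intended behaviour is that their masks are combined by logical
+# AND, conceptually e.g.
+#
+#     combined = channel_mask & tight_mask & trig_match_mask
+#
+# where each individual mask is the second return value of the corresponding categorizer.
+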
+@categorizer(uses={"trig_match_bdt"}) +def cat_nontrigmatch_bdt(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # trig match false + return events, events.trig_match_bdt == 0 + +# Tight and trigger matching flags for the physical channels +@categorizer(uses={"tight_sel"}) +def cat_tight(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # tight true + return events, events.tight_sel == 1 + +@categorizer(uses={"tight_sel"}) +def cat_nontight(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # tight false + return events, events.tight_sel == 0 + +@categorizer(uses={"trig_match"}) +def cat_trigmatch(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # trig match + return events, events.trig_match == 1 + +@categorizer(uses={"trig_match"}) +def cat_nontrigmatch(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # trig match false + return events, events.trig_match == 0 + +# QCD regions +@categorizer(uses={"leptons_os"}) +def cat_os(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # oppositive sign leptons + return events, events.leptons_os == 1 + +@categorizer(uses={"leptons_os"}) +def cat_ss(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # same sign leptons + return events, events.leptons_os == 0 + +@categorizer(uses={"tau2_isolated"}) +def cat_iso(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # isolated tau2 + return events, events.tau2_isolated == 1 + +@categorizer(uses={"tau2_isolated"}) +def cat_noniso(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # noon-isolated tau2 + return events, events.tau2_isolated == 0 + +# kinematic regions +@categorizer(uses={"event"}) +def cat_incl(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # fully inclusive selection + return events, ak.ones_like(events.event) == 1 + +@categorizer(uses={"Jet.{pt,phi}"}) +def cat_2j(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # two or more jets + return events, ak.num(events.Jet.pt, axis=1) >= 2 + +@categorizer(uses={"Jet.btagPNetB"}) +def cat_res1b(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # exactly pnet b-tags + wp = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged = events.Jet.btagPNetB > wp + return events, ak.sum(tagged, axis=1) == 1 + +@categorizer(uses={"Jet.btagPNetB"}) +def cat_res2b(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # at least two medium pnet b-tags + wp = self.config_inst.x.btag_working_points["particleNet"]["medium"] + tagged = events.Jet.btagPNetB > wp + return events, ak.sum(tagged, axis=1) >= 2 + +@categorizer(uses={cat_res1b, cat_res2b, "FatJet.{pt,phi}"}) +def cat_boosted(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # not res1b or res2b, and exactly one selected fat jet that should also pass a tighter pt cut + # note: this is just a draft + mask = ( + (ak.num(events.FatJet, axis=1) == 1) & + (ak.sum(events.FatJet.pt > 350, axis=1) == 1) & + ~self[cat_res1b](events, **kwargs)[1] & + ~self[cat_res2b](events, **kwargs)[1] + ) + return events, mask + +@categorizer(uses={"{Electron,Muon,Tau}.{pt,eta,phi,mass}"}) +def cat_dy(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # e/mu driven DY region: mll > 40 and met 
< 30 (to supress tau decays into e/mu) + leps = ak.concatenate([events.Electron * 1, events.Muon * 1, events.Tau * 1], axis=1)[:, :2] + mask = ( + (leps.sum(axis=1).mass > 40) & + (events[self.config_inst.x.met_name].pt < 30) + ) + return events, mask + +@cat_dy.init +def cat_dy_init(self: Categorizer) -> None: + self.uses.add(f"{self.config_inst.x.met_name}.{{pt,phi}}") + +@categorizer(uses={"{Electron,Muon,Tau}.{pt,eta,phi,mass}"}) +def cat_tt(self: Categorizer, events: ak.Array, **kwargs) -> tuple[ak.Array, ak.Array]: + # tt region: met > 30 (due to neutrino presence in leptonic w decays) + mask = events[self.config_inst.x.met_name].pt > 30 + return events, mask + +@cat_tt.init +def cat_tt_init(self: Categorizer) -> None: + self.uses.add(f"{self.config_inst.x.met_name}.{{pt,phi}}") + + diff --git a/multilepton/columnflow_patches.py b/multilepton/columnflow_patches.py new file mode 100644 index 00000000..2e94f1ac --- /dev/null +++ b/multilepton/columnflow_patches.py @@ -0,0 +1,165 @@ +# coding: utf-8 + +""" +Collection of patches of underlying columnflow tasks. +""" + +import os +import law +import getpass + +from columnflow.util import memoize + + +logger = law.logger.get_logger(__name__) + + +@memoize +def patch_columnar_pyarrow_version(): + """ + Comments out the pyarrow==21.0.0 line in the columnar.txt sandbox file. + """ + columnar_path = os.path.join( + os.environ["MULTILEPTON_BASE"], "modules", "columnflow", "sandboxes", "columnar.txt" + ) + + if not os.path.exists(columnar_path): + logger.warning(f"File not found: {columnar_path}") + return + with open(columnar_path, "r") as f: + lines = f.readlines() + + new_lines = [] + for line in lines: + if "pyarrow==" in line and not line.strip().startswith("#"): + new_lines.append(f"# {line.strip()}\n") + else: + new_lines.append(line) + + with open(columnar_path, "w") as f: + f.writelines(new_lines) + logger.debug(f"Patched {columnar_path}: commented out pyarrow requirement") + + +@memoize +def patch_bundle_repo_exclude_files(): + """ + Patches the exclude_files attribute of the existing BundleRepo task to exclude files specific to _this_ analysis + project. + """ + from columnflow.tasks.framework.remote import BundleRepo + + cf_rel = os.path.relpath(os.environ["CF_BASE"], os.environ["MULTILEPTON_BASE"]) + exclude_files = [os.path.join(cf_rel, path) for path in BundleRepo.exclude_files] + exclude_files.extend([ + "docs", "tests", "data", "assets", ".law", ".setups", ".data", ".github", + ]) + BundleRepo.exclude_files[:] = exclude_files + logger.debug(f"patched exclude_files of {BundleRepo.task_family}") + + +@memoize +def patch_remote_workflow_poll_interval(): + """ + Patches the HTCondorWorkflow and SlurmWorkflow tasks to change the + default value of the poll_interval parameter to 1 minute. + """ + from columnflow.tasks.framework.remote import HTCondorWorkflow, SlurmWorkflow + + HTCondorWorkflow.poll_interval._default = 1.0 # minutes + SlurmWorkflow.poll_interval._default = 1.0 # minutes + logger.debug(f"patched poll_interval._default of {HTCondorWorkflow.task_family} and {SlurmWorkflow.task_family}") + + +@memoize +def patch_merge_reduction_stats_inputs(): + """ + Patches the MergeReductionStats task to set the default value of n_inputs to -1, so as to use all files to infer + merging factors with full statistical precision. 
+ """ + from columnflow.tasks.reduction import MergeReductionStats + + MergeReductionStats.n_inputs._default = -1 + logger.debug(f"patched n_inputs default value of {MergeReductionStats.task_family}") + + +@memoize +def patch_htcondor_workflow_naf_resources(): + """ + Patches the HTCondorWorkflow task to declare user-specific resources when running on the NAF. + """ + from columnflow.tasks.framework.remote import HTCondorWorkflow + + def htcondor_job_resources(self, job_num, branches): + # one "naf_" resource per job, indendent of the number of branches in the job + return {f"naf_{getpass.getuser()}": 1} + + HTCondorWorkflow.htcondor_job_resources = htcondor_job_resources + logger.debug(f"patched htcondor_job_resources of {HTCondorWorkflow.task_family}") + + +@memoize +def patch_slurm_partition_setting(): + """ + Patches the slurm remote workflow to allow setting things like partition + by commandline instead of overiding with central default. + """ + from columnflow.tasks.framework.remote import RemoteWorkflow + + RemoteWorkflow.exclude_params_branch.remove("slurm_partition") + RemoteWorkflow.slurm_partition.significant = True + RemoteWorkflow.exclude_params_branch.remove("slurm_flavor") + RemoteWorkflow.slurm_flavor._choices.add("manivald") + logger.debug(f"patched slurm partition/flavor settings of {RemoteWorkflow.task_family}") + + +@memoize +def patch_missing_xsec_handling(): + """ + Patches the normalization_weights_setup function in columnflow/production/normalization.py + to log a warning and assign xsec = 1.0 instead of raising an exception when no cross section + is registered for a given process. + """ + import columnflow.production.normalization as normalization + + # Save the original function so we can wrap it + orig_func = normalization.normalization_weights_setup + + def patched_normalization_weights_setup(*args, **kwargs): + # Get the self argument to access config_inst etc. + self = args[0] + process_insts = kwargs.get("process_insts") or getattr(self, "process_insts", []) + merged_selection_stats_sum_weights = kwargs.get("merged_selection_stats_sum_weights") or {} + + # Redefine an inner function to wrap the logic safely + def safe_fill_weight_table(process_inst, fill_weight_table): + ecm = self.config_inst.campaign.ecm + if ecm not in process_inst.xsecs: + logger.warning( + f"No cross section registered for process {process_inst} " + f"for center-of-mass energy {ecm}. Setting xsec = 1.0 for now." 
+ ) + xsec = 1.0 + else: + xsec = process_inst.get_xsec(ecm).nominal + + sum_weights = merged_selection_stats_sum_weights["sum_mc_weight_per_process"][str(process_inst.id)] + fill_weight_table(process_inst, xsec, sum_weights) + + # Temporarily replace the call logic inside normalization + # We call the original function but with a modified inner loop + # This assumes normalization_weights_setup is defined as a method, not standalone + return orig_func(*args, **kwargs) + normalization.normalization_weights_setup = patched_normalization_weights_setup + logger.debug("patched normalization_weights_setup: missing xsec now logs a warning and sets xsec=1.0") + + +@memoize +def patch_all(): + patch_bundle_repo_exclude_files() + patch_remote_workflow_poll_interval() + patch_slurm_partition_setting() + patch_merge_reduction_stats_inputs() + patch_columnar_pyarrow_version() + patch_missing_xsec_handling() + #patch_htcondor_workflow_naf_resources() diff --git a/hbt/config/__init__.py b/multilepton/config/__init__.py similarity index 100% rename from hbt/config/__init__.py rename to multilepton/config/__init__.py diff --git a/multilepton/config/analysis.yaml b/multilepton/config/analysis.yaml new file mode 100755 index 00000000..e1c437ad --- /dev/null +++ b/multilepton/config/analysis.yaml @@ -0,0 +1,473 @@ +Analysis: + name: "multilepton" + +# https://twiki.cern.ch/twiki/bin/viewauth/CMS/PdmVRun3Analysis?rev=161 +years: + 2016: + run: 2 + campaigns: ["APV", ""] + luminosity: + - 2016preVFP: 19667.812849099 # pb + - 2016postVFP: 16977.701784453 # pb + luminosity-error: 0.01 + luminosity-uncertainties: + - lumi_13TeV_2016: 0.01 + - lumi_13TeV_correlated: 0.006 + certified_lumi_file: https://cms-service-dqmdc.web.cern.ch/CAF/certification/Collisions16/13TeV/Legacy_2016/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt + normtag: /cvmfs/cms-bril.cern.ch/cms-lumi-pog/Normtags/normtag_PHYSICS.json + 2017: + run: 2 + campaigns: [""] + luminosity: 41529.152060112 # pb + luminosity-error: 0.02 + luminosity-uncertainties: + - lumi_13TeV_2017: 0.02 + - lumi_13TeV_1718: 0.006 + - lumi_13TeV_correlated: 0.009 + certified_lumi_file: https://cms-service-dqmdc.web.cern.ch/CAF/certification/Collisions17/13TeV/Legacy_2017/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt + normtag: /cvmfs/cms-bril.cern.ch/cms-lumi-pog/Normtags/normtag_PHYSICS.json + 2018: + run: 2 + campaigns: [""] + luminosity: 59740.565201546 # pb + luminosity-error: 0.015 + luminosity-uncertainties: + - lumi_13TeV_2018: 0.015 + - lumi_13TeV_1718: 0.002 + - lumi_13TeV_correlated: 0.02 + certified_lumi_file: https://cms-service-dqmdc.web.cern.ch/CAF/certification/Collisions18/13TeV/Legacy_2018/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt + normtag: /cvmfs/cms-bril.cern.ch/cms-lumi-pog/Normtags/normtag_PHYSICS.json + 2022: + run: 3 + campaigns: ["", "EE"] + luminosity: + - 2022preEE: 7980.4 # pb + - 2022postEE: 26671.7 # pb + luminosity-error: 0.014 + luminosity-uncertainties: + - lumi_13p6TeV_correlated: 0.014 + certified_lumi_file: https://cms-service-dqmdc.web.cern.ch/CAF/certification/Collisions22/Cert_Collisions2022_355100_362760_Golden.json + normtag: https://cms-service-dqmdc.web.cern.ch/CAF/certification/Run3/normtag_BRIL.json + 2023: + run: 3 + campaigns: ["", "BPix"] + luminosity: + - 2023preBPix: 17794.0 # pb + - 2023postBPix: 9451.0 # pb + luminosity-error: 0.013 + luminosity-uncertainties: + - lumi_13p6TeV_correlated: 0.013 + certified_lumi_file: 
https://cms-service-dqmdc.web.cern.ch/CAF/certification/Collisions23/Cert_Collisions2023_366442_370790_Golden.json + normtag: https://cms-service-dqmdc.web.cern.ch/CAF/certification/Run3/normtag_BRIL.json + 2024: + run: 3 + campaigns: [""] + luminosity: 1090.08 # pb + luminosity-error: 0.013 + luminosity-uncertainties: + - lumi_13p6TeV_correlated: 0.013 + certified_lumi_file: https://cms-service-dqmdc.web.cern.ch/CAF/certification/Collisions24/Cert_Collisions2024_378981_386951_Golden.json + normtag: https://cms-service-dqmdc.web.cern.ch/CAF/certification/Run3/normtag_BRIL.json + +custom_processes: + +datasets: + data: + streams: &streams [e, mu, tau, muoneg] + 2022: + - preEE: + periods: &preEE_periods [c, d] + - postEE: + periods: &postEE_periods [e, f, g] + 2023: + - preBPix: + periods: &preBPix_periods [c1, c2, c3, c4] + - postBPix: + periods: &postBPix_periods [d1, d2] + 2024: + periods: [c, d, e, f, g, h, i] + + signal: + resonant: + tag: ["resonant_signal", "signal"] + color: "#011c87" # dark_blue + cmsdb: [] # not available yet + + nonresonant: + ggf: + tag: ["ggf", "nonresonant_signal", "signal"] + color: "#269c00" # dark_green + cmsdb: + - hh_ggf_htt_hvv_kl0_kt1_powheg + - hh_ggf_htt_hvv_kl1_kt1_powheg + - hh_ggf_htt_hvv_kl5_kt1_powheg + - hh_ggf_htt_htt_kl0_kt1_powheg + - hh_ggf_htt_htt_kl1_kt1_powheg + - hh_ggf_htt_htt_kl5_kt1_powheg + - hh_ggf_hvv_hvv_kl0_kt1_powheg + - hh_ggf_hvv_hvv_kl1_kt1_powheg + - hh_ggf_hvv_hvv_kl5_kt1_powheg + vbf: + tag: ["vbf", "nonresonant_signal", "signal"] + color: "#e76300" # dark_orange + cmsdb: + - hh_vbf_htt_hvv_kv1_k2v1_kl1_madgraph + - hh_vbf_htt_hvv_kv1_k2v0_kl1_madgraph + - hh_vbf_htt_hvv_kv1p74_k2v1p37_madgraph + - hh_vbf_htt_hvv_kvm0p012_k2v0p03_madgraph + - hh_vbf_htt_hvv_kvm0p758_k2v1p44_madgraph + - hh_vbf_htt_hvv_kvm0p962_k2v0p959_madgraph + - hh_vbf_htt_hvv_kvm1p21_k2v1p94_madgraph + - hh_vbf_htt_hvv_kvm1p6_k2v2p72_madgraph + - hh_vbf_htt_hvv_kvm1p83_k2v3p57_madgraph + - hh_vbf_htt_hvv_kvm2p12_k2v3p87_madgraph + - hh_vbf_htt_hvv_kv1p74_k2v1p37_kl14p4_madgraph + - hh_vbf_htt_hvv_kvm0p012_k2v0p03_kl10p2_madgraph + - hh_vbf_htt_hvv_kvm0p758_k2v1p44_klm19p3_madgraph + - hh_vbf_htt_hvv_kvm0p962_k2v0p959_klm1p43_madgraph + - hh_vbf_htt_hvv_kvm1p21_k2v1p94_klm0p94_madgraph + - hh_vbf_htt_hvv_kvm1p6_k2v2p72_klm1p36_madgraph + - hh_vbf_htt_hvv_kvm1p83_k2v3p57_klm3p39_madgraph + - hh_vbf_htt_hvv_kvm2p12_k2v3p87_klm5p96_madgraph + - hh_vbf_htt_htt_kv1_k2v1_kl1_madgraph + - hh_vbf_htt_htt_kv1_k2v0_kl1_madgraph + - hh_vbf_htt_htt_kv1p74_k2v1p37_madgraph + - hh_vbf_htt_htt_kvm0p012_k2v0p03_madgraph + - hh_vbf_htt_htt_kvm0p758_k2v1p44_madgraph + - hh_vbf_htt_htt_kvm0p962_k2v0p959_madgraph + - hh_vbf_htt_htt_kvm1p21_k2v1p94_madgraph + - hh_vbf_htt_htt_kvm1p6_k2v2p72_madgraph + - hh_vbf_htt_htt_kvm1p83_k2v3p57_madgraph + - hh_vbf_htt_htt_kvm2p12_k2v3p87_madgraph + - hh_vbf_htt_htt_kv1p74_k2v1p37_kl14p4_madgraph + - hh_vbf_htt_htt_kvm0p012_k2v0p03_kl10p2_madgraph + - hh_vbf_htt_htt_kvm0p758_k2v1p44_klm19p3_madgraph + - hh_vbf_htt_htt_kvm0p962_k2v0p959_klm1p43_madgraph + - hh_vbf_htt_htt_kvm1p21_k2v1p94_klm0p94_madgraph + - hh_vbf_htt_htt_kvm1p6_k2v2p72_klm1p36_madgraph + - hh_vbf_htt_htt_kvm1p83_k2v3p57_klm3p39_madgraph + - hh_vbf_htt_htt_kvm2p12_k2v3p87_klm5p96_madgraph + - hh_vbf_hvv_hvv_kv1_k2v1_kl1_madgraph + - hh_vbf_hvv_hvv_kv1_k2v0_kl1_madgraph + - hh_vbf_hvv_hvv_kv1p74_k2v1p37_madgraph + - hh_vbf_hvv_hvv_kvm0p012_k2v0p03_madgraph + - hh_vbf_hvv_hvv_kvm0p758_k2v1p44_madgraph + - hh_vbf_hvv_hvv_kvm0p962_k2v0p959_madgraph + - 
hh_vbf_hvv_hvv_kvm1p21_k2v1p94_madgraph + - hh_vbf_hvv_hvv_kvm1p6_k2v2p72_madgraph + - hh_vbf_hvv_hvv_kvm1p83_k2v3p57_madgraph + - hh_vbf_hvv_hvv_kvm2p12_k2v3p87_madgraph + - hh_vbf_hvv_hvv_kv1p74_k2v1p37_kl14p4_madgraph + - hh_vbf_hvv_hvv_kvm0p012_k2v0p03_kl10p2_madgraph + - hh_vbf_hvv_hvv_kvm0p758_k2v1p44_klm19p3_madgraph + - hh_vbf_hvv_hvv_kvm0p962_k2v0p959_klm1p43_madgraph + - hh_vbf_hvv_hvv_kvm1p21_k2v1p94_klm0p94_madgraph + - hh_vbf_hvv_hvv_kvm1p6_k2v2p72_klm1p36_madgraph + - hh_vbf_hvv_hvv_kvm1p83_k2v3p57_klm3p39_madgraph + - hh_vbf_hvv_hvv_kvm2p12_k2v3p87_klm5p96_madgraph + + background: + ttbar: + tag: ["has_top", "ttbar", "tt"] + color: "#ffa90e" # bright_orange + label: + cmsdb: + - tt_sl_powheg + - tt_dl_powheg + - tt_fh_powheg + single_top: + tag: ["has_top", "single_top", "st"] + color: "#832db6" # purpule + label: + cmsdb: + - st_tchannel_t_4f_powheg + - st_tchannel_tbar_4f_powheg + - st_twchannel_t_sl_powheg + - st_twchannel_tbar_sl_powheg + - st_schannel_t_lep_4f_amcatnlo + - st_schannel_tbar_lep_4f_amcatnlo + ttv: + tag: ["ttbar", "tt"] + color: "#94a4a2" # grey + label: + cmsdb: + - ttzz_madgraph + - ttz_zqq_amcatnlo + - ttz_zll_m4to50_amcatnlo + - ttz_zll_m50toinf_amcatnlo + - ttww_madgraph + - ttw_wlnu_amcatnlo + dy: + tag: ["dy"] + color: "#3f90da" # bright_blue + label: + cmsdb: + - dy_m4to10_amcatnlo + - dy_m10to50_amcatnlo + - dy_mumu_m800to1500_powheg + - dy_m50toinf_amcatnlo + - dy_m50toinf_0j_amcatnlo + - dy_m50toinf_1j_amcatnlo + - dy_m50toinf_2j_amcatnlo + - dy_m50toinf_1j_pt40to100_amcatnlo + - dy_m50toinf_1j_pt100to200_amcatnlo + - dy_m50toinf_1j_pt200to400_amcatnlo + - dy_m50toinf_1j_pt400to600_amcatnlo + - dy_m50toinf_1j_pt600toinf_amcatnlo + - dy_m50toinf_2j_pt40to100_amcatnlo + - dy_m50toinf_2j_pt100to200_amcatnlo + - dy_m50toinf_2j_pt200to400_amcatnlo + - dy_m50toinf_2j_pt400to600_amcatnlo + - dy_m50toinf_2j_pt600toinf_amcatnlo + wjets: + tag: ["w_lnu"] + color: "#964a8b" #aubergine + label: + cmsdb: + - w_lnu_amcatnlo + - w_lnu_0j_amcatnlo + - w_lnu_1j_amcatnlo + - w_lnu_2j_amcatnlo + - w_lnu_1j_pt40to100_amcatnlo + - w_lnu_1j_pt100to200_amcatnlo + - w_lnu_1j_pt200to400_amcatnlo + - w_lnu_1j_pt400to600_amcatnlo + - w_lnu_1j_pt600toinf_amcatnlo + - w_lnu_2j_pt40to100_amcatnlo + - w_lnu_2j_pt100to200_amcatnlo + - w_lnu_2j_pt200to400_amcatnlo + - w_lnu_2j_pt400to600_amcatnlo + - w_lnu_2j_pt600toinf_amcatnlo + - w_vbf_wlnu_madgraph + - z_vbf_zll_m50toinf_madgraph + qcd: + tag: + color: "#bd1f01" # red + label: + cmsdb: + - qcd_mu_pt15to20_pythia + - qcd_mu_pt20to30_pythia + - qcd_mu_pt30to50_pythia + - qcd_mu_pt50to80_pythia + - qcd_mu_pt80to120_pythia + - qcd_mu_pt120to170_pythia + - qcd_mu_pt170to300_pythia + - qcd_mu_pt300to470_pythia + - qcd_mu_pt470to600_pythia + - qcd_mu_pt600to800_pythia + - qcd_mu_pt800to1000_pythia + - qcd_mu_pt1000toinf_pythia + - qcd_em_pt10to30_pythia + - qcd_em_pt30to50_pythia + - qcd_em_pt50to80_pythia + - qcd_em_pt80to120_pythia + - qcd_em_pt120to170_pythia + - qcd_em_pt170to300_pythia + - qcd_em_pt300toinf_pythia + zz: + tag: ["no_lhe_weights"] + color: "#f7c331" # yellow + cmsdb: + - zz_pythia + single_higgs: + tag: ["h"] + color: "#92dadd" # teal + label: + cmsdb: + - h_ggf_htt_powheg + - h_ggf_hbb_powheg + - h_vbf_htt_powheg + - h_vbf_hbb_powheg + - wph_htt_powheg + - wmh_htt_powheg + - wph_wqq_hbb_powheg + - wmh_wqq_hbb_powheg + - wph_wlnu_hbb_powheg + - wmh_wlnu_hbb_powheg + - zh_zll_hbb_powheg + - zh_zqq_hbb_powheg + - zh_htt_powheg + - zh_gg_zll_hbb_powheg + - zh_gg_zqq_hbb_powheg + - zh_gg_znunu_hbb_powheg + - 
tth_hbb_powheg + - tth_hnonbb_powheg + vvv: + tag: + color: "#a96b59" # brown + label: + cmsdb: + - www_4f_amcatnlo + - wwz_4f_amcatnlo + - wzz_amcatnlo + - zzz_amcatnlo + others: + tag: + color: "#30c300" # green + label: + cmsdb: + - wz_pythia + - ww_pythia +## negligible drop for now +# - z_qq_1j_pt100to200_amcatnlo +# - z_qq1j_pt200to400_amcatnlo +# - z_qq1j_pt400to600_amcatnlo +# - z_qq1j_pt600toinf_amcatnlo +# - z_qq2j_pt100to200_amcatnlo +# - z_qq2j_pt200to400_amcatnlo +# - z_qq2j_pt400to600_amcatnlo +# - z_qq2j_pt600toinf_amcatnlo + +external_files: + pog: "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/POG" + hh_btag_repo: "/afs/cern.ch/work/m/mrieger/public/hbt/external_files/hh-btag-master-d7a71eb3.tar.gz" + +channels: + etau: + id: 1 + label: "$e\\tau_{h}$" + mutau: + id: 2 + label: "$\\mu\\tau_{h}$" + tautau: + id: 3 + label: "$\\tau_{h}\\tau_{h}$" + # ... other channels + +plot_defaults: + general: + cms_label: "Work-in-progress" + whitespace_fraction: 0.31 + default_style: wide_legend + blinding_threshold: 0 + + styles: + default: + gridspec_cfg: + height_ratios: [3, 0.9] + rax_cfg: + yloc: center + legend_cfg: + borderpad: 0 + borderaxespad: 1.2 + columnspacing: 1.8 + labelspacing: 0.28 + fontsize: 16 + cf_line_breaks: true + cf_short_labels: false + annotate_cfg: + fontsize: 18 + style: italic + xycoords: "axes fraction" + xy: [0.035, 0.955] + + wide_legend: + inherit: default + legend_cfg: + ncols: 3 + loc: upper left + cf_short_labels: true + annotate_cfg: + xy: [0.035, 0.765] + + wide_ext_legend: + inherit: wide_legend + legend_cfg: + cf_short_labels: false + annotate_cfg: + xy: [0.035, 0.750] + + +# (names derived from contents in BTV correctionlib file) +btag_sf_jec_sources: + - Absolute + - AbsoluteMPFBias + - AbsoluteScale + - AbsoluteStat + - BBEC1 + - EC2 + - FlavorQCD + - Fragmentation + - HF + - PileUpDataMC + - PileUpPtBB + - PileUpPtEC1 + - PileUpPtEC2 + - PileUpPtHF + - PileUpPtRef + - RelativeBal + - RelativeFSR + - RelativeJEREC1 + - RelativeJEREC2 + - RelativeJERHF + - RelativePtBB + - RelativePtEC1 + - RelativePtEC2 + - RelativePtHF + - RelativeSample + - RelativeStatEC + - RelativeStatFSR + - RelativeStatHF + - SinglePionECAL + - SinglePionHCAL + - TimePtEta + +# full list of jec sources in a fixed order that is used to assign consistent ids across configs +# (please add new sources at the bottom to preserve the order of existing ones) +jec_sources: + - AbsoluteStat + - AbsoluteScale + - AbsoluteSample + - AbsoluteFlavMap + - AbsoluteMPFBias + - Fragmentation + - SinglePionECAL + - SinglePionHCAL + - FlavorQCD + - TimePtEta + - RelativeJEREC1 + - RelativeJEREC2 + - RelativeJERHF + - RelativePtBB + - RelativePtEC1 + - RelativePtEC2 + - RelativePtHF + - RelativeBal + - RelativeSample + - RelativeFSR + - RelativeStatFSR + - RelativeStatEC + - RelativeStatHF + - PileUpDataMC + - PileUpPtRef + - PileUpPtBB + - PileUpPtEC1 + - PileUpPtEC2 + - PileUpPtHF + - PileUpMuZero + - PileUpEnvelope + - SubTotalPileUp + - SubTotalRelative + - SubTotalPt + - SubTotalScale + - SubTotalAbsolute + - SubTotalMC + - Total + - TotalNoFlavor + - TotalNoTime + - TotalNoFlavorNoTime + - FlavorZJet + - FlavorPhotonJet + - FlavorPureGluon + - FlavorPureQuark + - FlavorPureCharm + - FlavorPureBottom + - TimeRunA + - TimeRunB + - TimeRunC + - TimeRunD + - CorrelationGroupMPFInSitu + - CorrelationGroupIntercalibration + - CorrelationGroupbJES + - CorrelationGroupFlavor + - CorrelationGroupUncorrelated diff --git a/multilepton/config/analysis_multilepton.py 
b/multilepton/config/analysis_multilepton.py new file mode 100644 index 00000000..44d8267e --- /dev/null +++ b/multilepton/config/analysis_multilepton.py @@ -0,0 +1,128 @@ +# coding: utf-8 +""" +Configuration of the HH → multileptons analysis. +""" + +from __future__ import annotations + +import importlib +import order as od + +from columnflow.util import DotDict + +from multilepton.hist_hooks.blinding import add_hooks as add_blinding_hooks +from multilepton.hist_hooks.binning import add_hooks as add_binning_hooks +from multilepton.tasks.base import MultileptonTask +from multilepton.config.configs_multilepton import add_config + + +# ======================================= +# Analysis Definition +# ======================================= +analysis_multilepton = od.Analysis(name="analysis_multilepton", id=1) + +# Use lookup from law.cfg +analysis_multilepton.x.versions = {} + +# Bash sandboxes required by remote tasks +analysis_multilepton.x.bash_sandboxes = [ + "$CF_BASE/sandboxes/cf.sh", + "$MULTILEPTON_BASE/sandboxes/venv_multilepton.sh", +] + +# CMSSW sandboxes (optional) +analysis_multilepton.x.cmssw_sandboxes = [ + # "$CF_BASE/sandboxes/cmssw_default.sh", +] + +# ======================================= +# Analysis-wide Groups and Defaults +# ======================================= +analysis_multilepton.x.config_groups = {} +analysis_multilepton.x.store_parts_modifiers = {} + +# ======================================= +# Histogram Hooks +# ======================================= +analysis_multilepton.x.hist_hooks = DotDict() +add_blinding_hooks(analysis_multilepton) +add_binning_hooks(analysis_multilepton) + +# ======================================= +# Lazy Config Factory Helper +# ======================================= +def add_lazy_config( + *, + campaign_module: str, + campaign_attr: str, + config_name: str, + config_id: int, + add_limited: bool = True, + limit_dataset_files: int | None = None, + **kwargs, +) -> None: + """Register a lazily-created configuration into the multilepton analysis.""" + + def create_factory( + config_id: int, + config_name_postfix: str = "", + limit_dataset_files_factory: int | None = None, + #limit_dataset_files: int | None = None, + ): + def factory(configs: od.UniqueObjectIndex): + mod = importlib.import_module(campaign_module) + campaign = getattr(mod, campaign_attr) + #limit_dataset_files: int | None = None, + limit_files = limit_dataset_files_factory or limit_dataset_files + return add_config( + analysis_multilepton, + campaign.copy(), + config_name=config_name + config_name_postfix, + config_id=config_id, + #limit_dataset_files=limit_dataset_files, + limit_dataset_files=limit_files, + **kwargs, + ) + return factory + + # Add full configuration + analysis_multilepton.configs.add_lazy_factory(config_name, create_factory(config_id)) + + # Optionally add a "_limited" version + if add_limited: + limited_name = f"{config_name}_limited" + if limited_name in analysis_multilepton.configs: + raise ValueError(f"Duplicate config name detected: {limited_name}") + analysis_multilepton.configs.add_lazy_factory( + limited_name, + create_factory(config_id + 200, "_limited", 1), + ) + + +# ======================================= +# Dataset Configurations +# ======================================= +datasets = [ + # cid = 32024115 => (run)3(year)2024(part)1(nano_version)15 + # --- Private UHH NanoAOD datasets --- + ("cmsdb.campaigns.run3_2022_preEE_nano_uhh_v14", "22preEE_v14_private", 320221114), + ("cmsdb.campaigns.run3_2022_postEE_nano_uhh_v14", 
"22postEE_v14_private", 32022214), + ("cmsdb.campaigns.run3_2023_preBPix_nano_uhh_v14", "23preBPix_v14_private", 32023114), + ("cmsdb.campaigns.run3_2023_postBPix_nano_uhh_v14", "23postBPix_v14_private", 32023214), + + # --- Central NanoAOD datasets --- + ("cmsdb.campaigns.run3_2022_preEE_nano_v12", "22preEE_v12_central", 320221112), + ("cmsdb.campaigns.run3_2022_postEE_nano_v12", "22postEE_v12_central", 32022212), + ("cmsdb.campaigns.run3_2023_preBPix_nano_v12", "23preBPix_v12_central", 32023112), + ("cmsdb.campaigns.run3_2023_postBPix_nano_v12", "23postBPix_v12_central", 32023212), + ("cmsdb.campaigns.run3_2024_nano_v15", "24_v15_central", 32024115), +] + +for module, name, cid in datasets: + add_lazy_config( + campaign_module=module, + campaign_attr=f"campaign_{module.split('.')[-1]}", + config_name=name, + config_id=cid, + add_limited=False, + ) diff --git a/multilepton/config/categories.py b/multilepton/config/categories.py new file mode 100644 index 00000000..2d813074 --- /dev/null +++ b/multilepton/config/categories.py @@ -0,0 +1,203 @@ +# coding: utf-8 + +""" +Definition of categories. +""" + +import functools + +import order as od + +from columnflow.config_util import add_category, create_category_combinations, CategoryGroup +from columnflow.types import Any + + +def add_categories(config: od.Config) -> None: + """ + Adds all categories to a *config*. + """ + # root category (-1 has special meaning in cutflow) + root_cat = add_category(config, name="all", id=-1, selection="cat_all", label="") + _add_category = functools.partial(add_category, parent=root_cat) + + # lepton channels + _add_category(config, name="cetau", id=1, selection="cat_etau", label=config.channels.n.cetau.label) + _add_category(config, name="cmutau", id=2, selection="cat_mutau", label=config.channels.n.cmutau.label) + _add_category(config, name="ctautau", id=3, selection="cat_tautau", label=config.channels.n.ctautau.label) + _add_category(config, name="cee", id=4, selection="cat_ee", label=config.channels.n.cee.label) + _add_category(config, name="cmumu", id=5, selection="cat_mumu", label=config.channels.n.cmumu.label) + _add_category(config, name="cemu", id=6, selection="cat_emu", label=config.channels.n.cemu.label) + # 3l/4l inclusive channels + _add_category(config, name="cat3l0tau_SR", id=1001, selection="cat_3l0tau_SR", label=r"$3\ell 0\tau_h$ SR") + _add_category(config, name="cat3l0tau_SB", id=1002, selection="cat_3l0tau_SB", label=r"$3\ell 0\tau_h$ SB") + _add_category(config, name="cat4l_SR", id=1003, selection="cat_4l_SR", label=r"$4\ell$ SR") + _add_category(config, name="cat4l_SB", id=1004, selection="cat_4l_SB", label=r"$4\ell$ SB") + _add_category(config, name="cat3l1tau_SR", id=1005, selection="cat_3l1tau_SR", label=r"$3\ell 1\tau_h$ SR") + _add_category(config, name="cat3l1tau_SB", id=1006, selection="cat_3l1tau_SB", label=r"$3\ell 1\tau_h$ SB") + _add_category(config, name="cat2l2tau_SR", id=1007, selection="cat_2l2tau_SR", label=r"$2\ell 2\tau_h$ SR") + _add_category(config, name="cat2l2tau_SB", id=1008, selection="cat_2l2tau_SB", label=r"$2\ell 2\tau_h$ SB") + _add_category(config, name="cat1l3tau_SR", id=1009, selection="cat_1l3tau_SR", label=r"$1\ell 3\tau_h$ SR") + _add_category(config, name="cat1l3tau_SB", id=1010, selection="cat_1l3tau_SB", label=r"$1\ell 3\tau_h$ SB") + _add_category(config, name="cat4tau_SR", id=1011, selection="cat_4tau_SR", label=r"$4\tau_h$ SR") + _add_category(config, name="cat4tau_SB", id=1012, selection="cat_4tau_SB", label=r"$4\tau_h$ SB") + _add_category(config, 
name="cat2l0or1tau_SR_SS", id=1013, selection="cat_2l0or1tau_SR_SS", label=r"$2\ell\ \leq 1\,\tau_{h}$ SR, SS") + _add_category(config, name="cat2l0or1tau_SR_OS", id=1014, selection="cat_2l0or1tau_SR_OS", label=r"$2\ell\ \leq 1\,\tau_{h}$ SR, OS") + _add_category(config, name="cat2l0or1tau_SB_SS", id=1015, selection="cat_2l0or1tau_SB_SS", label=r"$2\ell\ \leq 1\,\tau_{h}$ SB, SS") + _add_category(config, name="cat2l0or1tau_SB_OS", id=1016, selection="cat_2l0or1tau_SB_OS", label=r"$2\ell\ \leq 1\,\tau_{h}$ SB, OS") + # 3l/4l non inclusive channels ( no taus) + _add_category(config, name="c3e", id=14, selection="cat_3e", label=config.channels.n.c3e.label) + _add_category(config, name="c2emu", id=15, selection="cat_2emu", label=config.channels.n.c2emu.label) + _add_category(config, name="ce2mu", id=16, selection="cat_e2mu", label=config.channels.n.ce2mu.label) + _add_category(config, name="c3mu", id=17, selection="cat_3mu", label=config.channels.n.c3mu.label) + _add_category(config, name="c4e", id=18, selection="cat_4e", label=config.channels.n.c4e.label) + _add_category(config, name="c3emu", id=19, selection="cat_3emu", label=config.channels.n.c3emu.label) + _add_category(config, name="c2e2mu", id=20, selection="cat_2e2mu", label=config.channels.n.c2e2mu.label) + _add_category(config, name="ce3mu", id=21, selection="cat_e3mu", label=config.channels.n.ce3mu.label) + _add_category(config, name="c4mu", id=22, selection="cat_4mu", label=config.channels.n.c4mu.label) + # 3l/4l non inclusive channels ( with taus) + _add_category(config, name="c3etau", id=23, selection="cat_3etau", label=config.channels.n.c3etau.label) + _add_category(config, name="c2emutau", id=24, selection="cat_2emutau", label=config.channels.n.c2emutau.label) + _add_category(config, name="ce2mutau", id=25, selection="cat_e2mutau", label=config.channels.n.ce2mutau.label) + _add_category(config, name="c3mutau", id=26, selection="cat_3mutau", label=config.channels.n.c3mutau.label) + _add_category(config, name="c2e2tau", id=27, selection="cat_2e2tau", label=config.channels.n.c2e2tau.label) + _add_category(config, name="cemu2tau", id=28, selection="cat_emu2tau", label=config.channels.n.cemu2tau.label) + _add_category(config, name="c2mu2tau", id=29, selection="cat_2mu2tau", label=config.channels.n.c2mu2tau.label) + _add_category(config, name="ce3tau", id=30, selection="cat_e3tau", label=config.channels.n.ce3tau.label) + _add_category(config, name="cmu3tau", id=31, selection="cat_mu3tau", label=config.channels.n.cmu3tau.label) + _add_category(config, name="c4tau", id=32, selection="cat_4tau", label=config.channels.n.c4tau.label) + # 2-leptons 0 or 1 taus channels + _add_category(config, name="c2e0or1tau", id=33, selection="cat_c2e0or1tau", label=config.channels.n.c2e0or1tau.label) + _add_category(config, name="cemu0or1tau", id=34, selection="cat_cemu0or1tau", label=config.channels.n.cemu0or1tau.label) + _add_category(config, name="c2mu0or1tau", id=35, selection="cat_c2mu0or1tau", label=config.channels.n.c2mu0or1tau.label) + # Loose category for BDT trainning + tight + trigmatch + _add_category(config, name="ceormu", id=10000, selection="cat_e_or_mu", label=r"e or $\mu$", tags={"ceormu"}) + _add_category(config, name="ceormu_bveto", id=15000, selection="cat_e_or_mu_bveto", label=r"e or $\mu$ bveto on", tags={"ceormu_bveto"}) + # bveto + _add_category(config, name="bveto_on", id=30001, selection="cat_bveto_on", label="bveto on") + _add_category(config, name="bveto_off", id=30002, selection="cat_bveto_off", label="bveto off") + # 
tight/nontight + _add_category(config, name="tight_bdt", id=11000, selection="cat_tight_bdt", label="tight", tags={"tight_bdt"}) + _add_category(config, name="nontight_bdt", id=12000, + selection="cat_nontight_bdt", label="fakeable", tags={"nontight_bdt"}) + # trigmatch + _add_category(config, name="trigmatch_bdt", id=13000, + selection="cat_trigmatch_bdt", label="trigger matched", tags={"trigmatch_bdt"}) + _add_category(config, name="nontrigmatch_bdt", id=14000, + selection="cat_nontrigmatch_bdt", label="trigger unmatched", tags={"nontrigmatch_bdt"}) + # tight/nontight + _add_category(config, name="tight", id=10001, selection="cat_tight", label="tight", tags={"tight"}) + _add_category(config, name="nontight", id=10002, selection="cat_nontight", label="fakeable", tags={"nontight"}) + # trigmatch + _add_category(config, name="trigmatch", id=10003, + selection="cat_trigmatch", label="trigger matched", tags={"trigmatch"}) + _add_category(config, name="nontrigmatch", id=10004, + selection="cat_nontrigmatch", label="trigger unmatched", tags={"nontrigmatch"}) + # qcd regions + _add_category(config, name="os", id=10, selection="cat_os", label="OS", tags={"os"}) + _add_category(config, name="ss", id=11, selection="cat_ss", label="SS", tags={"ss"}) + _add_category(config, name="iso", id=12, selection="cat_iso", label=r"iso", tags={"iso"}) + _add_category(config, name="noniso", id=13, selection="cat_noniso", label=r"non-iso", tags={"noniso"}) # noqa: E501 + # kinematic categories + _add_category(config, name="incl", id=100, selection="cat_incl", label="inclusive") + _add_category(config, name="2j", id=110, selection="cat_2j", label="2 jets") + _add_category(config, name="dy", id=210, selection="cat_dy", label="DY enriched") + _add_category(config, name="tt", id=220, selection="cat_tt", label=r"$t\bar{t}$ enriched") + _add_category(config, name="res1b", id=300, selection="cat_res1b", label="res1b") + _add_category(config, name="res2b", id=301, selection="cat_res2b", label="res2b") + _add_category(config, name="boosted", id=310, selection="cat_boosted", label="boosted") + + + def name_fn(categories: dict[str, od.Category]) -> str: + return "__".join(cat.name for cat in categories.values() if cat) + + def kwargs_fn(categories: dict[str, od.Category], add_qcd_group: bool = True) -> dict[str, Any]: + # build auxiliary information + aux = {} + if add_qcd_group: + aux["qcd_group"] = name_fn({ + name: cat for name, cat in categories.items() + if name not in {"sign", "tau2"} + }) + return { + # NOTE: for this to be deterministic, the order of the categories must no change! 
+ "id": "+", # just increment the category id + "tags": set.union(*[cat.tags for cat in categories.values() if cat]), + "aux": aux, + "label": ", ".join([ + cat.label or cat.name + for cat in categories.values() + # if cat.name != "os" # os is the default + ]) or None, + } + + def skip_fn_ctrl(categories: dict[str, od.Category]) -> bool: + if "channel" not in categories or "kin" not in categories: + return False + ch_cat = categories["channel"] + kin_cat = categories["kin"] + # skip dy in emu + if kin_cat.name == "dy" and ch_cat.name == "emu": + return True + # skip tt in ee/mumu + if kin_cat.name == "tt" and ch_cat.name in ("ee", "mumu"): + return True + return False + + # main analysis categories + main_categories = { + # channels first + "channel": CategoryGroup(["cetau", "cmutau", "ctautau"], is_complete=False, has_overlap=False), + # kinematic regions in the middle (to be extended) + "kin": CategoryGroup(["incl", "2j", "res1b", "res2b", "boosted"], is_complete=True, has_overlap=True), + # qcd regions last + "sign": CategoryGroup(["os", "ss"], is_complete=True, has_overlap=False), + "tau2": CategoryGroup(["iso", "noniso"], is_complete=True, has_overlap=False), + } + # control categories + control_categories = { + # channels first + "channel": CategoryGroup(["cee", "cmumu", "cemu"], is_complete=False, has_overlap=False), + # kinematic regions in the middle (to be extended) + "kin": CategoryGroup(["incl", "dy", "tt"], is_complete=True, has_overlap=True), + # relative sign last + "sign": CategoryGroup(["os"], is_complete=False, has_overlap=False), + } + # Creating category combinations + sig_sideband_categories = { + "channel": CategoryGroup(["c3e", "c3mu", "c2emu", "ce2mu", "c4e", "c4mu", "c2e2mu", + "c3emu", "ce3mu", "c3etau", "c2e2tau", "ce3tau", "c2mu2tau", "cmu3tau", "c3mutau", "c2emutau", + "ce2mutau", "cemu2tau", "c4tau", "c2ess", "cemuss", "c2muss"], + is_complete=True, + has_overlap=False, + ), + "sel": CategoryGroup(["tight", "nontight"], is_complete=False, has_overlap=False), + "trig": CategoryGroup(["trigmatch", "nontrigmatch"], is_complete=True, has_overlap=False), + "vetobtag": CategoryGroup(["bveto_on", "bveto_off"], is_complete=True, has_overlap=False), + "sign": CategoryGroup(["os", "ss"], is_complete=True, has_overlap=False), + } + bdt_categories = { + "loose_ch": CategoryGroup(["ceormu"], is_complete=False, has_overlap=False), + "sel": CategoryGroup(["tight_bdt", "nontight_bdt"], is_complete=False, has_overlap=False), + "trig": CategoryGroup(["trigmatch_bdt", "nontrigmatch_bdt"], is_complete=True, has_overlap=False), + "vetobtag": CategoryGroup(["bveto_on", "bveto_off"], is_complete=True, has_overlap=False), + } + + # for cnm, cdict in { + # 'main': main_categories, + # 'control': control_categories, + # 'sideband': sig_sideband_categories, + # 'bdt': bdt_categories + # }.items(): + # + # add_qcd_group = False + # if cnm == 'main': + # add_qcd_group=True + # + # create_category_combinations( + # config = config, + # categories = cdict, + # name_fn = name_fn, + # kwargs_fn = functools.partial(kwargs_fn, add_qcd_group=add_qcd_group), + # skip_fn = skip_fn_ctrl + # ) + diff --git a/multilepton/config/configs_multilepton.py b/multilepton/config/configs_multilepton.py new file mode 100644 index 00000000..9608144e --- /dev/null +++ b/multilepton/config/configs_multilepton.py @@ -0,0 +1,1320 @@ +# coding: utf-8 + +""" +Configuration of the HH → multi-leptons analysis. 
+""" + +from __future__ import annotations + +import os +import re +import itertools +import functools +import yaml +import law +import json + +import order as od + +from collections import defaultdict +from scinum import Number + +from columnflow.tasks.external import ExternalFile as Ext +from columnflow.util import DotDict, dev_sandbox, load_correction_set +from columnflow.columnar_util import ColumnCollection, skip_column +from columnflow.config_util import get_root_processes_from_campaign, get_shifts_from_sources +from columnflow.config_util import add_shift_aliases, verify_config_processes +from columnflow.production.cms.top_pt_weight import TopPtWeightFromTheoryConfig, TopPtWeightFromDataConfig +from columnflow.production.cms.dy import DrellYanConfig +from columnflow.production.cms.btag import BTagSFConfig +from columnflow.production.cms.jet import JetIdConfig +from columnflow.production.cms.electron import ElectronSFConfig +from columnflow.production.cms.muon import MuonSFConfig +from columnflow.calibration.cms.tau import TECConfig +from columnflow.calibration.cms.egamma import EGammaCorrectionConfig +from columnflow.calibration.cms.met import METPhiConfig, METPhiConfigRun2 + +from multilepton.config.styles import stylize_processes, setup_plot_styles +from multilepton.config.categories import add_categories +from multilepton.config.variables import add_variables +from multilepton.config.met_filters import add_met_filters +from multilepton.config.triggers import add_triggers + + +logger = law.logger.get_logger(__name__) + + + +def load_datasets_config(yaml_path): + """Load dataset information from the YAML file.""" + with open(yaml_path, "r") as f: + data = yaml.safe_load(f) + return data + + +class AnalysisConfig: + """Helper class to manage analysis configuration from YAML.""" + + def __init__(self, data): + self.data = DotDict.wrap(data) + + def get_era_key(self, campaign): + """Get era key for b-tag WPs, luminosity, etc.""" + year = campaign.x.year + postfix = campaign.x.postfix + + if year in [2016, 2017, 2018]: # Run2 + return f"{year}{postfix}" if year == 2016 and postfix == "APV" else str(year) + else: # Run 3 + era_map = { + (2022, ""): "2022", + (2022, "EE"): "2022EE", + (2023, ""): "2023", + (2023, "BPix"): "2023BPix", + (2024, ""): "2024", + } + return era_map.get((year, postfix), str(year)) + + def get_era(self, campaign): + year = campaign.x.year + if year == 2016: + era= "preVFP" if campaign.has_tag("preVFP") else "postVFP" + elif year == 2022: + era = "preEE" if campaign.has_tag("preEE") else "postEE" + elif year == 2023: + era = "preBPix" if campaign.has_tag("preBPix") else "postBPix" + else: + era = "" + return f"{year}{era}" + + def get_luminosity(self, campaign): + """Get luminosity for given campaign.""" + year = campaign.x.year + lumis = self.data.luminosity.get(str(year)).get("luminosity") + if isinstance(lumis, dict): + return lumis.get(get_era(self, campaign)) + else: + return lumis + + def get_dataset_list(self, process_type="all"): + """ + Return a flattened list of all 'cmsdb' entries under 'signal' and/or 'background'. 
+ Args: + process_type (str): "signal", "background", or "all" + Returns: + list[str]: List of process names (from cmsdb entries) + """ + datasets = self.data.get("datasets", {}) + dataset_names = [] + categories = ["signal", "background"] if process_type == "all" else [process_type] + + for category in categories: + category_data = datasets.get(category, {}) + if not isinstance(category_data, dict): + continue + # Recursively walk through all nested dicts + def extract_cmsdb_entries(node): + if isinstance(node, dict): + for key, value in node.items(): + if key == "cmsdb" and isinstance(value, list): + dataset_names.extend(value) + else: + extract_cmsdb_entries(value) + elif isinstance(node, list): + # In case there are lists of dicts + for item in node: + extract_cmsdb_entries(item) + extract_cmsdb_entries(category_data) + return sorted(set(dataset_names)) + + +# Load analysis configuration +analysis_data = load_datasets_config(os.path.join(os.path.dirname(os.path.abspath(__file__)), "analysis.yaml")) + +# Initialize config helper +analysis_cfg = AnalysisConfig(analysis_data) + + +def pogEraFormat(era): + """Format era for POG file paths.""" + if any(x in era for x in ['2022', '2023', '2024']): + return era[:4] + '_Summer' + era.replace('20', '') + else: + return era.replace("UL", "") + "_UL" + + +def localizePOGSF(era, POG, fileName): + """Localize POG scale factor files.""" + subdir = pogEraFormat(str(era)) + return os.path.join("/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration", "POG", POG, subdir, fileName) + + +def nested_dict(): + """Recursively create nested defaultdicts.""" + return defaultdict(nested_dict) + + +#https://btv-wiki.docs.cern.ch/ScaleFactors +def bTagWorkingPoints(year, run, campaign): + getfromyear = year + if year == 2024: getfromyear = 2023 # still missing FIXME once they are updated by BTV-POG + fileName = law.LocalFileTarget(localizePOGSF(getfromyear, "BTV", "btagging.json.gz")) + logger.info(f'Getting btagging working points and discriminator cuts from : {fileName}') + ceval = load_correction_set(fileName) + btagging = nested_dict() + if run == 2: + taggers = ["deepJet", "deepcsv", "particleNetMD"] + valid_eras = ["2016APV", "2016", "2017", "2018"] + elif run == 3: + taggers = ["deepJet", "particleNet", "robustParticleTransformer", "particleNetMD"] + valid_eras = ["2022", "2022EE", "2023", "2023BPix", "2024"] + else: + raise ValueError(f"Unsupported run: {run}") + + era = f"{year}{campaign.x.postfix}" + mlwps = {'L': 'loose', + 'M': 'medium', + 'T': 'tight', + 'XT': 'xtight', + 'XXT': 'xxtight'} + if era not in "".join(valid_eras): + raise ValueError(f"Era {era} not valid for run {run}") + + for tagger in taggers: + for wp in ['L', 'M', 'T', 'XT', 'XXT']: + try: + btagging[tagger][mlwps[wp]]= ceval[f"{tagger.replace('MD', '')}_wp_values"].evaluate(wp) + except Exception as e: + logger.warning(f"Failed to evaluate {tagger} {wp} for {era}: {e}") + # Optionally convert defaultdicts to normal dicts for output + return json.loads(json.dumps(btagging)) + + +def build_stitching_config(process_name, inclusive_dataset): + """Build complete stitching configuration for a process.""" + # Configuration for different jet multiplicities + JET_BIN_CONFIG = { + 0: {"pt_bins": [], "suffix": "0j"}, + 1: {"pt_bins": ["0to40", "40to100", "100to200", "200to400", "400to600", "600toinf"], "suffix": "1j"}, + 2: {"pt_bins": ["0to40", "40to100", "100to200", "200to400", "400to600", "600toinf"], "suffix": "2j"}, + "ge3": {"pt_bins": [], "suffix": "ge3j"}, + } + leaf_processes = [] 
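+    # collect one leaf process per jet/pt bin defined above, e.g. for
+    # process_name="dy_m50toinf" this includes "dy_m50toinf_0j" and
+    # "dy_m50toinf_1j_pt40to100" (`procs` is assumed to be the campaign's root
+    # process index, e.g. the result of get_root_processes_from_campaign)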
+ + for jet_bin, config in JET_BIN_CONFIG.items(): + if jet_bin == "ge3": + # Special case for >=3 jets + leaf_processes.append(procs.get(f"{process_name}_{config['suffix']}")) + elif config["pt_bins"]: + # Processes with pt bins + leaf_processes.extend( + procs.get(f"{process_name}_{config['suffix']}_pt{pt}") + for pt in config["pt_bins"] + ) + else: + # Processes without pt bins + leaf_processes.append(procs.get(f"{process_name}_{config['suffix']}")) + + return { + "inclusive_dataset": inclusive_dataset, + "leaf_processes": leaf_processes, + } + + +def convert_dataset_to_process(dataset, campaign, all_processes_from_campaign): + process = dataset + for production in ['_powheg', '_amcatnlo', '_pythia', '_madgraph']: + if production in dataset: + process = dataset.replace(production, '') + if process in ['st_schannel_t_lep_4f', 'st_schannel_tbar_lep_4f', 'www_4f', 'wwz_4f']: + process = process.replace('_4f','') + # Find matching process and return its id + id = None + for proc in all_processes_from_campaign: + if process == proc.name: + id = proc.id + break # <-- exit the loop immediately when found + if id is None: + logger.warning(f"Will skip ... No matching process '{process}' found in campaign '{campaign.name}' datasets") + return process, id + + +def add_config( + analysis: od.Analysis, + campaign: od.Campaign, + config_name: str | None = None, + config_id: int | None = None, + limit_dataset_files: int | None = None, +) -> od.Config: + + # gather campaign data + run = campaign.x.run + year = campaign.x.year + + # --- basic configuration validations --- + if run not in {2, 3}: + raise ValueError(f"Invalid run: {run}. Expected 2 or 3.") + + valid_years = {2016, 2017, 2018, 2022, 2023, 2024}# , 2025} not yet + if year not in valid_years: + raise ValueError(f"Invalid year: {year}. Must be one of {sorted(valid_years)}.") + + # get all root processes + all_processes_from_campaign = get_root_processes_from_campaign(campaign) + #for proc in list(set(all_processes_from_campaign)): + # print( proc, proc.name , proc.id) + + # create a config by passing the campaign + cfg = od.Config( + name=config_name, + id=config_id, + campaign=campaign, + ) + + #============================================= + # helpers + #============================================= + def ConfigureLuminosity(cfg, campaign, year, analysis_data): + year_data = analysis_data["years"].get(year) + if not year_data: + raise ValueError(f"Year {year} not found in analysis.yaml") + # detect campaign tag (e.g. preVFP, postEE, etc.) 
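+        # the tag is combined with the year to pick the matching entry from the
+        # "luminosity" list in analysis.yaml (assumed layout: a list of single-key
+        # mappings such as "- 2022preEE: <value>", or a plain number when the year
+        # has no sub-eras)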
+ tag = next((t for t in ["preVFP", "postVFP", "preEE", "postEE", "preBPix", "postBPix"] + if campaign.has_tag(t)), None) + lumi_info = year_data["luminosity"] + if isinstance(lumi_info, list): + lumi_map = {list(d.keys())[0]: list(d.values())[0] for d in lumi_info} + key = f"{year}{tag}" if tag else list(lumi_map.keys())[0] + lumi_value = lumi_map.get(key) + else: + lumi_value = lumi_info + lumi_unc_list = year_data.get("luminosity-uncertainties", []) + lumi_unc = {list(d.keys())[0]: list(d.values())[0]*1j for d in lumi_unc_list} + cfg.x.luminosity = Number(lumi_value, lumi_unc) + return cfg + + + def ConfigureMuons(cfg, run, year, campaign): + if run == 2: + cfg.x.muon_sf_names = MuonSFConfig(correction="NUM_TightRelIso_DEN_TightIDandIPCut") + elif run == 3: + cfg.x.muon_sf_names = MuonSFConfig(correction="NUM_TightPFIso_DEN_TightID") + cfg.x.muon_trigger_sf_names = MuonSFConfig("NUM_IsoMu24_DEN_CutBasedIdTight_and_PFIsoTight") + cfg.x.single_trigger_muon_data_effs_cfg = MuonSFConfig("NUM_IsoMu24_DEN_CutBasedIdTight_and_PFIsoTight_DATAeff") + cfg.x.single_trigger_muon_mc_effs_cfg = MuonSFConfig("NUM_IsoMu24_DEN_CutBasedIdTight_and_PFIsoTight_MCeff") + cfg.x.cross_trigger_muon_data_effs_cfg = MuonSFConfig("NUM_IsoMu20_DEN_CutBasedIdTight_and_PFIsoTight_DATAeff") + cfg.x.cross_trigger_muon_mc_effs_cfg = MuonSFConfig("NUM_IsoMu20_DEN_CutBasedIdTight_and_PFIsoTight_MCeff") + return cfg + + + def ConfigureElectrons(cfg, run, year, campaign, scale_compound=False, smear_syst_compound=False): + """ Run 2: https://twiki.cern.ch/twiki/bin/view/CMS/EgammaULTagAndProbe + Run 3: https://twiki.cern.ch/twiki/bin/view/CMS/EgammaRun3Recommendations + """ + EGMcorrection = { + "2016APV": "preVFP", + "2016": "postVFP", + "2017": "", + "2018": "", + "2022EE": "Re-recoE+PromptFG", + "2022": "Re-recoBCD", + "2023BPix": "PromptD", + "2023": "PromptC", + "2024": "Prompt", + } + e_postfix = EGMcorrection.get(f"{year}{campaign.x.postfix}") + e_prefix = 'UL-' if run == 2 else '' + scalecorr = 'Compound_Ele' if scale_compound else 'ElePTsplit' + smearcorr = 'Compound_Ele' if smear_syst_compound else 'ElePTsplit' + + cfg.x.electron_sf_names = ElectronSFConfig( + correction=f"{e_prefix}Electron-ID-SF", + campaign=f"{year}{e_postfix}", + working_point="wp80iso", + ) + + # Define HLT paths for easier maintenance + hlt_single, hlt_cross = "HLT_SF_Ele30_TightID", "HLT_SF_Ele24_TightID" + + # Common helper for trigger configs + def make_el_trigger_cfg(corr, path, campaign=f"{year}{e_postfix}", suffix=""): + return ElectronSFConfig(correction=f"Electron-HLT-{corr}{suffix}", + campaign=campaign, hlt_path=path) + + cfg.x.electron_trigger_sf_names = make_el_trigger_cfg("SF", hlt_single) + cfg.x.single_trigger_electron_data_effs_cfg = make_el_trigger_cfg("DataEff", hlt_single) + cfg.x.single_trigger_electron_mc_effs_cfg = make_el_trigger_cfg("McEff", hlt_single) + cfg.x.cross_trigger_electron_data_effs_cfg = make_el_trigger_cfg("DataEff", hlt_cross) + cfg.x.cross_trigger_electron_mc_effs_cfg = make_el_trigger_cfg("McEff", hlt_cross) + + # --- Electron Energy Corrections (EEC/EER) ---------------------------------------------- + e_tag = "" + if year == 2022: + e_tag = {"": "preEE", "EE": "postEE"}[campaign.x.postfix] + elif year == 2023: + e_tag = {"": "preBPIX", "BPix": "postBPIX"}[campaign.x.postfix] + + if run ==3: + # electron scale and smearing (eec and eer) + cfg.x.ess = EGammaCorrectionConfig( + scale_correction_set=f"EGMScale_{scalecorr}_{year}{e_tag}", + scale_compound=scale_compound, + 
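+                # with the default non-compound settings these expand to names like
+                # "EGMScale_ElePTsplit_2022preEE" / "EGMSmearAndSyst_ElePTsplit_2022preEE"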
smear_syst_correction_set=f"EGMSmearAndSyst_{smearcorr}_{year}{e_tag}", + smear_syst_compound=smear_syst_compound, + systs=["scale_down", "scale_up", "smear_down", "smear_up"], + ) + return cfg + + + def ConfigureTaus(cfg, run, campaign): + """ + Configure tau ID, TEC (Tau Energy Calibration), and trigger settings. + """ + tau_taggers = { + 2: "DeepTau2017v2p1", + 3: "DeepTau2018v2p5", + } + + cfg.x.tau_tagger = tau_taggers.get(run) + corrector_kwargs = {"wp": "Medium", "wp_VSe": "VVLoose"} if run == 3 else {} + cfg.x.tec = TECConfig(tagger=cfg.x.tau_tagger, corrector_kwargs=corrector_kwargs) + + # --- Tau ID working points + # Legacy (campaign.x.version < 10) vs New format (>=10) + if campaign.x.version < 10: + wp_values_mu = {"vloose": 1, "loose": 2, "medium": 4, "tight": 8} + wp_values_jet_or_e = { + "vvvloose": 1, "vvloose": 2, "vloose": 4, + "loose": 8, "medium": 16, "tight": 32, + "vtight": 64, "vvtight": 128, + } + else: + wp_values_mu = {"vloose": 1, "loose": 2, "medium": 3, "tight": 4} + wp_values_jet_or_e = { + "vvvloose": 1, "vvloose": 2, "vloose": 3, + "loose": 4, "medium": 5, "tight": 6, + "vtight": 7, "vvtight": 8, + } + + cfg.x.tau_id_working_points = DotDict.wrap({ + "tau_vs_e": wp_values_jet_or_e, + "tau_vs_jet": wp_values_jet_or_e, + "tau_vs_mu": wp_values_mu, + }) + + # --- Tau trigger working points + cfg.x.tau_trigger_working_points = DotDict.wrap({ + "id_vs_jet_v0": "VVLoose", + "id_vs_jet_gv0": ("Loose", "VVLoose"), + "id_vs_mu_single": "Tight", + "id_vs_mu_cross": "VLoose", + "id_vs_e_single": "VVLoose", + "id_vs_e_cross": "VVLoose", + "trigger_corr": "VVLoose", + }) + # --- Tau trigger correctors + cfg.x.tau_trigger_corrector = "tau_trigger" + cfg.x.tau_trigger_corrector_cclub = "tauTriggerSF" + return cfg + + + def ConfigureJets(cfg, year, run, campaign): + """ + Configure Jet Energy Corrections (JEC) and Jet Energy Resolution (JER) + References: + - Run 2: https://cms-jerc.web.cern.ch/Recommendations/#run-2 + https://twiki.cern.ch/twiki/bin/view/CMS/JECDataMC?rev=204 + https://twiki.cern.ch/twiki/bin/view/CMS/JetResolution?rev=109 + - Run 3: https://cms-jerc.web.cern.ch/Recommendations/#2022 + """ + newyear = year%100 + jec_uncertainty_sources = analysis_data['jec_sources'] + + if run == 2: + jec_version_map = {2016: "V7", 2017: "V5", 2018: "V5"} + jer_version_map = {2016: "V3", 2017: "V2", 2018: "V2"} + jec_campaign = f"Summer19UL{newyear}{campaign.x.postfix}" + jer_campaign = f"Summer{'20' if year == 2016 else '19'}UL{newyear}{campaign.x.postfix}" + jecjerdb = { + "jec_campaign": jec_campaign, + "jec_version": jec_version_map[year], + "jer_campaign": jer_campaign, + "jer_version": "JR" + jer_version_map[year], + "jet_type": "AK4PFchs", + "data_per_era": False, + } + + elif run == 3: + jerc_postfix = { + (2022, ""): "_22Sep2023", + (2022, "EE"): "_22Sep2023", + (2023, ""): "Prompt23", + (2023, "BPix"): "Prompt23", + (2024, ""): "Prompt24", + }.get((year, campaign.x.postfix)) + jec_version_map = { + (2022, ""): "V2", + (2022, "EE"): "V2", + (2023, ""): "V2", + (2023, "BPix"): "V3", + (2024, ""): "V1", + } + if not jerc_postfix: + raise ValueError(f"Unsupported JERC configuration for Run 3: year={year}, postfix={campaign.x.postfix}") + jec_campaign = f"Summer{newyear}{campaign.x.postfix}{jerc_postfix}" + jer_campaign = f"Summer{newyear}{campaign.x.postfix}{jerc_postfix}" + # For the time being, use the Summer23BPix JERs for 2024 data. 
+ # The JER MC_ScaleFactor and MC_PtResolution for the Summer24 samples + # will be announced soon (expected by the end of October 2025). + if year ==2024: + jer_campaign = "Summer23BPixPrompt23_RunD" + # Add special Run fragment for 2023 + if year == 2023: + jer_campaign += f"_Run{'Cv1234' if campaign.has_tag('preBPix') else 'D'}" + jecjerdb = { + "jec_campaign": jec_campaign, + "jec_version": jec_version_map[(year, campaign.x.postfix)], + "jer_campaign": jer_campaign, + "jer_version": "JR" + {2022: "V1", 2023: "V1", 2024: "V1"}[year], + "jet_type": "AK4PFPuppi", + "data_per_era": year == 2022, # 2022 JEC depends on era + } + + if year in [2024, 2022]: + for src in ["TimeRunA", "TimeRunB", "TimeRunC", "TimeRunD"]: + if src in jec_uncertainty_sources: + jec_uncertainty_sources.remove(src) + + cfg.x.jec = DotDict.wrap({ + "Jet": { + "campaign": jecjerdb["jec_campaign"], + "version": jecjerdb["jec_version"], + "data_per_era": jecjerdb["data_per_era"], + "jet_type": jecjerdb["jet_type"], + "levels": ["L1FastJet", "L2Relative", "L2L3Residual", "L3Absolute"], + "levels_for_type1_met": ["L1FastJet"], + "uncertainty_sources": jec_uncertainty_sources, + }, + }) + + cfg.x.jer = DotDict.wrap({ + "Jet": { + "campaign": jecjerdb["jer_campaign"], + "version": jecjerdb["jer_version"], + "jet_type": jecjerdb["jet_type"], + }, + }) + + cfg.x.jet_id = JetIdConfig( + corrections={ + "AK4PUPPI_Tight": 2, + "AK4PUPPI_TightLeptonVeto": 3, + }) + + cfg.x.fatjet_id = JetIdConfig( + corrections={ + "AK8PUPPI_Tight": 2, + "AK8PUPPI_TightLeptonVeto": 3, + }) + + cfg.x.jet_trigger_corrector = "jetleg60" + return cfg + + + def ConfigureLFNS(cfg, limit_dataset_files=None): + """ + Configure custom methods for retrieving dataset LFNs depending on campaign settings. + """ + cfg.x.get_dataset_lfns = None + cfg.x.get_dataset_lfns_sandbox = None + + # Handle special campaign type: "custom" with "creator" == "uhh" + campaign_custom = cfg.campaign.x("custom", {}) + if campaign_custom.get("creator") != "uhh": + return cfg # No custom configuration needed + + def get_multileptons_dataset_lfns(dataset_inst: od.Dataset, shift_inst: od.Shift, dataset_key: str) -> list[str]: + """ + Retrieve LFNs for a given dataset under the UHH custom campaign convention. 
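+
+            The *dataset_key* is expected to consist of four "/"-separated fields, the last
+            three being the dataset id, "<main_campaign>-<sub_campaign>" and the tier
+            (cf. the split below); any other shape raises a ValueError.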
+ """ + try: + _, dataset_id, full_campaign, tier = dataset_key.split("/") + main_campaign, sub_campaign = full_campaign.split("-", 1) + except ValueError: + raise ValueError(f"Invalid dataset key format: {dataset_key}") + + path = f"store/{dataset_inst.data_source}/{main_campaign}/{dataset_id}/{tier}/{sub_campaign}/0" + # Determine filesystem and directory target class + custom_name = campaign_custom.get("name") + remote_fs = f"wlcg_fs_{custom_name}" + local_fs = f"local_fs_{custom_name}" + dir_cls = law.wlcg.WLCGDirectoryTarget + fs_to_use = remote_fs + + if law.config.has_section(local_fs): + base = law.target.file.remove_scheme(law.config.get_expanded(local_fs, "base")) + if os.path.exists(base): + dir_cls = law.LocalDirectoryTarget + fs_to_use = local_fs + + lfn_base = dir_cls(path, fs=fs_to_use) + # Retrieve all ROOT files and convert them to LFNs + lfns = [ + "/" + lfn_base.child(fname, type="f").path.lstrip("/") + for fname in lfn_base.listdir(pattern="*.root") + ] + return sorted(lfns) + # Attach the retrieval method and related configuration + cfg.x.get_dataset_lfns = get_multileptons_dataset_lfns + cfg.x.get_dataset_lfns_sandbox = dev_sandbox("bash::$CF_BASE/sandboxes/cf.sh") + cfg.x.get_dataset_lfns_remote_fs = lambda dataset_inst: [ + f"local_fs_{campaign_custom['name']}", + f"wlcg_fs_{campaign_custom['name']}", + ] + return cfg + + + def _names_from_tag(tag): + return [s.name for s in cfg.shifts if s.has_tag(tag)] + + + def get_datasets_by_tag(tag): + """Return converted dataset processes matching a given tag.""" + return [ + convert_dataset_to_process(dataset.name, campaign, all_processes_from_campaign) + for dataset in cfg.datasets + if dataset.has_tag(tag) + ] + + + def prune_datasets_node(node): + """Recursively clean cmsdb lists, keeping only valid datasets.""" + if isinstance(node, dict): + new_node = {} + for k, v in node.items(): + if k == "cmsdb" and isinstance(v, list): + filtered = [ds for ds in v if ds in valid_datasets_set] + new_node[k] = filtered + else: + pruned = prune_datasets_node(v) + if pruned is not None: + new_node[k] = pruned + return new_node + elif isinstance(node, list): + pruned_list = [prune_datasets_node(item) for item in node] + return pruned_list + return node + + + def add_external(name, value): + if isinstance(value, dict): + value = DotDict.wrap(value) + cfg.x.external_files[name] = value + + + def register_shift_pair(cfg, base_name, base_id, aliases=None, tags=None, aux=None, step=1): + """Register up/down shifts with optional aliases, tags, and aux data.""" + cfg.add_shift(name=f"{base_name}_up", id=base_id, type="shape", tags=tags or set(), aux=aux) + cfg.add_shift(name=f"{base_name}_down", id=base_id + step, type="shape", tags=tags or set(), aux=aux) + if aliases: + add_shift_aliases(cfg, base_name, aliases) + + + def find_match_era(**kwargs): + """Helper to enable processes/datasets only for specific era.""" + return ( + (kwargs.get('run') is None or campaign.x.run in law.util.make_set(kwargs.get('run'))) and + (kwargs.get('tag') is None or campaign.has_tag(kwargs.get('tag'), mode=any)) and + (kwargs.get('year') is None or campaign.x.year in law.util.make_set(kwargs.get('year'))) and + (kwargs.get('nano') is None or campaign.x.version in law.util.make_set(kwargs.get('nano'))) and + (kwargs.get('postfix') is None or campaign.x.postfix in law.util.make_set(kwargs.get('postfix'))) + ) + + + def in_era(values=None, **kwargs): + """ + Return a filtered list of values if the current era matches, + or an empty list otherwise. 
+ """ + return list(filter(bool, values or [])) if find_match_era(**kwargs) else [] + + + def not_in_era(**kwargs): + return not bool(in_era(**kwargs)) + + + def in_config(names=None, ids=None, values=None): + """ + Return a filtered list of values if cfg.id is in the provided ids, + or cfg.name in the provided names + or an empty list otherwise. + """ + if names: return list(filter(bool, values or [])) if cfg.name in names else [] + elif ids: return list(filter(bool, values or [])) if cfg.id in ids else [] + + + def not_in_config(**kwargs): + return not bool(in_config(**kwargs)) + + #============================================= + # configure some default objects + #============================================= + TopPtWeightFromTheory = False + cfg.x.default_selector_steps = "all" + cfg.x.default_calibrator = "default" + cfg.x.default_selector = "default" + cfg.x.default_reducer = "default" + cfg.x.default_producer = "default" + cfg.x.default_ml_model = None + cfg.x.default_inference_model = "default_no_shifts" + cfg.x.default_categories = ("all",) + cfg.x.default_variables = ("njet", "nlep") + cfg.x.default_hist_producer = "default" + cfg.x.external_files = DotDict() + cfg.x.minbias_xs = Number(69.2, 0.046j) + + btagJECsources = analysis_data.get("btag_sf_jec_sources", []) + btagJECsources += [f"Absolute_{year}", f"BBEC1_{year}", f"EC2_{year}", f"HF_{year}", f"RelativeSample_{year}", ""] + cfg.x.btag_sf_jec_sources = btagJECsources + cfg.x.btag_working_points = bTagWorkingPoints(year, run, campaign) + + ConfigureLuminosity(cfg, campaign, year, analysis_data) + ConfigureLFNS(cfg, limit_dataset_files) + ConfigureTaus(cfg, run, campaign) + ConfigureElectrons(cfg, run, year, campaign) + ConfigureMuons(cfg, run, year, campaign) + ConfigureJets(cfg, year, run, campaign) + + #============================================= + # processes and datasets - using YAML configuration + #============================================= + dataset_names = process_names = {'data': [], 'signal': [], 'background': []} + all_datasets_in_config = analysis_cfg.get_dataset_list('all') + valid_datasets = [d for d in all_datasets_in_config if campaign.has_dataset(d)] + valid_datasets_set = set(valid_datasets) + datasets_config = analysis_cfg.data.get("datasets", {}) + datasets_config = prune_datasets_node(datasets_config) + analysis_cfg.data["datasets"] = datasets_config + + # Loop over signal and background + for dtype in ['signal', 'background']: + for dataset_name in analysis_cfg.get_dataset_list(dtype): + tags = [] + proc, id = convert_dataset_to_process(dataset_name, campaign, all_processes_from_campaign) + if id is None or not campaign.has_dataset(dataset_name): + continue + cfg.add_process(proc, id) + dataset = cfg.add_dataset(campaign.get_dataset(dataset_name)) + dataset_names[dtype].append(dataset_name) + process_names[dtype].append(proc) + # Add tags to the process + if law.util.multi_match(dataset.name, [ + r"^(ww|wz|zz)_.*pythia$", + r"^tt(w|z)_.*amcatnlo$", + ]): + # datasets that are known to have no lhe info at all + dataset.add_tag("no_lhe_weights") + if re.match(r"^dy_m50toinf_\dj_(|pt.+_)amcatnlo$", dataset.name): + dataset.add_tag("dy_stitched") + if re.match(r"^w_lnu_\dj_(|pt.+_)amcatnlo$", dataset.name): + dataset.add_tag("w_lnu_stitched") + for sig in ['ggf', 'vbf']: + if dataset.name.startswith(f'hh_{sig}_htt_htt'): + dataset.add_tag(f"{sig}_4t") + elif dataset.name.startswith(f'hh_{sig}_htt_hvv'): + dataset.add_tag(f"{sig}_2t2v") + elif dataset.name.startswith(f'hh_{sig}_hvv_hvv'): + 
dataset.add_tag(f"{sig}_4v") + # datasets that are allowed to contain some events with missing lhe infos + # (known to happen for amcatnlo) + if dataset.name.endswith("_amcatnlo") or re.match(r"^z_vbf_.*madgraph$", dataset.name): + dataset.add_tag("partial_lhe_weights") + for tag in (t for t in law.util.make_set(tags) if t is not None): + dataset.add_tag(tag) + if limit_dataset_files: + for info in dataset.info.values(): + info.n_files = min(info.n_files, limit_dataset_files) + + # Add data + streams = datasets_config["data"]["streams"] + for y, year_cfg in datasets_config["data"].items(): + if y == "streams" or int(y) != campaign.x.year: + continue + + # Normalize: if year_cfg is a dict (like 2024), wrap it into a list + if isinstance(year_cfg, dict): + year_cfg = [year_cfg] + + # year_cfg is now always a list of tag blocks + for tag_block in year_cfg: + if "periods" in tag_block and len(tag_block) == 1: + tag_block = {"": tag_block} # empty tag name + + for tag, tag_cfg in tag_block.items(): + periods = tag_cfg["periods"] + requested_data = [ + *in_era( + year=y, + **({"tag": tag} if tag else {}), + values=[ + f"data_{stream}_{period}" + for stream in streams + for period in periods], + )] + valid_datasets = [d for d in requested_data if campaign.has_dataset(d)] + valid_datasets_set = set(valid_datasets) + dataset_names['data'] += valid_datasets_set + for dataset_name in valid_datasets_set: + dataset = cfg.add_dataset(campaign.get_dataset(dataset_name)) + proc = '_'.join(dataset_name.split('_')[:2]) + id = next((p.id for p in all_processes_from_campaign if p.name == proc), None) + if id is None: + raise ValueError(f"No process found with name '{proc}' in run{run} campaign: {campaign}") + if proc not in process_names['data']: + process_names['data'] +=[proc] + cfg.add_process(proc, id) + if dataset.name.startswith("data_e_"): + dataset.add_tag({"etau", "emu_from_e", "ee"}) + if dataset.name.startswith("data_mu_"): + dataset.add_tag({"mutau", "emu_from_mu", "mumu"}) + if dataset.name.startswith("data_tau_"): + dataset.add_tag({"tautau"}) + if dataset.name.startswith("data_muoneg_"): + dataset.add_tag({"mue"}) + # Optional: special tag for broken MET filter in 2022 + # bad ecalBadCalibFilter MET filter in 2022 data + # https://twiki.cern.ch/twiki/bin/view/CMS/MissingETOptionalFiltersRun2?rev=172#ECal_BadCalibration_Filter_Flag + # https://cms-talk.web.cern.ch/t/noise-met-filters-in-run-3/63346/5 + if y == 2022 and dataset.is_data and dataset.x.era in "FG": + dataset.add_tag("broken_ecalBadCalibFilter") + if limit_dataset_files: + for info in dataset.info.values(): + info.n_files = min(info.n_files, limit_dataset_files) + + # verify that the root process of each dataset is part of any of the registered processes + verify_config_processes(cfg, warn=True) + + # process groups for conveniently looping over certain processs + # (used in wrapper_factory and during plotting) + decays = ["4v", "4t", "2t2v"] + productions = ["ggf", "vbf"] + nonresonant_signal_groups = { + f"{prod}_{decay}": get_datasets_by_tag(f"{prod}_{decay}") + for prod in productions + for decay in decays + } + + cfg.x.process_groups = { + "nonresonant_ggf": (nonresonant_ggf := datasets_config['signal']['nonresonant']['ggf']['cmsdb']), + "nonresonant_vbf": (nonresonant_vbf := datasets_config['signal']['nonresonant']['vbf']['cmsdb']), + "nonresonant": [*nonresonant_ggf, *nonresonant_vbf], + "resonant": (resonant := datasets_config['signal']['resonant']['cmsdb']), + "all_data":(process_names['data']), + "all_signals": (all_signals 
:= [*resonant, *nonresonant_vbf, *nonresonant_ggf]), + "all_backgrounds": (all_backgrounds := process_names['background']), + # decay channel for all modes to pass + # ggf_4v, ggf_4t, ggf_2t2v, vbf_4v, vbf_4t, vbf_2t2v + **nonresonant_signal_groups, + # decay modes merged for productions to pass + "4v": [*nonresonant_signal_groups["ggf_4v"], *nonresonant_signal_groups["vbf_4v"]], + "4t": [*nonresonant_signal_groups["ggf_4t"], *nonresonant_signal_groups["vbf_4t"]], + "2t2v": [*nonresonant_signal_groups["ggf_2t2v"], *nonresonant_signal_groups["vbf_2t2v"]], + } + + # define inclusive datasets for the stitched process identification with corresponding leaf processes + # Drell-Yan and W+jets configurations + #cfg.x.dy_stitching = { + # "m50toinf": build_stitching_config("dy_m50toinf", cfg.datasets.n.dy_m50toinf_amcatnlo),} + #cfg.x.w_lnu_stitching = { + # "incl": build_stitching_config("w_lnu", cfg.datasets.n.w_lnu_amcatnlo),} + + # Background dataset groups + background_groups = {} + backgrounds = datasets_config.get('background', {}) + for bkg_name, bkg_cfg in backgrounds.items(): + cmsdb_entries = bkg_cfg.get('cmsdb', []) + if cmsdb_entries: + background_groups[bkg_name] = cmsdb_entries + + # dataset groups for conveniently looping over certain datasets + # (used in wrapper_factory and during plotting) + cfg.x.dataset_groups = { + "all_data": (data_group := [dataset.name for dataset in cfg.datasets if dataset.is_data]), + "all_signals": (signals_group := [dataset.name for dataset in cfg.datasets if dataset.has_tag("signal")]), + "all_backgrounds": (backgrounds := [ + dataset.name for dataset in cfg.datasets + if dataset.is_mc and not dataset.has_tag("signal") + ]), + **background_groups, # so basically the keys in the yaml datasets:background: + "backgrounds_unstitched": (backgrounds_unstitched := [ + dataset.name for dataset in cfg.datasets + if ( + dataset.is_mc and + not dataset.has_tag("signal") and + not dataset.has_tag({"dy_stitched", "w_lnu_stitched"}, mode=any) + ) + ]), + } + + # category groups for conveniently looping over certain categories + # (used during plotting) + cfg.x.category_groups = {} + + # variable groups for conveniently looping over certain variables + # (used during plotting) + cfg.x.variable_groups = { + "hh": (hh := [f"hh_{var}" for var in ["energy", "mass", "pt", "eta", "phi", "dr"]]), + "dilep": (dilep := [f"dilep_{var}" for var in ["energy", "mass", "pt", "eta", "phi", "dr"]]), + "dijet": (dijet := [f"dijet_{var}" for var in ["energy", "mass", "pt", "eta", "phi", "dr"]]), + "default": [ + *dijet, *dilep, *hh, + "mu1_pt", "mu1_eta", "mu1_phi", "mu2_pt", "mu2_eta", "mu2_phi", + "e1_pt", "e1_eta", "e1_phi", "e2_pt", "e2_eta", "e2_phi", + "tau1_pt", "tau1_eta", "tau1_phi", "tau2_pt", "tau2_eta", "tau2_phi", + ], + } + + # selector step groups for conveniently looping over certain steps + # (used in cutflow tasks) + cfg.x.selector_step_groups = { + "all": [], + "none": ["mc_filter", "json"], + "default": ["mc_filter", "json", "trigger", "met_filter", "jet_veto_map", "lepton", "jet2"], + } + + # plotting overwrites + stylize_processes(cfg, datasets_config) + # Configure colors, labels, etc + setup_plot_styles(cfg, analysis_data.get('plot_defaults',{})) + + #============================================= + # met settings + #============================================= + if run == 2: + cfg.x.met_name = "MET" + cfg.x.raw_met_name = "RawMET" + cfg.x.Met_phi_correction = METPhiConfigRun2( + met_name=cfg.x.met_name, + 
correction_set_template="{variable}_metphicorr_pfmet_{data_source}", + keep_uncorrected=True, + ) + elif run == 3: + cfg.x.met_name = "PuppiMET" + cfg.x.raw_met_name = "RawPuppiMET" + cfg.x.met_phi_correction = METPhiConfig( + met_name=cfg.x.met_name, + met_type=cfg.x.met_name, + correction_set="met_xy_corrections", + keep_uncorrected=True, + pt_phi_variations={ + "stat_xdn": "metphi_statx_down", + "stat_xup": "metphi_statx_up", + "stat_ydn": "metphi_staty_down", + "stat_yup": "metphi_staty_up", + }, + variations={ + "pu_dn": "minbias_xs_down", + "pu_up": "minbias_xs_up", + }, + ) + + #============================================= + # b-tag working points + #============================================= + cfg.x.btag_sf_deepjet = BTagSFConfig( + correction_set="deepJet_shape", + jec_sources=cfg.x.btag_sf_jec_sources, + discriminator="btagDeepFlavB", + ) + if run == 3: + cfg.x.btag_sf_pnet = BTagSFConfig( + correction_set="particleNet_shape", + jec_sources=cfg.x.btag_sf_jec_sources, + discriminator="btagPNetB", + ) + + #============================================= + # top pt reweighting + # https://twiki.cern.ch/twiki/bin/view/CMS/TopPtReweighting?rev=31 + #============================================= + # theory-based method preferred + if TopPtWeightFromTheory: + cfg.x.top_pt_weight = TopPtWeightFromTheoryConfig(params={ + "a": 0.103, + "b": -0.0118, + "c": -0.000134, + "d": 0.973, + }) + else: + # data-based method preferred + cfg.x.top_pt_weight = TopPtWeightFromDataConfig( + params={ + "a": 0.0615, + "a_up": 0.0615 * 1.5, + "a_down": 0.0615 * 0.5, + "b": -0.0005, + "b_up": -0.0005 * 1.5, + "b_down": -0.0005 * 0.5, + }, + pt_max=500.0, + ) + + #============================================= + # dy reweighting and recoil + #============================================= + if run == 3: + era = analysis_cfg.get_era(campaign) + + # dy reweighting + # https://cms-higgs-leprare.docs.cern.ch/htt-common/DY_reweight + cfg.x.dy_weight_config = DrellYanConfig( + era=era, + order="NLO", + correction="DY_pTll_reweighting", + unc_correction="DY_pTll_reweighting_N_uncertainty", + ) + + # dy boson recoil correction + # https://cms-higgs-leprare.docs.cern.ch/htt-common/V_recoil + cfg.x.dy_recoil_config = DrellYanConfig( + era=era, + order="NLO", + correction="Recoil_correction_Rescaling", + unc_correction="Recoil_correction_Uncertainty", + ) + + #============================================= + # shifts + #============================================= + # load JEC sources + all_jec_sources = analysis_data.get("jec_sources", []) + + # nominal + simple shifts + simple_shifts = [ + # (name, base_id, aliases, tags, aux) + ("nominal", 0, None, None, None), + ("tune", 1, None, {"disjoint_from_nominal"}, None), + ("hdamp", 3, None, {"disjoint_from_nominal"}, None), + ("mtop", 5, None, {"disjoint_from_nominal"}, None), + ("minbias_xs", 7, None, None, { + "pu_weight": "pu_weight_{name}", + "normalized_pu_weight": "normalized_pu_weight_{name}", + }), + ("top_pt", 9, None, None, {"top_pt_weight": "top_pt_weight_{direction}"}), + ] + + for name, base_id, aliases, tags, aux in simple_shifts: + if name =='nominal': + cfg.add_shift(name, base_id) + else: + register_shift_pair(cfg, name, base_id, aliases, tags, aux) + + # JEC sources + for jec_source in cfg.x.jec.Jet.uncertainty_sources: + idx = all_jec_sources.index(jec_source) + jec_id = 5000 + 2 * idx + jec_aliases = { + "Jet.pt": "Jet.pt_{name}", + "Jet.mass": "Jet.mass_{name}", + f"{cfg.x.met_name}.pt": f"{cfg.x.met_name}.pt_{{name}}", + f"{cfg.x.met_name}.phi": 
f"{cfg.x.met_name}.phi_{{name}}", + } + register_shift_pair(cfg, f"jec_{jec_source}", jec_id, jec_aliases, {"jec"}, {"jec_source": jec_source}) + + # link btag-related JEC sources + if ("" if jec_source == "Total" else jec_source) in cfg.x.btag_sf_jec_sources: + btag_aliases = { + "normalized_btag_deepjet_weight": "normalized_btag_deepjet_weight_{name}", + "normalized_njet_btag_deepjet_deepjet_weight": "normalized_njet_btag_deepjet_weight_{name}", + "normalized_btag_pnet_weight": "normalized_btag_pnet_weight_{name}", + "normalized_njet_btag_pnet_weight": "normalized_njet_btag_pnet_weight_{name}", + } + add_shift_aliases(cfg, f"jec_{jec_source}", btag_aliases) + + # JER + jer_aliases = { + "Jet.pt": "Jet.pt_{name}", + "Jet.mass": "Jet.mass_{name}", + f"{cfg.x.met_name}.pt": f"{cfg.x.met_name}.pt_{{name}}", + f"{cfg.x.met_name}.phi": f"{cfg.x.met_name}.phi_{{name}}", + } + register_shift_pair(cfg, "jer", 6000, jer_aliases, {"jer"}) + + # TEC shifts + for i, (match, dm) in enumerate(itertools.product(["jet", "e"], [0, 1, 10, 11])): + tec_aliases = { + "Tau.pt": "Tau.pt_{name}", + "Tau.mass": "Tau.mass_{name}", + f"{cfg.x.met_name}.pt": f"{cfg.x.met_name}.pt_{{name}}", + f"{cfg.x.met_name}.phi": f"{cfg.x.met_name}.phi_{{name}}", + } + register_shift_pair(cfg, f"tec_{match}_dm{dm}", 20 + 2 * i, tec_aliases, {"tec"}) + + # TAU uncertainties + cfg.x.tau_unc_names = [ + "jet_dm0", "jet_dm1", "jet_dm10", "jet_dm11", + "e_barrel", "e_endcap", + "mu_0p0To0p4", "mu_0p4To0p8", "mu_0p8To1p2", "mu_1p2To1p7", "mu_1p7To2p3", + ] + for i, unc in enumerate(cfg.x.tau_unc_names): + register_shift_pair(cfg, f"tau_{unc}", 50 + 2 * i, {"tau_weight": f"tau_weight_{unc}_{{direction}}"}) + + # Electron, muon, and energy corrections + register_shift_pair(cfg, "e", 90, {"electron_weight": "electron_weight_{direction}"}) + register_shift_pair(cfg, "mu", 100, {"muon_weight": "muon_weight_{direction}"}) + if run == 3 and year == 2022: + logger.debug("adding ees and eer shifts") + register_shift_pair(cfg, "ees", 92, {"Electron.pt": "Electron.pt_scale_{direction}"}, {"eec"}) + register_shift_pair(cfg, "eer", 94, {"Electron.pt": "Electron.pt_res_{direction}"}, {"eer"}) + + # b-tag uncertainties + cfg.x.btag_unc_names = [ + "hf", "lf", + f"hfstats1_{year}", f"hfstats2_{year}", + f"lfstats1_{year}", f"lfstats2_{year}", + "cferr1", "cferr2", + ] + for i, unc in enumerate(cfg.x.btag_unc_names): + btag_aliases = { + "normalized_btag_deepjet_weight": f"normalized_btag_deepjet_weight_{unc}_{{direction}}", + "normalized_njet_btag_deepjet_weight": f"normalized_njet_btag_deepjet_weight_{unc}_{{direction}}", + } + register_shift_pair(cfg, f"btag_{unc}", 110 + 2 * i, btag_aliases) + + # LHE variations + lhe_shifts = { + "pdf": 130, + "murmuf": 140, + "isr": 150, + "fsr": 155, + } + for name, base_id in lhe_shifts.items(): + aliases = { + f"{name}_weight": f"{name}_weight_{{direction}}", + f"normalized_{name}_weight": f"normalized_{name}_weight_{{direction}}", + } + register_shift_pair(cfg, name, base_id, aliases, {"lhe_weight"} if name in ["pdf", "murmuf"] else None) + + # trigger scale factors + trigger_legs = ["e", "mu", "tau_dm0", "tau_dm1", "tau_dm10", "tau_dm11", "jet"] + for i, leg in enumerate(trigger_legs): + register_shift_pair(cfg, f"trigger_{leg}", 180 + 2 * i) + + #============================================= + # Add scale-factors from correction lib + #============================================= + if run == 2: + tauPOGJsonFile = "tau.json.gz" + metPOGJsonFile = "met.json.gz" + + elif run == 3: # nasty names, workaround, 
also missing corrections for 2024 still + if year == 2022: + met_pog_suffix = f"{year}_{year}{'' if campaign.has_tag('preEE') else 'EE'}" + tau_pog_suffix = f"{'pre' if campaign.has_tag('preEE') else 'post'}EE" + elif year == 2023: + met_pog_suffix = f"{year}_{year}{'' if campaign.has_tag('preBPix') else 'BPix'}" + tau_pog_suffix = f"{'pre' if campaign.has_tag('preBPix') else 'post'}BPix" + if year == 2024: # just for now FIXME + tauPOGJsonFile = "tau_DeepTau2018v2p5_2023_preBPix.json.gz" + metPOGJsonFile = "met_xyCorrections_2024.json.gz" + else: + tauPOGJsonFile = f"tau_DeepTau2018v2p5_{year}_{tau_pog_suffix}.json.gz" + metPOGJsonFile = f"met_xyCorrections_{met_pog_suffix}.json.gz" + + campaign_tag = "" + for tag in ("preEE", "postEE", "preBPix", "postBPix"): + if campaign.has_tag(tag, mode=any): + if campaign_tag: + raise ValueError(f"Multiple campaign tags found: {cfg.x.campaign_tag} and {tag}") + campaign_tag = tag + + ver = '_v1' if year == 2024 else '' + # common files + # (versions in the end are for hashing in cases where file contents changed but paths did not) + goldenFile = analysis_data['years'][year]["certified_lumi_file"] + normtagFile = analysis_data['years'][year]["normtag"] + + add_external("lumi", {"golden":(goldenFile, "v1"), "normtag": (normtagFile, "v1")}) + add_external("jet_jerc", (localizePOGSF(year, "JME", "jet_jerc.json.gz"), "v1")) + add_external("jet_veto_map", (localizePOGSF(year, "JME", "jetvetomaps.json.gz"), "v1")) + add_external("muon_sf", (localizePOGSF(year, "MUO", "muon_Z.json.gz"), "v1")) + add_external("electron_sf", (localizePOGSF(year, "EGM", f"electron{ver}.json.gz"), "v1")) + + getfromyear = year + if year == 2024: + getfromyear = 2023 # these corrections are still missing for 2024 workaround with 2023 preBPix for now + tau_pog_suffix = "preBPix" + add_external("met_phi_corr", (f"{os.path.dirname(os.path.abspath(__file__))}/../data/{metPOGJsonFile}", "v1")) + else: + add_external("met_phi_corr", (localizePOGSF(getfromyear, "JME", f"{metPOGJsonFile}"), "v1")) + add_external("btag_sf_corr", (localizePOGSF(getfromyear, "BTV", "btagging.json.gz"), "v1")) + add_external("tau_sf", (localizePOGSF(getfromyear, "TAU", f"{tauPOGJsonFile}"), "v1")) + add_external("pu_sf", (localizePOGSF(getfromyear, "LUM", "puWeights.json.gz"), "v1")) + add_external("trigger_sf", Ext(f"{os.path.dirname(os.path.abspath(__file__))}/../data/TriggerScaleFactors/{getfromyear}{tau_pog_suffix}", + subpaths=DotDict( + muon="temporary_MuHlt_abseta_pt.json.gz", + cross_muon="CrossMuTauHlt.json.gz", + electron="electronHlt.json.gz", + cross_electron="CrossEleTauHlt.json.gz", + tau=f"tau_trigger_DeepTau2018v2p5_{getfromyear}{tau_pog_suffix}.json.gz", + jet=f"ditaujet_jetleg60_{getfromyear}{tau_pog_suffix}.json.gz", + ), + version="v1", + )) + + # run specific files + if run == 2: + add_external("tau_trigger_sf", (localizePOGSF(year, "TAU", "tau.json.gz"), "v1")) + elif run == 3: + # electron energy correction and smearing + add_external("electron_ss", (localizePOGSF(year, "EGM", f"electronSS_EtDependent{ver}.json.gz"), "v1")) + add_external("jet_id", (localizePOGSF(year, "JME", "jetid.json.gz"), "v1")) + + #============================================= + # reductions + #============================================= + # target file size after MergeReducedEvents in MB + cfg.x.reduced_file_size = 512.0 + # columns to keep after certain steps + cfg.x.keep_columns = DotDict.wrap({ + # !! 
note that this set is used by the cf_default reducer + "cf.ReduceEvents": { + # mandatory + ColumnCollection.MANDATORY_COFFEA, + # event info + "deterministic_seed", + # object info + "Jet.{pt,eta,phi,mass,hadronFlavour,puId,btag*,nConstituents,deterministic_seed}", + "NonCleanedJet.{pt,eta,phi,mass,hadronFlavour,puId,hhbtag,btag*,nConstituents,deterministic_seed}", + "VBFJet.{pt,eta,phi,mass,hadronFlavour,puId,btag*,nConstituents,deterministic_seed}", + "FatJet.*", + "SubJet{1,2}.*", + "Electron.*", + "ElectronLoose.*", + "ElectronTight.*", + "Muon.*", + "MuonLoose.*", + "MuonTight.*", + "Tau.*", + "TauNoID.*", + "TauIso.*", + "GenPart*", + f"{cfg.x.met_name}.{{pt,phi,significance,covXX,covXY,covYY}}", + "PV.npvs", + "HLT.*", + # keep all columns added during selection and reduction, but skip cutflow features + ColumnCollection.ALL_FROM_SELECTOR, + skip_column("cutflow.*"), + }, + "cf.MergeSelectionMasks": { + "cutflow.*", + }, + "cf.UniteColumns": { + "*", *skip_column("*_{up,down}"), + }, + }) + + #============================================= + # Add event weights configuration + # Each key corresponds to a possible event weight column. + # Each value is a list of shift dependencies (from systematics sources). + # This mapping is used by weight producers later in the workflow. + #============================================= + get_shifts = functools.partial(get_shifts_from_sources, cfg) + + # --- Global event weight configuration --- + base_event_weights = { + "pdf_weight": get_shifts("pdf"), + "murmuf_weight": get_shifts("murmuf"), + "normalization_weight": [], + "normalization_weight_inclusive": [], + "normalized_isr_weight": get_shifts("isr"), + "normalized_fsr_weight": get_shifts("fsr"), + # "normalized_pu_weight": get_shifts("minbias_xs"), + # "normalized_njet_btag_deepjet_weight": get_shifts(*(f"btag_{u}" for u in cfg.x.btag_unc_names)), + # "electron_weight": get_shifts("e"), + # "muon_weight": get_shifts("mu"), + # "tau_weight": get_shifts(*(f"tau_{u}" for u in cfg.x.tau_unc_names)), + # "trigger_weight": get_shifts(*(f"trigger_{leg}" for leg in trigger_legs)), + } + # Store in the config (preserving DotDict interface) + cfg.x.event_weights = DotDict(base_event_weights) + + # --- Per-dataset customizations --- + for dataset in cfg.datasets: + # Initialize empty mapping for each dataset (inherits from global) + dataset.x.event_weights = {} + if dataset.has_tag("ttbar"): + dataset.x.event_weights["top_pt_weight"] = get_shifts("top_pt") + elif dataset.has_tag("dy"): + # Placeholder for Drell–Yan reweighting uncertainties + dataset.x.event_weights["dy_weight"] = [] # TODO: add DY uncertainty sources + + # ----------------------------------------------------------------------------- + # Shift groups (for plotting / uncertainty grouping) + # ----------------------------------------------------------------------------- + cfg.x.shift_groups = { + "jec": _names_from_tag(("jec", "jer")), + "tec": _names_from_tag("tec"), + "eec": _names_from_tag(("ees", "eer")), + "ees": _names_from_tag("ees"), + "eer": _names_from_tag("eer"), + "lepton_sf": [s.name for s in (*get_shifts("e"), *get_shifts("mu"))], + "btag_sf": [s.name for s in get_shifts(*(f"btag_{u}" for u in cfg.x.btag_unc_names))], + "pdf": [s.name for s in get_shifts("pdf")], + "murmuf": [s.name for s in get_shifts("murmuf")], + "pu": [s.name for s in get_shifts("minbias_xs")], + } + + #============================================= + # add channels + #============================================= + #for channel_config in 
analysis_data.get("channels", []): + # cfg.add_channel( + # name=channel_config["name"], + # id=channel_config["id"], + # label=channel_config["label"], + # ) + + # 2lep + cfg.add_channel(name="cetau", id=1, label=r"$e\tau_{h}$") + cfg.add_channel(name="cmutau", id=2, label=r"$\mu\tau_{h}$") + cfg.add_channel(name="ctautau", id=3, label=r"$\tau_{h}\tau_{h}$") + cfg.add_channel(name="cee", id=4, label=r"$ee$") + cfg.add_channel(name="cmumu", id=5, label=r"$\mu\mu$") + cfg.add_channel(name="cemu", id=6, label=r"$e\mu$") + # 3lep + cfg.add_channel(name="c3e", id=14, label=r"$eee$") + cfg.add_channel(name="c2emu", id=15, label=r"$ee\mu$") + cfg.add_channel(name="ce2mu", id=16, label=r"$e\mu\mu$") + cfg.add_channel(name="c3mu", id=17, label=r"$\mu\mu\mu$") + # 4lep no taus + cfg.add_channel(name="c4e", id=18, label=r"$eeee$") + cfg.add_channel(name="c3emu", id=19, label=r"$eee\mu$") + cfg.add_channel(name="c2e2mu", id=20, label=r"$ee\mu\mu$") + cfg.add_channel(name="ce3mu", id=21, label=r"$e\mu\mu\mu$") + cfg.add_channel(name="c4mu", id=22, label=r"$\mu\mu\mu\mu$") + # 4lep with taus + cfg.add_channel(name="c3etau", id=23, label=r"$eee\tau_{h}$") + cfg.add_channel(name="c2emutau", id=24, label=r"$ee\mu\tau_{h}$") + cfg.add_channel(name="ce2mutau", id=25, label=r"$e\mu\mu\tau{h}$") + cfg.add_channel(name="c3mutau", id=26, label=r"$\mu\mu\mu\tau{h}$") + cfg.add_channel(name="c2e2tau", id=27, label=r"$ee\tau{h}\tau{h}$") + cfg.add_channel(name="cemu2tau", id=28, label=r"$e\mu\tau{h}\tau{h}$") + cfg.add_channel(name="c2mu2tau", id=29, label=r"$\mu\mu\tau{h}\tau{h}$") + cfg.add_channel(name="ce3tau", id=30, label=r"$e\tau{h}\tau{h}\tau{h}$") + cfg.add_channel(name="cmu3tau", id=31, label=r"$\mu\tau{h}\tau{h}\tau{h}$") + cfg.add_channel(name="c4tau", id=32, label=r"$\tau{h}\tau{h}\tau{h}\tau{h}$") + cfg.add_channel(name="c2e0or1tau", id=33, label=r"$ee\ \leq 1\,\tau_{h}$") + cfg.add_channel(name="cemu0or1tau", id=34, label=r"$e\mu\ \leq 1\,\tau_{h}$") + cfg.add_channel(name="c2mu0or1tau", id=35, label=r"$\mu\mu\ \leq 1\,\tau_{h}$") + + #============================================= + # add variables, categories , met and triggers + #============================================= + add_categories(cfg) + add_variables(cfg) + add_met_filters(cfg) + add_triggers(cfg) + + return cfg diff --git a/hbt/config/met_filters.py b/multilepton/config/met_filters.py similarity index 99% rename from hbt/config/met_filters.py rename to multilepton/config/met_filters.py index ba82889f..dd008abc 100644 --- a/hbt/config/met_filters.py +++ b/multilepton/config/met_filters.py @@ -12,7 +12,6 @@ def add_met_filters(config: od.Config) -> None: """ Adds all MET filters to a *config*. - Resources: https://twiki.cern.ch/twiki/bin/view/CMS/MissingETOptionalFiltersRun2?rev=157#UL_data """ diff --git a/multilepton/config/run_multilepton.sh b/multilepton/config/run_multilepton.sh new file mode 120000 index 00000000..235a95bf --- /dev/null +++ b/multilepton/config/run_multilepton.sh @@ -0,0 +1 @@ +../../run_multilepton.sh \ No newline at end of file diff --git a/multilepton/config/styles.py b/multilepton/config/styles.py new file mode 100644 index 00000000..c38b2c0d --- /dev/null +++ b/multilepton/config/styles.py @@ -0,0 +1,189 @@ +# coding: utf-8 + +""" +Plot style definitions. 
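+
+Style groups are read from the analysis YAML; a group can build on another one via an
+``inherit`` key, which ``resolve_inheritance`` below resolves with a shallow update
+(child keys replace parent keys, nested dicts are not merged). A minimal sketch with
+hypothetical entries:
+
+.. code-block:: python
+
+    resolve_inheritance({
+        "base": {"legend_cfg": {"cf_entries_per_column": "auto"}},
+        "tall": {"inherit": "base", "whitespace_fraction": 0.4},
+    })
+    # -> {"base": {"legend_cfg": {"cf_entries_per_column": "auto"}},
+    #     "tall": {"legend_cfg": {"cf_entries_per_column": "auto"}, "whitespace_fraction": 0.4}}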
+""" + +from __future__ import annotations + +import law +import order as od + +from copy import deepcopy +from collections import defaultdict, ChainMap +from columnflow.util import DotDict, try_int + + +logger = law.logger.get_logger(__name__) + + + +def resolve_inheritance(styles: dict) -> dict: + """ + Resolve 'inherit' relationships in the style dictionary. + Later definitions override parent ones. + """ + resolved = {} + for name, cfg in styles.items(): + if "inherit" in cfg: + parent = cfg.pop("inherit") + base = deepcopy(resolved.get(parent, styles.get(parent, {}))) + merged = deepcopy(base) + merged.update(cfg) + resolved[name] = merged + else: + resolved[name] = deepcopy(cfg) + return resolved + + +def setup_plot_styles(config: od.Config, yaml_data) -> None: + """ + Setup plot styles from a YAML configuration file. + Fallback to hardcoded defaults if YAML is missing. + """ + # General settings + general = yaml_data.get("general", {}) + config.x.default_general_settings = { + "cms_label": general.get("cms_label", "Work-in-progress"), + "whitespace_fraction": general.get("whitespace_fraction", 0.31), + } + + # Global defaults + config.x.default_custom_style_config = yaml_data.get("default_style", "wide_legend") + config.x.default_blinding_threshold = yaml_data.get("blinding_threshold", 0) + + # ───────────────────────────── + # Build custom style groups + # ───────────────────────────── + style_defs = yaml_data.get("styles", {}) + resolved_styles = resolve_inheritance(style_defs) + + # Optional: add computed or callable parameters + for name, style in resolved_styles.items(): + legend_cfg = style.get("legend_cfg", {}) + if legend_cfg.get("cf_entries_per_column", "auto") == "auto": + legend_cfg["cf_entries_per_column"] = legend_entries_per_column + style["legend_cfg"] = legend_cfg + + config.x.custom_style_config_groups = resolved_styles + logger.info(f"Loaded {len(resolved_styles)} style configurations from analysis.yaml") + return + + +def apply_process_styles(config, process_key, process_data, group=None): + """Apply individual style info to a process if it exists. + """ + color = process_data.get("color") + label = process_data.get("label") + cmsdb_list = process_data.get("cmsdb", []) + + for dataset in cmsdb_list: + if (p := config.get_process(dataset, default=None)): + if color: + p.color1 = color + if label: + p.label = label + + # only build label automatically if none is provided AND group is signal + if not label and group == "signal": + name = p.name if hasattr(p, "name") else dataset # fallback + if "htt_hvv" in name: + decay = r"\tau\tau VV" + elif "htt_htt" in name: + decay = r"\tau\tau\tau\tau" + elif "hvv_vv" in name: + decay = "VVVV" + + # --- GGF signal --- + if name.startswith("hh_ggf"): + kl = name.split("_")[-2].replace("kl", "") + kappa_label = create_kappa_label(**{r"\lambda": kl, "t": "1"}) + p.label = rf"$HH_{{ggf}} \rightarrow {decay}$ __SCALE____SHORT____BREAK__({kappa_label})" + + # --- VBF signal --- + elif name.startswith("hh_vbf"): + parts = {x[:2]: x[2:] for x in name.split("_") if x.startswith(("kv", "k2v", "kl"))} + kv = parts.get("kv", "1") + k2v = parts.get("k2v", "1") + kl = parts.get("kl", "1") + kappa_label = create_kappa_label(**{"2V": k2v, r"\lambda": kl, "V": kv}) + p.label = rf"$HH_{{vbf}} \rightarrow {decay}$ __SCALE____SHORT____BREAK__({kappa_label})" + return + + +def stylize_processes(config: od.Config, datasets_cfg: DotMap) -> None: + """ + Applies style and metadata (colors, labels, etc.) + to each process from the datasets YAML. 
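+
+    The expected ``datasets_cfg`` layout, inferred from the accessors used below
+    (all names below are placeholders):
+
+    .. code-block:: python
+
+        {
+            "signal": {
+                "ggf": {  # optional extra nesting level without a "cmsdb" key
+                    "some_signal": {"cmsdb": ["<process_name>"], "color": "#5790fc", "label": "..."},
+                },
+            },
+            "background": {
+                "some_background": {"cmsdb": ["<process_name>"], "color": "#964a8b"},
+            },
+        }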
+ Recommended cms colors see: https://cms-analysis.docs.cern.ch/guidelines/plotting/colors + """ + # Loop through signal/background groups + for group_name, group in datasets_cfg.items(): + if group_name not in ["signal", "background"]: + continue + + for sub_group_name, sub_group in group.items(): + # Nested levels (e.g. 'nonresonant', 'ggf', 'vbf') + if isinstance(sub_group, dict) and "cmsdb" not in sub_group: + for process_name, process in sub_group.items(): + apply_process_styles(config, process_name, process, group_name) + else: + apply_process_styles(config, sub_group_name, sub_group, group_name) + return + + +def legend_entries_per_column(ax, handles: list, labels: list, n_cols: int) -> list[int]: + """ + Control number of entries such that backgrounds are in the first n - 1 columns, and everything + else in the last one. + """ + # get number of background and remaining entries + n_backgrounds = sum(1 for handle in handles if handle.__class__.__name__ == "StepPatch") + n_other = len(handles) - n_backgrounds + # fill number of entries per column + entries_per_col = n_cols * [0] + n_bkg_cols = n_cols + # set last column if non-backgrounds are present + if n_other: + entries_per_col[-1] = n_other + n_bkg_cols -= 1 + # fill background columns + for i in range(n_bkg_cols): + entries_per_col[i] = n_backgrounds // n_bkg_cols + (n_backgrounds % n_bkg_cols > i) + return entries_per_col + + +def kappa_str_to_num(value: str) -> int | float: + """ + Converts a string-encoded kappa value to an actual number. An integer is returned if possible, + and a float otherwise. Examples: + .. code-block:: python + kappa_str_to_num("1") # 1 + kappa_str_to_num("2.45") # 2.45 + kappa_str_to_num("m1p7") # -1.7 + """ + value = value.replace("p", ".").replace("m", "-") + return int(value) if try_int(value) else float(value) + + +def group_kappas(**kappas: dict[str, str]) -> dict[int | float, list[str]]: + """ + Groups kappa values by their coupling strength. Examples: + .. 
code-block:: python + group_kappas(kl="1", kt="1") # {1: ["kl", "kt"]} + group_kappas(kl="2p45", kt="1") # {2.45: ["kl"], 1: ["kt"]} + group_kappas(k2v="0", kv="1", kl="1") # {0: ["k2v"], 1: ["kv", "kl"]} + """ + str_groups = defaultdict(list) + for k, v in kappas.items(): + str_groups[v].append(k) + # convert keys to numbers + return {kappa_str_to_num(k): v for k, v in str_groups.items()} + + +def create_kappa_label(*, sep: str = ",", **kappas: dict[str, str]) -> str: + parts = [] + for v, _kappas in group_kappas(**kappas).items(): + k_str = "=".join(rf"\kappa_{{{k}}}"for k in _kappas) + parts.append(f"{k_str}={v}") + return "$" + sep.join(parts) + "$" diff --git a/multilepton/config/triggers.py b/multilepton/config/triggers.py new file mode 100644 index 00000000..8b6b8e82 --- /dev/null +++ b/multilepton/config/triggers.py @@ -0,0 +1,493 @@ +# coding: utf-8 + +from __future__ import annotations + +import functools + +import order as od + +from columnflow.util import DotDict + +from multilepton.config.util import Trigger, TriggerLeg, TriggerBits as Bits + + +trigger_bits = DotDict.wrap({ + # for v12: + # - checked with https://github.com/cms-sw/cmssw/blob/CMSSW_13_0_X/PhysicsTools/NanoAOD/python/triggerObjects_cff.py + # - and in particular https://github.com/cms-sw/cmssw/blob/2defd844e96613d2438b690d10b79c773e02ab57/PhysicsTools/NanoAOD/python/triggerObjects_cff.py # noqa: E501 + # for v14: + # - from https://github.com/cms-sw/cmssw/tree/f50cf84669608dbe67fd8430660abe651d5b46fd/PhysicsTools/NanoAOD/python/triggerObjects_cff.py # noqa: E501 + # - last update in https://github.com/cms-sw/cmssw/blob/CMSSW_14_0_X/PhysicsTools/NanoAOD/python/triggerObjects_cff.py # noqa: E501 + "e": { + "CaloIdLTrackIdLIsoVL": Bits(v12=1, v14="v12"), + "WPTightTrackIso": Bits(v12=2, v14="v12"), + "WPLooseTrackIso": Bits(v12=4, v14="v12"), + "OverlapFilterPFTau": Bits(v12=8, v14="v12"), + "DiElectron": Bits(v12=16), + "DiElectronLeg1": Bits(v14=16), + "DiElectronLeg2": Bits(v14=32), + "MuEle": Bits(v12=32, v14=64), + "EleTau": Bits(v12=64, v14=128), + "TripleElectron": Bits(v12=128, v14=256), + "SingleMuonDiEle": Bits(v12=256, v14=512), + "DiMuonSingleEle": Bits(v12=512, v14=1024), + "SingleEle_L1DoubleAndSingleEle": Bits(v12=1024, v14=2048), + "SingleEle_CaloIdVT_GsfTrkIdT": Bits(v12=2048, v14=4096), + "SingleEle_PFJet": Bits(v12=4096, v14=8192), + "Photon175_Photon200": Bits(v12=8192, v14=16384), + "DoubleEle_CaloIdL_MW_seeded": Bits(v14=32768), + "DoubleEle_CaloIdL_MW_unseeded": Bits(v14=65536), + "EleTauPNet": Bits(v14=131072), + }, + "mu": { + "TrkIsoVVL": Bits(v12=1, v14="v12"), + "Iso": Bits(v12=2, v14="v12"), + "OverlapFilterPFTau": Bits(v12=4, v14="v12"), + "SingleMuon": Bits(v12=8, v14="v12"), + "DiMuon": Bits(v12=16, v14="v12"), + "MuEle": Bits(v12=32, v14="v12"), + "MuTau": Bits(v12=64, v14="v12"), + "TripleMuon": Bits(v12=128, v14="v12"), + "DiMuonSingleEle": Bits(v12=256, v14="v12"), + "SingleMuonDiEle": Bits(v12=512, v14="v12"), + "Mu50": Bits(v12=1024, v14="v12"), + "Mu100": Bits(v12=2048, v14="v12"), + "SingleMuonSinglePhoton": Bits(v12=4096, v14="v12"), + "MuTauPNet": Bits(v14=8192), + }, + "tau": { # general comment: lot of v14 paths contain PNet paths, not available in v12, e.g. 
OverlapFilterIsoEle + "LooseChargedIso": Bits(v12=1), + "Loose": Bits(v14=1), + "MediumChargedIso": Bits(v12=2), + "Medium": Bits(v14=2), + "TightChargedIso": Bits(v12=4), + "Tight": Bits(v14=4), + "DeepTau": Bits(v12=8, v14="v12"), + "PNet": Bits(v14=16), + "TightOOSCPhotons": Bits(v12=16), + "HPS": Bits(v12=32, v14=268435456), + "ChargedIso": Bits(v14=32), + "ChargedIsoDiTau": Bits(v12=64), + "Dxy": Bits(v14=64), + "DeepTauDiTau": Bits(v12=128, v14=2048 + 8), # manually created bit combinations for v14 + "ETauFilter": Bits(v14=128), + "MuTauFilter": Bits(v14=256), + "OverlapFilterIsoEle": Bits(v12=256, v14=4096), # contains HPS in v14, not in v12 + "OverlapFilterIsoMu": Bits(v12=512, v14=8192), # contains HPS in v14, not in v12 + "SingleTau": Bits(v14=512), + "SingleTauOrTauMet": Bits(v12=1024), # more general paths than SingleTau in v14 + "VBFDiTau": Bits(v14=1024), + "VBFpDoublePFTau_run2": Bits(v12=2048), + "VBFpDoublePFTau_run3": Bits(v12=4096), # warning: this trigger bit expects "ChargedIso" in the filter name, this does not correspond to our actual VBF filter name # noqa + "DiTau": Bits(v14=2048), + "DiPFJetAndDiTau": Bits(v12=8192), + "DiTauAndPFJet": Bits(v12=16384, v14="v12"), + "DisplacedTau": Bits(v12=32768), + "ETauDisplaced": Bits(v14=32768), + "MuTauDisplaced": Bits(v14=65536), + "DiTauDisplaced": Bits(v14=131072), + "Monitoring": Bits(v12=65536, v14=262144), + "MonitoringForVBFIsoTau": Bits(v14=524288), + "MonitoringDiTauAndPFJet": Bits(v14=1048576), + "MonitoringMuTauDisplaced": Bits(v14=2097152), + "MonitoringDiTau": Bits(v14=8388608), + "VBFDoubleTauMonitoring": Bits(v14=33554432), + "OverlapFilter": Bits(v14=16777216), + "RegionalPaths": Bits(v12=131072), + "L1SeededPaths": Bits(v12=262144), + "MatchL1HLT": Bits(v12=262144, v14=134217728), # for v12: alias for v12-v14 compatibility + "1Prong": Bits(v12=524288), + "OneProng": Bits(v14=4194304), # just changed "1" to "One" for v14, still means different filters + "SinglePFTauFilter": Bits(v14=536870912), + "VBFSingleTau": Bits(v14=1073741824), + }, + "jet": { + "4PixelOnlyPFCentralJetTightIDPt20": Bits(v12=1, v14="v12"), + "3PixelOnlyPFCentralJetTightIDPt30": Bits(v12=2, v14="v12"), + "PFJetFilterTwoC30": Bits(v12=4, v14="v12"), + "4PFCentralJetTightIDPt30": Bits(v12=8, v14="v12"), + "4PFCentralJetTightIDPt35": Bits(v12=16, v14="v12"), + "QuadCentralJet30": Bits(v12=32, v14="v12"), + "2PixelOnlyPFCentralJetTightIDPt40": Bits(v12=64, v14="v12"), + "L1sTripleJetVBF_orHTT_orDoubleJet_orSingleJet": Bits(v12=128, v14="v12"), + "3PFCentralJetTightIDPt40": Bits(v12=256, v14="v12"), + "3PFCentralJetTightIDPt45": Bits(v12=512, v14="v12"), + "L1sQuadJetsHT": Bits(v12=1024, v14="v12"), + "BTagCaloDeepCSVp17Double": Bits(v12=2048, v14="v12"), + "PFCentralJetLooseIDQuad30": Bits(v12=4096, v14="v12"), + "1PFCentralJetLooseID75": Bits(v12=8192, v14="v12"), + "2PFCentralJetLooseID60": Bits(v12=16384, v14="v12"), + "3PFCentralJetLooseID45": Bits(v12=32768, v14="v12"), + "4PFCentralJetLooseID40": Bits(v12=65536, v14="v12"), + "DoubleTau+Jet": Bits(v12=131072, v14="v12"), # v14 also contains PNet paths + "VBFcrossCleanedDeepTauPFTau": Bits(v12=262144, v14="v12"), # more general VBFDiTauJets in v14 TODO: change name? # noqa + "VBFcrossCleanedUsingDijetCorr": Bits(v12=524288, v14="v12"), # more general VBFSingleTauJets in v14 TODO: change name? 
# noqa + "MonitoringMuon+Tau+Jet": Bits(v12=1048576, v14="v12"), + "2PFCentralJetTightIDPt50": Bits(v12=2097152, v14="v12"), + "1PixelOnlyPFCentralJetTightIDPt60": Bits(v12=4194304, v14="v12"), + "1PFCentralJetTightIDPt70": Bits(v12=8388608, v14="v12"), + "BTagPFDeepJet1p5Single": Bits(v12=16777216, v14="v12"), + "BTagPFDeepJet4p5Triple": Bits(v12=33554432, v14="v12"), + "2BTagSumOR2BTagMeanPaths": Bits(v12=67108864, v14="v12"), + "2/1PixelOnlyPFCentralJetTightIDPt20/50": Bits(v12=134217728, v14="v12"), + "2PFCentralJetTightIDPt30": Bits(v12=268435456, v14="v12"), + "1PFCentralJetTightIDPt60": Bits(v12=536870912, v14="v12"), + "PF2CentralJetPt30PNet2BTagMean0p50": Bits(v12=1073741824, v14="v12"), + }, +}) + + +def get_triggerID(name): + """ + General requirement from the lepton selection: + For cross triggers, the lepton leg (lepton= {"e", "mu"}) must be defined before the tau leg. + An error here would be caught in the lepton selection, but it is better to avoid it. + Convention for Ids: + - 1xx: single muon triggers + - 2xx: single electron triggers + - 3xx: mu-tau triggers + - 4xx: e-tau triggers + - 5xx: tau-tau triggers + - 6xx: vbf triggers + - 7xx: tau tau jet triggers + - 8xx: quadjet triggers + Starting from xx = 01 and with a unique name for each path across all years. + """ + ids = { + # single muon triggers + "HLT_IsoMu22": 101, + "HLT_IsoMu22_eta2p1": 102, + "HLT_IsoTkMu22": 103, + "HLT_IsoTkMu22_eta2p1": 104, + "HLT_IsoMu24": 105, + "HLT_IsoMu27": 106, + # single electron triggers + "HLT_Ele25_eta2p1_WPTight_Gsf": 201, + "HLT_Ele32_WPTight_Gsf": 202, + "HLT_Ele32_WPTight_Gsf_L1DoubleEG": 203, + "HLT_Ele35_WPTight_Gsf": 204, + "HLT_Ele30_WPTight_Gsf": 205, + # mu–tau triggers + "HLT_IsoMu19_eta2p1_LooseIsoPFTau20": 301, + "HLT_IsoMu19_eta2p1_LooseIsoPFTau20_SingleL1": 302, + "HLT_IsoMu20_eta2p1_LooseChargedIsoPFTau27_eta2p1_CrossL1": 303, + "HLT_IsoMu20_eta2p1_LooseDeepTauPFTauHPS27_eta2p1_CrossL1": 304, + # e–tau triggers + "HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau20_SingleL1": 401, + "HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau20": 402, + "HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau30": 403, + "HLT_Ele24_eta2p1_WPTight_Gsf_LooseChargedIsoPFTau30_eta2p1_CrossL1": 404, + "HLT_Ele24_eta2p1_WPTight_Gsf_LooseDeepTauPFTauHPS30_eta2p1_CrossL1": 405, + # tau-tau triggers + "HLT_DoubleMediumIsoPFTau35_Trk1_eta2p1_Reg": 501, + "HLT_DoubleMediumCombinedIsoPFTau35_Trk1_eta2p1_Reg": 502, + "HLT_DoubleMediumChargedIsoPFTau35_Trk1_eta2p1_Reg": 503, + "HLT_DoubleTightChargedIsoPFTau35_Trk1_TightID_eta2p1_Reg": 504, + "HLT_DoubleMediumChargedIsoPFTau40_Trk1_TightID_eta2p1_Reg": 505, + "HLT_DoubleTightChargedIsoPFTau40_Trk1_eta2p1_Reg": 506, + "HLT_DoubleMediumDeepTauPFTauHPS35_L2NN_eta2p1": 507, + "HLT_DoubleMediumChargedIsoPFTauHPS40_Trk1_eta2p1": 508, + "HLT_DoubleMediumChargedIsoDisplacedPFTauHPS32_Trk1_eta2p1": 509, + # VBF di-tau triggers + "HLT_VBF_DoubleLooseChargedIsoPFTau20_Trk1_eta2p1_Reg": 601, + "HLT_VBF_DoubleMediumDeepTauPFTauHPS20_eta2p1": 602, + "HLT_VBF_DoubleLooseChargedIsoPFTauHPS20_Trk1_eta2p1": 603, + "HLT_DoublePFJets40_Mass500_MediumDeepTauPFTauHPS45_L2NN_MediumDeepTauPFTauHPS20_eta2p1": 604, + # tau+jet triggers + "HLT_DoubleMediumDeepTauPFTauHPS30_L2NN_eta2p1_PFJet60": 701, + "HLT_DoubleMediumDeepTauPFTauHPS30_L2NN_eta2p1_PFJet75": 702, + } + if name not in ids: + raise KeyError(f"Trigger name '{name}' not found in trigger ID list.") + return ids[name] + + +def get_bit_sum(nano_version: int, obj_name: str, names: list[str | None]) -> int | None: + total = 0 + for 
name in names: + if not name: + continue + try: + val = trigger_bits[obj_name][name].get(nano_version) or 0 + total += val + except KeyError: + logger.warning(f"missing trigger bit for {obj_name}.{name} at nano_version={nano_version}") + return total or None + + +def add_triggers(config: od.Config) -> None: + """ + Adds all triggers to a *config*. For the conversion from filter names to trigger bits, see + https://github.com/cms-sw/cmssw/blob/master/PhysicsTools/NanoAOD/python/triggerObjects_cff.py. + - Tau Trigger: https://twiki.cern.ch/twiki/bin/view/CMS/Tau + - Electron Trigger: https://twiki.cern.ch/twiki/bin/view/CMS/EgHLTRunIIISummary + - Muon Trigger: https://twiki.cern.ch/twiki/bin/view/CMS/MuonHLT + """ + # get trigger bits for the requested nano version + nano_version = config.campaign.x.version + year = config.campaign.x.year + get_bit_sum_v = functools.partial(get_bit_sum, nano_version) + config.x.triggers = od.UniqueObjectIndex(Trigger) + multileptons_triggers = {} + + if year in [2017, 2018, 2022, 2023, 2024]: + multileptons_triggers.update({ + + # single muon + "HLT_IsoMu24": { + 'legs': dict(mu=TriggerLeg(pdg_id=13, trigger_bits=get_bit_sum_v("mu", ["SingleMuon",]))), + #'filters': ["hltL3crIsoL1sSingleMu22L1f0L2f10QL3f24QL3trkIsoFiltered0p08", "hltL3crIsoL1sSingleMu22L1f0L2f10QL3f24QL3trkIsoFiltered0p07"], # (1mu + Iso) + 'on_datasets': ["mutau", "emu_from_e", "emu_from_mu", "mumu"], + 'tags': ["single_trigger", "single_mu"], + }, + }) + + if year in [2022, 2023, 2024]: + multileptons_triggers.update({ + + # single electron + "HLT_Ele30_WPTight_Gsf": { + 'legs': dict(e=TriggerLeg(pdg_id=11, trigger_bits=get_bit_sum_v("e", ["WPTightTrackIso",]))), + #'filters': [], + 'on_datasets': ["etau", "ee", "emu_from_e", "emu_from_mu"], + 'tags': ["single_trigger", "single_e"], + }, + + # e tauh + "HLT_Ele24_eta2p1_WPTight_Gsf_LooseDeepTauPFTauHPS30_eta2p1_CrossL1": { + 'legs': dict(e=TriggerLeg(pdg_id=11, trigger_bits=get_bit_sum_v("e", ["OverlapFilterPFTau","EleTau",])), + tau=TriggerLeg(pdg_id=15, trigger_bits=get_bit_sum_v("tau", ["DeepTau","HPS","OverlapFilterIsoEle","ETauFilter" if nano_version == 14 else None,])), + ), + #'filters': ["hltHpsOverlapFilterIsoEle24WPTightGsfLooseETauWPDeepTauPFTau30"], # (OverlapFilter) + 'on_datasets': ["etau"], + 'tags': ["cross_trigger", "cross_e_tau"], + }, + + # mu tauh + "HLT_IsoMu20_eta2p1_LooseDeepTauPFTauHPS27_eta2p1_CrossL1": { + 'legs': dict(mu=TriggerLeg(pdg_id=13, trigger_bits=get_bit_sum_v("mu", ["OverlapFilterPFTau","MuTau",])), + tau=TriggerLeg(pdg_id=15, trigger_bits=get_bit_sum_v("tau", ["DeepTau","HPS","OverlapFilterIsoMu","MatchL1HLT","MuTauFilter" if nano_version == 14 else None,])), + ), + #'filters': ["hltHpsSelectedPFTau27LooseMuTauWPDeepTauVsJetsAgainstMuonL1HLTMatched"], # (DeepTau + HPS), + 'on_datasets': ["mutau"], + 'tags': ["cross_trigger", "cross_mu_tau"], + }, + + # tauh tauh + "HLT_DoubleMediumDeepTauPFTauHPS35_L2NN_eta2p1": { + 'legs': dict(tau1=TriggerLeg(pdg_id=15, trigger_bits=get_bit_sum_v("tau", ["DeepTauDiTau","HPS", "Medium" if nano_version == 14 else None,])), + tau2=TriggerLeg(pdg_id=15, trigger_bits=get_bit_sum_v("tau", ["DeepTauDiTau","HPS", "Medium" if nano_version == 14 else None,])), + ), + #'filters': ["hltHpsDoublePFTau35MediumDitauWPDeepTauL1HLTMatched"], # (Deeptau + HPS), + 'on_datasets': ["tautau"], + 'tags': ["cross_trigger", "cross_tau_tau"], + }, + + # vbf + "HLT_VBF_DoubleMediumDeepTauPFTauHPS20_eta2p1": { + 'legs': dict(tau1=TriggerLeg(pdg_id=15, trigger_bits=get_bit_sum_v("tau", 
["VBFDiTau","HPS", "DeepTau" if nano_version == 14 else None,])), + tau2=TriggerLeg(pdg_id=15, trigger_bits=get_bit_sum_v("tau", ["VBFDiTau","HPS", "DeepTau" if nano_version == 14 else None,])), + vbf1=TriggerLeg(pdg_id=1, trigger_bits=get_bit_sum_v("jet", ["VBFcrossCleanedDeepTauPFTau" if nano_version == 14 else None,])), + vbf2=TriggerLeg(pdg_id=1, trigger_bits=get_bit_sum_v("jet", ["VBFcrossCleanedDeepTauPFTau" if nano_version == 14 else None,])), + ), + #'filters': ["hltHpsDoublePFTau20TrackDeepTauDitauWPForVBFIsoTau", "hltMatchedVBFTwoPFJets2CrossCleanedFromDoubleMediumDeepTauDitauWPPFTauHPS20?"], + 'on_datasets': ["tautau"], + 'tags': ["cross_trigger", "cross_tau_tau_vbf"], + }, + + # tau tau jet + "HLT_DoubleMediumDeepTauPFTauHPS30_L2NN_eta2p1_PFJet60": { + 'legs': dict(tau1=TriggerLeg(pdg_id=15, trigger_bits=get_bit_sum_v("tau", ["DiTauAndPFJet",])), + tau2=TriggerLeg(pdg_id=15, trigger_bits=get_bit_sum_v("tau", ["DiTauAndPFJet",])), + jet=TriggerLeg(pdg_id=1, trigger_bits=get_bit_sum_v("jet", ["DoubleTau+Jet",])), + ), + #'filters': ["hltHpsDoublePFTau30MediumDitauWPDeepTauL1HLTMatchedDoubleTauJet", "hltHpsOverlapFilterDeepTauDoublePFTau30PFJet60"], + 'on_datasets': ["tautau"], + 'tags': ["cross_trigger", "cross_tau_tau_jet"], + }, + }) + + if year == 2016: + multileptons_triggers.update({ + # e tauh + "HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau20_SingleL1":{ + 'legs': dict(e=TriggerLeg(pdg_id=11, trigger_bits=None), + tau=TriggerLeg(pdg_id=15, trigger_bits=None), + ), + # does not exist for run F on but should only be used until run 276215 -> which era? + 'on_datasets': (lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era <= "E"), + 'tags': ["cross_trigger", "cross_e_tau"], + }, + + "HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau20": { + 'legs': dict(e=TriggerLeg(pdg_id=11, trigger_bits=None), + tau=TriggerLeg(pdg_id=15, trigger_bits=None), + ), + # does not exist for run F on but should only be used between run 276215 and 278270 -> which eras? + 'on_datasets': (lambda dataset_inst: dataset_inst.is_data and dataset_inst.x.era <= "E"), + 'tags': ["cross_trigger", "cross_e_tau"], + }, + + "HLT_Ele24_eta2p1_WPLoose_Gsf_LooseIsoPFTau30": { + 'legs': dict(e=TriggerLeg(pdg_id=11, trigger_bits=None), + tau=TriggerLeg(pdg_id=15, trigger_bits=None), + ), + # does not exist until run E but should only be used after run 278270 -> which era? 
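+            # note: era-dependent availability is encoded in these 'on_datasets' callables;
+            # the loop at the end of add_triggers passes a callable straight through as
+            # Trigger.applies_to_dataset, whereas a list of dataset tags is wrapped into a
+            # lambda that also accepts all MC datasets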
+ 'on_datasets': (lambda dataset_inst: dataset_inst.is_data and dataset_inst.x.era >= "E"), + 'tags': ["cross_trigger", "cross_e_tau"], + }, + + # mu tauh + "HLT_IsoMu19_eta2p1_LooseIsoPFTau20": { + 'legs': dict(mu=TriggerLeg(pdg_id=13, trigger_bits=None), + tau=TriggerLeg(pdg_id=15, trigger_bits=None), + ), + 'tags': ["cross_trigger", "cross_mu_tau"], + }, + + "HLT_IsoMu19_eta2p1_LooseIsoPFTau20_SingleL1": { + 'legs': dict(mu=TriggerLeg(pdg_id=13, trigger_bits=None), + tau=TriggerLeg(pdg_id=15, trigger_bits=None), + ), + 'tags': ["cross_trigger", "cross_mu_tau"], + }, + + # tauh tauh + "HLT_DoubleMediumIsoPFTau35_Trk1_eta2p1_Reg": { + 'legs': dict(tau1=TriggerLeg(pdg_id=15, trigger_bits=None), + tau2=TriggerLeg(pdg_id=15, trigger_bits=None), + ), + 'on_datasets': (lambda dataset_inst: dataset_inst.is_mc or ("B" <= dataset_inst.x.era <= "F")), + 'tags': ["cross_trigger", "cross_tau_tau"], + }, + + "HLT_DoubleMediumCombinedIsoPFTau35_Trk1_eta2p1_Reg": { + 'legs': dict(tau1=TriggerLeg(pdg_id=15, trigger_bits=None), + tau2=TriggerLeg(pdg_id=15, trigger_bits=None), + ), + 'on_datasets': (lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "H"), + 'tags': ["cross_trigger", "cross_tau_tau"], + }, + }) + + if config.campaign.has_tag("preVFP"): + multileptons_triggers.update({ + # single electron + "HLT_Ele25_eta2p1_WPTight_Gsf": { + 'legs': dict(e=TriggerLeg(pdg_id=11, trigger_bits=None),), + 'tags': ["single_trigger", "single_e"], + }, + }) + # single muon + for trig in ["HLT_IsoMu22", "HLT_IsoMu22_eta2p1", "HLT_IsoTkMu22", "HLT_IsoTkMu22_eta2p1"]: + multileptons_triggers.update({ + trig: { + 'legs': dict(mu=TriggerLeg(pdg_id=13, trigger_bits=None),), + 'tags': ["single_trigger", "single_mu"], + }, + }) + + if year in [2017, 2018]: + multileptons_triggers.update({ + # single muon + "HLT_IsoMu27": { + 'legs': dict(mu=TriggerLeg(pdg_id=13, trigger_bits=get_bit_sum_v("mu", ["SingleMuon",]))), + #'filters': ["hltL3crIsoL1sMu22Or25L1f0L2f10QL3f27QL3trkIsoFiltered0p07"], + 'on_datasets': ["mutau", "emu_from_e", "emu_from_mu", "mumu"], + 'tags': ["single_trigger", "single_mu"], + }, + + # single electron + "HLT_Ele32_WPTight_Gsf": { + 'legs': dict(e=TriggerLeg(pdg_id=11, trigger_bits=2),), + #'filters': ["hltEle32WPTightGsfTrackIsoFilter"], + 'on_datasets': (lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), + 'tags': ["single_trigger", "single_e"], + }, + + "HLT_Ele32_WPTight_Gsf_L1DoubleEG": { + 'legs': dict(e=TriggerLeg(pdg_id=11, trigger_bits=2+1024),), + #'filters': ["hltEle32L1DoubleEGWPTightGsfTrackIsoFilter", "hltEGL1SingleEGOrFilter" ], + 'tags': ["single_trigger", "single_e"], + }, + + "HLT_Ele35_WPTight_Gsf": { + 'legs': dict(e=TriggerLeg(pdg_id=11, trigger_bits=2),), + #'filters': ["hltEle35noerWPTightGsfTrackIsoFilter"], + 'tags': ["single_trigger", "single_e"], + }, + + # e tauh + "HLT_Ele24_eta2p1_WPTight_Gsf_LooseChargedIsoPFTau30_eta2p1_CrossL1": { + 'legs': dict(e=TriggerLeg(pdg_id=11, trigger_bits=2 + 64,), + tau=TriggerLeg(pdg_id=15, trigger_bits=1024 + 256,), + ), + #'filters': ["hltEle24erWPTightGsfTrackIsoFilterForTau", "hltOverlapFilterIsoEle24WPTightGsfLooseIsoPFTau30", + # "hltSelectedPFTau30LooseChargedIsolationL1HLTMatched", "hltOverlapFilterIsoEle24WPTightGsfLooseIsoPFTau30"], + 'tags': ["cross_trigger", "cross_e_tau"], + }, + + # mu tauh + "HLT_IsoMu20_eta2p1_LooseChargedIsoPFTau27_eta2p1_CrossL1": { + 'legs': dict(mu=TriggerLeg(pdg_id=13, trigger_bits=2 + 64,), + tau=TriggerLeg(pdg_id=15, trigger_bits=1024 + 512,), + ), + #'filters': 
["hltL3crIsoL1sMu18erTau24erIorMu20erTau24erL1f0L2f10QL3f20QL3trkIsoFiltered0p07", + # "hltOverlapFilterIsoMu20LooseChargedIsoPFTau27L1Seeded", + # "hltSelectedPFTau27LooseChargedIsolationAgainstMuonL1HLTMatched", + # "hltOverlapFilterIsoMu20LooseChargedIsoPFTau27L1Seeded"], + 'tags': ["cross_trigger", "cross_mu_tau"], + }, + + # vbf + "HLT_VBF_DoubleLooseChargedIsoPFTau20_Trk1_eta2p1_Reg": { + 'id': 601, + 'legs': dict(tau1=TriggerLeg(pdg_id=15, trigger_bits=2048,), + tau2=TriggerLeg(pdg_id=15, trigger_bits=2048,), + vbf1=TriggerLeg(pdg_id=1, trigger_bits=1,), + vbf2=TriggerLeg(pdg_id=1, trigger_bits=1,), + ), + #'filters': ["hltDoublePFTau20TrackPt1LooseChargedIsolation", "hltMatchedVBFOnePFJet2CrossCleanedFromDoubleLooseChargedIsoPFTau20"], + 'on_datasets': (lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "D"), + 'tags': ["cross_trigger", "cross_tau_tau_vbf"], + }, + }) + # tauh tauh + for trig in [ + "HLT_DoubleMediumChargedIsoPFTau35_Trk1_eta2p1_Reg", + "HLT_DoubleTightChargedIsoPFTau35_Trk1_TightID_eta2p1_Reg", + "HLT_DoubleMediumChargedIsoPFTau40_Trk1_TightID_eta2p1_Reg", + "HLT_DoubleTightChargedIsoPFTau40_Trk1_eta2p1_Reg" + ]: + multileptons_triggers.update({ + trig: { + 'legs': dict(tau1=TriggerLeg(pdg_id=15, trigger_bits=64,), + tau2=TriggerLeg(pdg_id=15, trigger_bits=64,), + ), + 'on_datasets': (lambda dataset_inst: dataset_inst.is_data), + 'tags': ["cross_trigger", "cross_tau_tau"], + }, + }) + + for name, triginfo in multileptons_triggers.items(): + on_datasets = triginfo.get('on_datasets', None) + kwargs = dict( + name=name, + id=get_triggerID(name), + legs=triginfo['legs'], + tags=triginfo['tags'], + ) + + # Handle applies_to_dataset only if on_datasets is provided + if on_datasets is not None: + if callable(on_datasets): + kwargs['applies_to_dataset'] = on_datasets + else: + kwargs['applies_to_dataset'] = ( + lambda dataset_inst, tags=on_datasets: + dataset_inst.is_mc or any(dataset_inst.has_tag(tag) for tag in tags) + ) + + config.x.triggers.add(**kwargs) + + diff --git a/hbt/config/util.py b/multilepton/config/util.py similarity index 98% rename from hbt/config/util.py rename to multilepton/config/util.py index 26be7ef9..14fbd4aa 100644 --- a/hbt/config/util.py +++ b/multilepton/config/util.py @@ -1,14 +1,14 @@ # coding: utf-8 """ -Config-related object definitions and utils. +Config-related HH-multileptons object definitions and utils. """ from __future__ import annotations import re -from dataclasses import dataclass +from dataclasses import dataclass from order import UniqueObject, TagMixin from order.util import typed @@ -18,14 +18,12 @@ class TriggerLeg(object): """ Container class storing information about trigger legs: - - *pdg_id*: The id of the object that should have caused the trigger leg to fire. - *min_pt*: The minimum transverse momentum in GeV of the triggered object. - *trigger_bits*: Integer bit mask or masks describing whether the last filter of a trigger fired. See https://github.com/cms-sw/cmssw/blob/master/PhysicsTools/NanoAOD/python/triggerObjects_cff.py. Per mask, any of the bits should match (*OR*). When multiple masks are configured, each of them should match (*AND*). - For accepted types and conversions, see the *typed* setters implemented in this class. 
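+      For example, a leg with ``trigger_bits=[2 + 64, 1024]`` matches if bit 2 *or* bit 64
+      is set, *and* bit 1024 is set.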
""" @@ -41,7 +39,6 @@ def __init__( self._pdg_id = None self._min_pt = None self._trigger_bits = None - # set initial values self.pdg_id = pdg_id self.min_pt = min_pt @@ -58,22 +55,18 @@ def __repr__(self): def pdg_id(self, pdg_id: int | None) -> int | None: if pdg_id is None: return None - if not isinstance(pdg_id, int): raise TypeError(f"invalid pdg_id: {pdg_id}") - return pdg_id @typed def min_pt(self, min_pt: int | float | None) -> float | None: if min_pt is None: return None - if isinstance(min_pt, int): min_pt = float(min_pt) if not isinstance(min_pt, float): raise TypeError(f"invalid min_pt: {min_pt}") - return min_pt @typed @@ -83,25 +76,21 @@ def trigger_bits( ) -> list[int] | None: if trigger_bits is None: return None - # cast to list if isinstance(trigger_bits, tuple): trigger_bits = list(trigger_bits) elif not isinstance(trigger_bits, list): trigger_bits = [trigger_bits] - # check bit types for bit in trigger_bits: if not isinstance(bit, int): raise TypeError(f"invalid trigger bit: {bit}") - return trigger_bits class Trigger(UniqueObject, TagMixin): """ Container class storing information about triggers: - - *name*: The path name of a trigger that should have fired. - *id*: A unique id of the trigger. - *run_range*: An inclusive range describing the runs where the trigger is to be applied @@ -110,11 +99,8 @@ class Trigger(UniqueObject, TagMixin): additional information and constraints of particular trigger legs. - *applies_to_dataset*: A function that obtains an ``order.Dataset`` instance to decide whether the trigger applies to that dataset. Defaults to *True*. - For accepted types and conversions, see the *typed* setters implemented in this class. - In addition, a base class from *order* provides additional functionality via mixins: - - *tags*: Trigger objects can be assigned *tags* that can be checked later on, e.g. to describe the type of the trigger ("single_mu", "cross", ...). 
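+
+    Minimal construction sketch (the leg's ``min_pt`` value is a placeholder):
+
+    .. code-block:: python
+
+        trigger = Trigger(
+            name="HLT_IsoMu24",
+            id=105,
+            legs=dict(mu=TriggerLeg(pdg_id=13, min_pt=25.0)),
+            tags=["single_trigger", "single_mu"],
+        )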
""" @@ -139,7 +125,6 @@ def __init__( self._run_range = None self._leg = None self._applies_to_dataset = None - # set initial values self.run_range = run_range self.legs = legs @@ -157,7 +142,6 @@ def name(self, name: str) -> str: raise TypeError(f"invalid name: {name}") if not name.startswith("HLT_"): raise ValueError(f"invalid name: {name}") - return name @typed @@ -167,11 +151,9 @@ def run_range( ) -> tuple[int] | None: if run_range is None: return None - # cast list to tuple if isinstance(run_range, list): run_range = tuple(run_range) - # run_range must be a tuple with two integers if not isinstance(run_range, tuple): raise TypeError(f"invalid run_range: {run_range}") @@ -181,7 +163,6 @@ def run_range( raise ValueError(f"invalid run_range start: {run_range[0]}") if not isinstance(run_range[1], int): raise ValueError(f"invalid run_range end: {run_range[1]}") - return run_range @typed @@ -191,18 +172,15 @@ def legs( ) -> dict[Hashable, TriggerLeg] | None: if legs is None: return None - # cast to dict if isinstance(legs, TriggerLeg): legs = [legs] if isinstance(legs, (list, tuple)): legs = dict(enumerate(legs)) - # validate for key, leg in legs.items(): if not isinstance(leg, TriggerLeg): raise TypeError(f"invalid trigger leg with key {key}: {leg}") - return legs or None @typed @@ -212,7 +190,6 @@ def applies_to_dataset(self, func: Callable | bool | Any) -> Callable: raise TypeError(f"invalid applies_to_dataset: {func}") decision = True if func is None else bool(func) func = lambda dataset_inst: decision - return func @property @@ -237,8 +214,8 @@ class TriggerBits: v12: int | None = None v14: int | None = None - - supported_versions: ClassVar[set[int]] = {12, 14} + v15: int | None = None + supported_versions: ClassVar[set[int]] = {12, 14, 15} def __post_init__(self) -> None: # versions might be strings such as "v12" that act as references diff --git a/hbt/config/variables.py b/multilepton/config/variables.py similarity index 62% rename from hbt/config/variables.py rename to multilepton/config/variables.py index ca4d1484..e670804c 100644 --- a/hbt/config/variables.py +++ b/multilepton/config/variables.py @@ -3,20 +3,80 @@ """ Definition of variables. 
""" -from functools import partial import order as od +from functools import partial + from columnflow.columnar_util import EMPTY_FLOAT, attach_coffea_behavior, default_coffea_collections from columnflow.util import maybe_import ak = maybe_import("awkward") +def build_ht(events): + objects = ak.concatenate([events.Electron * 1, events.Muon * 1, events.Tau * 1, events.Jet * 1], axis=1)[:, :] + objects_sum = objects.sum(axis=1) + return objects_sum.pt + + +# build variables for dilepton, dijet, and hh +def delta_r12(vectors): + # delta r between first two elements + dr = ak.firsts(vectors[:, :1], axis=1).delta_r(ak.firsts(vectors[:, 1:2], axis=1)) + return ak.fill_none(dr, EMPTY_FLOAT) + + +def build_dilep(events, which=None): + leps = ak.concatenate([events.Electron * 1, events.Muon * 1, events.Tau * 1], axis=1)[:, :2] + if which == "dr": + return delta_r12(leps) + dilep = leps.sum(axis=1) + if which is None: + return dilep * 1 + if which == "mass": + return dilep.mass + if which == "pt": + return dilep.pt + if which == "eta": + return dilep.eta + if which == "abs_eta": + return abs(dilep.eta) + if which == "phi": + return dilep.phi + if which == "energy": + return dilep.energy + raise ValueError(f"Unknown which: {which}") + + +def build_m4l(events): + objects = ak.concatenate([events.Electron * 1, events.Muon * 1], axis=1)[:, :] + objects_sum = objects.sum(axis=1) + return objects_sum.mass + + +def build_nbjets(events, which=None, wp="medium"): + if which == "btagPNetB": + wp_value = config.x.btag_working_points["particleNet"][wp] + elif which == "btagDeepFlavB": + wp_value = config.x.btag_working_points["deepjet"][wp] + else: + raise ValueError(f"Unknown which: {which}") + bjet_mask = events.Jet[which] >= wp_value + objects = events.Jet[bjet_mask] + objects_num = ak.num(objects, axis=1) + return objects_num + + def add_variables(config: od.Config) -> None: """ Adds all variables to a *config*. 
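+
+    A variable can be backed either by a plain column name or by a callable expression;
+    callables advertise the columns they need through an ``inputs`` list that is forwarded
+    via the variable's aux data. Illustrative sketch (name, binning and title are
+    placeholders):
+
+    .. code-block:: python
+
+        add_variable(
+            config,
+            name="dilep_pt_example",
+            expression=partial(build_dilep, which="pt"),
+            aux={"inputs": build_dilep.inputs},
+            binning=(40, 0, 400),
+            unit="GeV",
+            x_title=r"$p_{T,ll}$",
+        )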
""" + build_ht.inputs = ["{Electron,Muon,Tau,Jet}.{pt,eta,phi,mass}"] + build_dilep.inputs = ["{Electron,Muon,Tau}.{pt,eta,phi,mass}"] + build_m4l.inputs = ["{Electron,Muon}.{pt,eta,phi,mass}"] + build_nbjets.inputs = ["Jet.{btagPNetB,btagDeepFlavB}"] + add_variable( config, name="event", @@ -41,19 +101,6 @@ def add_variables(config: od.Config) -> None: x_title="Luminosity block", discrete_x=True, ) - add_variable( - config, - name="n_hhbtag", - expression="n_hhbtag", - binning=(4, -0.5, 3.5), - x_title="Number of HH b-tags", - discrete_x=True, - ) - def build_ht(events): - objects = ak.concatenate([events.Electron * 1, events.Muon * 1, events.Tau * 1, events.Jet * 1], axis=1)[:, :] - objects_sum = objects.sum(axis=1) - return objects_sum.pt - build_ht.inputs = ["{Electron,Muon,Tau,Jet}.{pt,eta,phi,mass}"] add_variable( config, name="ht", @@ -122,7 +169,6 @@ def build_ht(events): binning=(66, -3.3, 3.3), x_title=r"MET $\phi$", ) - # weights add_variable( config, @@ -138,35 +184,34 @@ def build_ht(events): binning=(40, 0, 2), x_title="Pileup weight", ) - add_variable( - config, - name="normalized_pu_weight", - expression="normalized_pu_weight", - binning=(40, 0, 2), - x_title="Normalized pileup weight", - ) - add_variable( - config, - name="btag_weight", - expression="btag_weight", - binning=(60, 0, 3), - x_title="b-tag weight", - ) - add_variable( - config, - name="normalized_btag_weight", - expression="normalized_btag_weight", - binning=(60, 0, 3), - x_title="Normalized b-tag weight", - ) - add_variable( - config, - name="normalized_njet_btag_weight", - expression="normalized_njet_btag_weight", - binning=(60, 0, 3), - x_title="$N_{jet}$ normalized b-tag weight", - ) - + # add_variable( + # config, + # name="normalized_pu_weight", + # expression="normalized_pu_weight", + # binning=(40, 0, 2), + # x_title="Normalized pileup weight", + # ) + # add_variable( + # config, + # name="btag_weight", + # expression="btag_weight", + # binning=(60, 0, 3), + # x_title="b-tag weight", + # ) + # add_variable( + # config, + # name="normalized_btag_weight", + # expression="normalized_btag_weight", + # binning=(60, 0, 3), + # x_title="Normalized b-tag weight", + # ) + # add_variable( + # config, + # name="normalized_njet_btag_weight", + # expression="normalized_njet_btag_weight", + # binning=(60, 0, 3), + # x_title="$N_{jet}$ normalized b-tag weight", + # ) # cutflow variables add_variable( config, @@ -214,172 +259,42 @@ def build_ht(events): unit="GeV", x_title=r"Subleading jet $p_{T}$", ) - - # build variables for dilepton, dijet, and hh - def delta_r12(vectors): - # delta r between first two elements - dr = ak.firsts(vectors[:, :1], axis=1).delta_r(ak.firsts(vectors[:, 1:2], axis=1)) - return ak.fill_none(dr, EMPTY_FLOAT) - - def build_dilep(events, which=None): - leps = ak.concatenate([events.Electron * 1, events.Muon * 1, events.Tau * 1], axis=1)[:, :2] - if which == "dr": - return delta_r12(leps) - dilep = leps.sum(axis=1) - if which is None: - return dilep * 1 - if which == "mass": - return dilep.mass - if which == "pt": - return dilep.pt - if which == "eta": - return dilep.eta - if which == "abs_eta": - return abs(dilep.eta) - if which == "phi": - return dilep.phi - if which == "energy": - return dilep.energy - raise ValueError(f"Unknown which: {which}") - - build_dilep.inputs = ["{Electron,Muon,Tau}.{pt,eta,phi,mass}"] - - def build_dibjet(events, which=None): - events = attach_coffea_behavior(events, {"HHBJet": default_coffea_collections["Jet"]}) - hhbjets = events.HHBJet[:, :2] - if which == 
"dr": - return delta_r12(hhbjets) - dijet = hhbjets.sum(axis=1) - if which is None: - return dijet * 1 - if which == "mass": - return dijet.mass - if which == "pt": - return dijet.pt - if which == "eta": - return dijet.eta - if which == "abs_eta": - return abs(dijet.eta) - if which == "phi": - return dijet.phi - if which == "energy": - return dijet.energy - raise ValueError(f"Unknown which: {which}") - - build_dibjet.inputs = ["HHBJet.{pt,eta,phi,mass}"] - - def build_hh(events, which=None): - dijet = build_dibjet(events) - dilep = build_dilep(events) - hs = ak.concatenate([dijet[..., None], dilep[..., None]], axis=1) - if which == "dr": - return delta_r12(hs) - hh = hs.sum(axis=1) - if which is None: - return hh * 1 - if which == "mass": - return hh.mass - if which == "pt": - return hh.pt - if which == "eta": - return hh.eta - if which == "abs_eta": - return abs(hh.eta) - if which == "phi": - return hh.phi - if which == "energy": - return hh.energy - raise ValueError(f"Unknown which: {which}") - - build_hh.inputs = build_dibjet.inputs + build_dilep.inputs - - # dibjet variables - add_variable( - config, - name="dibjet_energy", - expression=partial(build_dibjet, which="energy"), - aux={"inputs": build_dibjet.inputs}, - binning=(40, 40, 300), - unit="GeV", - x_title=r"$E_{bb}$", - ) - add_variable( - config, - name="dibjet_mass", - expression=partial(build_dibjet, which="mass"), - aux={"inputs": build_dibjet.inputs}, - binning=(30, 0, 300), - unit="GeV", - x_title=r"$m_{bb}$", - ) - add_variable( - config, - name="dibjet_pt", - expression=partial(build_dibjet, which="pt"), - aux={"inputs": build_dibjet.inputs}, - binning=(40, 0, 200), - unit="GeV", - x_title=r"$p_{T,bb}$", - ) add_variable( config, - name="dibjet_eta", - expression=partial(build_dibjet, which="eta"), - aux={"inputs": build_dibjet.inputs}, - binning=(50, -5, 5), - x_title=r"$\eta_{bb}$", - ) - add_variable( - config, - name="dibjet_phi", - expression=partial(build_dibjet, which="phi"), - aux={"inputs": build_dibjet.inputs}, - binning=(66, -3.3, 3.3), - x_title=r"$\phi_{bb}$", + name="nbjets_deepjet_medium", + expression=partial(build_nbjets, which="btagDeepFlavB"), + aux={"inputs": build_nbjets.inputs}, + binning=(11, -0.5, 10.5), + x_title=r"Number of b-jets (DeepJet medium)", + discrete_x=True, ) add_variable( config, - name="dibjet_dr", - expression=partial(build_dibjet, which="dr"), - aux={"inputs": build_dibjet.inputs}, - binning=(30, 0, 6), - x_title=r"$\Delta R_{bb}$", + name="nbjets_pnet_medium", + expression=partial(build_nbjets, which="btagPNetB"), + aux={"inputs": build_nbjets.inputs}, + binning=(11, -0.5, 10.5), + x_title=r"Number of b-jets (PNet medium)", + discrete_x=True, ) - - def build_nbjets(events, which=None): - wp = "medium" - if which == "btagPNetB": - wp_value = config.x.btag_working_points["particleNet"][wp] - elif which == "btagDeepFlavB": - wp_value = config.x.btag_working_points["deepjet"][wp] - else: - raise ValueError(f"Unknown which: {which}") - bjet_mask = events.Jet[which] >= wp_value - objects = events.Jet[bjet_mask] - objects_num = ak.num(objects, axis=1) - return objects_num - - build_nbjets.inputs = ["Jet.{btagPNetB,btagDeepFlavB}"] - add_variable( config, - name="nbjets_deepjet", - expression=partial(build_nbjets, which="btagDeepFlavB"), + name="nbjets_deepjet_loose", + expression=partial(build_nbjets, which="btagDeepFlavB", wp="loose"), aux={"inputs": build_nbjets.inputs}, binning=(11, -0.5, 10.5), - x_title=r"Number of b-jets (DeepJet medium)", + x_title=r"Number of b-jets (DeepJet 
loose)", discrete_x=True, ) add_variable( config, - name="nbjets_pnet", - expression=partial(build_nbjets, which="btagPNetB"), + name="nbjets_pnet_loose", + expression=partial(build_nbjets, which="btagPNetB", wp="loose"), aux={"inputs": build_nbjets.inputs}, binning=(11, -0.5, 10.5), - x_title=r"Number of b-jets (PNet medium)", + x_title=r"Number of b-jets (PNet loose)", discrete_x=True, ) - # dilepton variables add_variable( config, @@ -434,62 +349,6 @@ def build_nbjets(events, which=None): binning=(30, 0, 6), x_title=r"$\Delta R_{ll}$", ) - - # hh variables - add_variable( - config, - name="hh_energy", - expression=partial(build_hh, which="energy"), - aux={"inputs": build_hh.inputs}, - binning=(35, 100, 800), - unit="GeV", - x_title=r"$E_{ll+bb}$", - ) - add_variable( - config, - name="hh_mass", - expression=partial(build_hh, which="mass"), - aux={"inputs": build_hh.inputs}, - binning=(50, 0, 1000), - unit="GeV", - x_title=r"$m_{ll+bb}$", - ) - add_variable( - config, - name="hh_pt", - expression=partial(build_hh, which="pt"), - aux={"inputs": build_hh.inputs}, - binning=(40, 0, 400), - unit="GeV", - x_title=r"$p_{T,ll+bb}$", - ) - add_variable( - config, - name="hh_eta", - expression=partial(build_hh, which="eta"), - aux={"inputs": build_hh.inputs}, - binning=(50, -5, 5), - unit="GeV", - x_title=r"$\eta_{ll+bb}$", - ) - add_variable( - config, - name="hh_phi", - expression=partial(build_hh, which="phi"), - aux={"inputs": build_hh.inputs}, - binning=(66, -3.3, 3.3), - unit="GeV", - x_title=r"$\phi_{ll+bb}$", - ) - add_variable( - config, - name="hh_dr", - expression=partial(build_hh, which="dr"), - aux={"inputs": build_hh.inputs}, - binning=(30, 0, 6), - x_title=r"$\Delta R_{ll,bb}$", - ) - # single lepton variables # single electron add_variable( @@ -534,7 +393,6 @@ def build_nbjets(events, which=None): binning=(66, -3.3, 3.3), x_title=r"Subleading electron $\phi$", ) - # single tau add_variable( config, @@ -578,7 +436,6 @@ def build_nbjets(events, which=None): binning=(66, -3.3, 3.3), x_title=r"Subleading tau $\phi$", ) - # single mu add_variable( config, @@ -622,7 +479,6 @@ def build_nbjets(events, which=None): binning=(66, -3.3, 3.3), x_title=r"Subleading muon $\phi$", ) - add_variable( config, name="njets", @@ -631,6 +487,47 @@ def build_nbjets(events, which=None): binning=(11, -0.5, 10.5), x_title=r"Number of jets", ) + add_variable( + config, + name="nmu", + expression=lambda events: ak.num(events.Muon["pt"], axis=1), + aux={"inputs": {"Muon.pt"}}, + binning=(11, -0.5, 10.5), + x_title=r"Number of muons", + ) + add_variable( + config, + name="nlep", + expression=lambda events: ak.num((ak.concatenate([events.Electron["pt"] * 1, events.Muon["pt"] * 1], axis=1)[:, :]), axis=1), # noqa: E501 + aux={"inputs": {"{Electron,Muon}.pt"}}, + binning=(11, -0.5, 10.5), + x_title=r"Number of leptons", + ) + add_variable( + config, + name="m4l", + expression=partial(build_m4l), + aux={"inputs": build_m4l.inputs}, + binning=[0, 160, 180, 200, 220, 240, 260, 280, 300, 320, 340, 380, 400, 450], + unit="GeV", + x_title="$m_{4\ell}$", # noqa: W605 + ) + add_variable( + config, + name="nele", + expression=lambda events: ak.num(events.Electron["pt"], axis=1), + aux={"inputs": {"Electron.pt"}}, + binning=(11, -0.5, 10.5), + x_title=r"Number of electrons", + ) + add_variable( + config, + name="ntau", + expression=lambda events: ak.num(events.Tau["pt"], axis=1), + aux={"inputs": {"Tau.pt"}}, + binning=(11, -0.5, 10.5), + x_title=r"Number of electrons", + ) for proc in ["hh", "tt", "dy"]: # outputs of the 
resonant pDNN at SM-like mass and spin values @@ -641,7 +538,6 @@ def build_nbjets(events, which=None): binning=(25, 0.0, 1.0), x_title=rf"{proc.upper()} output node, res. pDNN$_{{m_{{HH}}=500\,GeV,s=0}}$", ) - # outputs of the resonant DNN trained over flat masses add_variable( config, @@ -650,7 +546,6 @@ def build_nbjets(events, which=None): binning=(25, 0.0, 1.0), x_title=rf"{proc.upper()} output node, res. DNN", ) - add_variable( config, name=f"res_dnn_{proc}_fine", @@ -663,14 +558,9 @@ def build_nbjets(events, which=None): # helper to add a variable to the config with some defaults def add_variable(config: od.Config, *args, **kwargs) -> od.Variable: kwargs.setdefault("null_value", EMPTY_FLOAT) - - # create the variable variable = config.add_variable(*args, **kwargs) - - # defaults if not variable.has_aux("underflow"): variable.x.underflow = True if not variable.has_aux("overflow"): variable.x.overflow = True - return variable diff --git a/multilepton/data/TriggerScaleFactors/2022postEE/CrossEleTauHlt.json.gz b/multilepton/data/TriggerScaleFactors/2022postEE/CrossEleTauHlt.json.gz new file mode 100644 index 00000000..5bea24aa Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022postEE/CrossEleTauHlt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022postEE/CrossMuTauHlt.json.gz b/multilepton/data/TriggerScaleFactors/2022postEE/CrossMuTauHlt.json.gz new file mode 100644 index 00000000..a29652a1 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022postEE/CrossMuTauHlt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022postEE/METTrigger_SFs_run3_2022_postEE.json.gz b/multilepton/data/TriggerScaleFactors/2022postEE/METTrigger_SFs_run3_2022_postEE.json.gz new file mode 100644 index 00000000..3872ead1 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022postEE/METTrigger_SFs_run3_2022_postEE.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022postEE/PNetTauTauTrigger_SFs_2022.json.gz b/multilepton/data/TriggerScaleFactors/2022postEE/PNetTauTauTrigger_SFs_2022.json.gz new file mode 100644 index 00000000..a945f329 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022postEE/PNetTauTauTrigger_SFs_2022.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022postEE/Trigger_SF_2022_Ak8_Pnet_HLT_pT_mSD.json.gz b/multilepton/data/TriggerScaleFactors/2022postEE/Trigger_SF_2022_Ak8_Pnet_HLT_pT_mSD.json.gz new file mode 100644 index 00000000..d3d2fbb8 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022postEE/Trigger_SF_2022_Ak8_Pnet_HLT_pT_mSD.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022postEE/ditaujet_jetleg60_2022postEE.json.gz b/multilepton/data/TriggerScaleFactors/2022postEE/ditaujet_jetleg60_2022postEE.json.gz new file mode 100644 index 00000000..e050f0cf Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022postEE/ditaujet_jetleg60_2022postEE.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022postEE/electronHlt.json.gz b/multilepton/data/TriggerScaleFactors/2022postEE/electronHlt.json.gz new file mode 100644 index 00000000..7797f206 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022postEE/electronHlt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022postEE/tau_trigger_DeepTau2018v2p5_2022postEE.json.gz b/multilepton/data/TriggerScaleFactors/2022postEE/tau_trigger_DeepTau2018v2p5_2022postEE.json.gz new file mode 100644 index 00000000..b9a32cc8 Binary 
files /dev/null and b/multilepton/data/TriggerScaleFactors/2022postEE/tau_trigger_DeepTau2018v2p5_2022postEE.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022postEE/temporary_MuHlt_abseta_pt.json.gz b/multilepton/data/TriggerScaleFactors/2022postEE/temporary_MuHlt_abseta_pt.json.gz new file mode 100644 index 00000000..012993a9 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022postEE/temporary_MuHlt_abseta_pt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022preEE/CrossEleTauHlt.json.gz b/multilepton/data/TriggerScaleFactors/2022preEE/CrossEleTauHlt.json.gz new file mode 100644 index 00000000..b379b3ab Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022preEE/CrossEleTauHlt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022preEE/CrossMuTauHlt.json.gz b/multilepton/data/TriggerScaleFactors/2022preEE/CrossMuTauHlt.json.gz new file mode 100644 index 00000000..992a49ba Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022preEE/CrossMuTauHlt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022preEE/METTrigger_SFs_run3_2022_preEE.json.gz b/multilepton/data/TriggerScaleFactors/2022preEE/METTrigger_SFs_run3_2022_preEE.json.gz new file mode 100644 index 00000000..22810857 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022preEE/METTrigger_SFs_run3_2022_preEE.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022preEE/PNetTauTauTrigger_SFs_2022.json.gz b/multilepton/data/TriggerScaleFactors/2022preEE/PNetTauTauTrigger_SFs_2022.json.gz new file mode 100644 index 00000000..d6e90501 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022preEE/PNetTauTauTrigger_SFs_2022.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022preEE/Trigger_SF_2022_Ak8_Pnet_HLT_pT_mSD.json.gz b/multilepton/data/TriggerScaleFactors/2022preEE/Trigger_SF_2022_Ak8_Pnet_HLT_pT_mSD.json.gz new file mode 100644 index 00000000..64a84ad8 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022preEE/Trigger_SF_2022_Ak8_Pnet_HLT_pT_mSD.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022preEE/ditaujet_jetleg60_2022preEE.json.gz b/multilepton/data/TriggerScaleFactors/2022preEE/ditaujet_jetleg60_2022preEE.json.gz new file mode 100644 index 00000000..8dcb4d70 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022preEE/ditaujet_jetleg60_2022preEE.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022preEE/electronHlt.json.gz b/multilepton/data/TriggerScaleFactors/2022preEE/electronHlt.json.gz new file mode 100644 index 00000000..92784b06 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022preEE/electronHlt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022preEE/tau_trigger_DeepTau2018v2p5_2022preEE.json.gz b/multilepton/data/TriggerScaleFactors/2022preEE/tau_trigger_DeepTau2018v2p5_2022preEE.json.gz new file mode 100644 index 00000000..e372f74c Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022preEE/tau_trigger_DeepTau2018v2p5_2022preEE.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2022preEE/temporary_MuHlt_abseta_pt.json.gz b/multilepton/data/TriggerScaleFactors/2022preEE/temporary_MuHlt_abseta_pt.json.gz new file mode 100644 index 00000000..1e2d05bd Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2022preEE/temporary_MuHlt_abseta_pt.json.gz differ diff --git 
a/multilepton/data/TriggerScaleFactors/2023postBPix/CrossEleTauHlt.json.gz b/multilepton/data/TriggerScaleFactors/2023postBPix/CrossEleTauHlt.json.gz new file mode 100644 index 00000000..93c2363a Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023postBPix/CrossEleTauHlt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023postBPix/CrossMuTauHlt.json.gz b/multilepton/data/TriggerScaleFactors/2023postBPix/CrossMuTauHlt.json.gz new file mode 100644 index 00000000..f0929804 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023postBPix/CrossMuTauHlt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023postBPix/METTrigger_SFs_run3_2023_postBPix.json.gz b/multilepton/data/TriggerScaleFactors/2023postBPix/METTrigger_SFs_run3_2023_postBPix.json.gz new file mode 100644 index 00000000..331a8979 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023postBPix/METTrigger_SFs_run3_2023_postBPix.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023postBPix/PNetTauTauTrigger_SFs_2023.json.gz b/multilepton/data/TriggerScaleFactors/2023postBPix/PNetTauTauTrigger_SFs_2023.json.gz new file mode 100644 index 00000000..1553ceb4 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023postBPix/PNetTauTauTrigger_SFs_2023.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023postBPix/Trigger_SF_2023_Ak8_Pnet_HLT_pT_mSD.json.gz b/multilepton/data/TriggerScaleFactors/2023postBPix/Trigger_SF_2023_Ak8_Pnet_HLT_pT_mSD.json.gz new file mode 100644 index 00000000..68063d0e Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023postBPix/Trigger_SF_2023_Ak8_Pnet_HLT_pT_mSD.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023postBPix/ditaujet_jetleg60_2023postBPix.json.gz b/multilepton/data/TriggerScaleFactors/2023postBPix/ditaujet_jetleg60_2023postBPix.json.gz new file mode 100644 index 00000000..aaffd7de Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023postBPix/ditaujet_jetleg60_2023postBPix.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023postBPix/electronHlt.json.gz b/multilepton/data/TriggerScaleFactors/2023postBPix/electronHlt.json.gz new file mode 100644 index 00000000..f6c4492d Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023postBPix/electronHlt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023postBPix/tau_trigger_DeepTau2018v2p5_2023postBPix.json.gz b/multilepton/data/TriggerScaleFactors/2023postBPix/tau_trigger_DeepTau2018v2p5_2023postBPix.json.gz new file mode 100644 index 00000000..bc31412c Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023postBPix/tau_trigger_DeepTau2018v2p5_2023postBPix.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023postBPix/temporary_MuHlt_abseta_pt.json.gz b/multilepton/data/TriggerScaleFactors/2023postBPix/temporary_MuHlt_abseta_pt.json.gz new file mode 100644 index 00000000..347f61e7 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023postBPix/temporary_MuHlt_abseta_pt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023preBPix/CrossEleTauHlt.json.gz b/multilepton/data/TriggerScaleFactors/2023preBPix/CrossEleTauHlt.json.gz new file mode 100644 index 00000000..5917dbc4 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023preBPix/CrossEleTauHlt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023preBPix/CrossMuTauHlt.json.gz 
b/multilepton/data/TriggerScaleFactors/2023preBPix/CrossMuTauHlt.json.gz new file mode 100644 index 00000000..c546d08a Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023preBPix/CrossMuTauHlt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023preBPix/METTrigger_SFs_run3_2023_preBPix.json.gz b/multilepton/data/TriggerScaleFactors/2023preBPix/METTrigger_SFs_run3_2023_preBPix.json.gz new file mode 100644 index 00000000..7caae3ee Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023preBPix/METTrigger_SFs_run3_2023_preBPix.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023preBPix/PNetTauTauTrigger_SFs_2023.json.gz b/multilepton/data/TriggerScaleFactors/2023preBPix/PNetTauTauTrigger_SFs_2023.json.gz new file mode 100644 index 00000000..33f06c76 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023preBPix/PNetTauTauTrigger_SFs_2023.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023preBPix/Trigger_SF_2023_Ak8_Pnet_HLT_pT_mSD.json.gz b/multilepton/data/TriggerScaleFactors/2023preBPix/Trigger_SF_2023_Ak8_Pnet_HLT_pT_mSD.json.gz new file mode 100644 index 00000000..c29e28fc Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023preBPix/Trigger_SF_2023_Ak8_Pnet_HLT_pT_mSD.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023preBPix/ditaujet_jetleg60_2023preBPix.json.gz b/multilepton/data/TriggerScaleFactors/2023preBPix/ditaujet_jetleg60_2023preBPix.json.gz new file mode 100644 index 00000000..d6053525 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023preBPix/ditaujet_jetleg60_2023preBPix.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023preBPix/electronHlt.json.gz b/multilepton/data/TriggerScaleFactors/2023preBPix/electronHlt.json.gz new file mode 100644 index 00000000..e5f1a045 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023preBPix/electronHlt.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023preBPix/tau_trigger_DeepTau2018v2p5_2023preBPix.json.gz b/multilepton/data/TriggerScaleFactors/2023preBPix/tau_trigger_DeepTau2018v2p5_2023preBPix.json.gz new file mode 100644 index 00000000..5f36b1c7 Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023preBPix/tau_trigger_DeepTau2018v2p5_2023preBPix.json.gz differ diff --git a/multilepton/data/TriggerScaleFactors/2023preBPix/temporary_MuHlt_abseta_pt.json.gz b/multilepton/data/TriggerScaleFactors/2023preBPix/temporary_MuHlt_abseta_pt.json.gz new file mode 100644 index 00000000..79376d2d Binary files /dev/null and b/multilepton/data/TriggerScaleFactors/2023preBPix/temporary_MuHlt_abseta_pt.json.gz differ diff --git a/multilepton/data/met_xyCorrections_2024.json.gz b/multilepton/data/met_xyCorrections_2024.json.gz new file mode 100644 index 00000000..ce8e21b1 Binary files /dev/null and b/multilepton/data/met_xyCorrections_2024.json.gz differ diff --git a/hbt/hist_hooks/__init__.py b/multilepton/hist_hooks/__init__.py similarity index 100% rename from hbt/hist_hooks/__init__.py rename to multilepton/hist_hooks/__init__.py diff --git a/hbt/hist_hooks/binning.py b/multilepton/hist_hooks/binning.py similarity index 65% rename from hbt/hist_hooks/binning.py rename to multilepton/hist_hooks/binning.py index ca7b3c0c..733cbd2c 100644 --- a/hbt/hist_hooks/binning.py +++ b/multilepton/hist_hooks/binning.py @@ -8,6 +8,7 @@ import functools from dataclasses import dataclass +from collections import defaultdict import law import order as od 
@@ -15,7 +16,6 @@ from columnflow.util import maybe_import from columnflow.types import Any -np = maybe_import("numpy") hist = maybe_import("hist") @@ -47,51 +47,44 @@ class BinningConstraint: # helper to extract the name of the requested category and variable -def get_task_infos(task) -> dict[str, Any]: +def get_task_infos(task: law.Task, config) -> dict[str, Any]: # datacard task - if "config_category" in task.branch_data: + if (config_data := task.branch_data.get("config_data")): return { - "category_name": task.branch_data.config_category, - "variable_name": task.branch_data.config_variable, + "category_name": config_data[config.name].category, + "variable_name": config_data[config.name].variable, } # plotting task if "category" in task.branch_data: + # TODO: this might fail for multi-config tasks return { "category_name": task.branch_data.category, - "variable_name": task.branch_data.variable[0], + "variable_name": task.branch_data.variable, } - raise Exception(f"cannot determine task infos of unhandled task: {task!r}") + raise Exception(f"cannot determine task infos of unhandled task {task!r}") -def add_hooks(config: od.Config) -> None: +def add_hooks(analysis_inst: od.Analysis) -> None: """ - Add histogram hooks to a configuration. + Add histogram hooks to an analysis. """ def flat_s( - task, - hists: dict[od.Process, hist.Histogram], + task: law.Task, + hists: dict[od.Config, dict[od.Process, hist.Hist]], signal_process_name: str = "", n_bins: int = 10, constraint: BinningConstraint | None = None, - ) -> dict[od.Process, hist.Histogram]: - """Rebinnig of the histograms in *hists* to archieve a flat-signal distribution. - - :param task: task instance that contains the process informations - :param hists: A dictionary of histograms using Process instances as keys - - :raises RuntimeError: If the wanted number of bins is reached and the initial - bin edge is not minimal. - :raises Exception: If the number of actual bins ended up larger than requested. - :return: A dictionary of histograms using Process instances as keys + ) -> dict[od.Config, dict[od.Process, hist.Hist]]: """ - + Rebinning of the histograms in *hists* to achieve a flat-signal distribution. + """ + import numpy as np # edge finding helper def find_edges( signal_hist: hist.Hist, - background_hists: dict[od.Process, hist.Hist], - variable: str, + background_hists: list[tuple[od.Process, hist.Hist]], n_bins: int = 10, ) -> tuple[np.ndarray, np.ndarray]: """ @@ -99,13 +92,6 @@ def find_edges( The edges are determined by the signal distribution, while the background distribution is used to ensure that the background yield in each bin is sufficient. - :param signal_hist: The histogram that describes the signal distribution. - :param background_hists: A dictionary of histograms using the process as key - that describe the background distribution. - :param variable: The variable name that is rebinned. - :param n_bins: The number of bins that the signal distribution should be rebinned to. - - :return: A tuple containing the new bin edges and the indices that define the new bin edges.
""" # prepare parameters low_edge, max_edge = 0, 1 @@ -121,7 +107,7 @@ def find_edges( # prepare signal # fine binned histograms bin centers are approx equivalent to dnn output # flip arrays to start from the right - dnn_score_signal = np.flip(signal_hist.axes[variable].centers, axis=-1) + dnn_score_signal = np.flip(signal_hist.axes[-1].centers, axis=-1) y = np.flip(signal_hist.counts(), axis=-1) # set negative yields to zero and warn about it @@ -146,7 +132,7 @@ def find_edges( # start with empty data for counts and variances constraint_data[tag] = [np.zeros_like(y), np.zeros_like(y)] # loop over histograms and check if they fit the process - for proc, h in background_hists.items(): + for proc, h in background_hists: if proc.has_tag(tag): constraint_data[tag][0] += np.flip(h.counts(), axis=-1) constraint_data[tag][1] += np.flip(h.variances(), axis=-1) @@ -164,8 +150,7 @@ def find_edges( if y_remaining < y_min: stop_reason = "remaining signal yield insufficient" break - # find the index "stop_idx" of the source bin that marks the start of the next - # merged bin + # find the index "stop_idx" of the source bin that marks the start of the next merged bin if y_remaining >= y_per_bin: threshold = y_already_binned + y_per_bin # get indices of array of values above threshold @@ -225,8 +210,7 @@ def find_edges( if bin_edges[-1] != low_edge: if len(bin_edges) > n_bins: raise RuntimeError( - "number of bins reached and initial bin edge" - f" is not minimal bin edge (edges: {bin_edges})", + f"number of bins reached and initial bin edge is not minimal bin edge (edges: {bin_edges})", ) bin_edges.append(low_edge) indices_gathering.append(num_bins_orig) @@ -247,18 +231,14 @@ def find_edges( return np.flip(np.array(bin_edges), axis=-1), indices_gathering # rebinning helper - def apply_edges(h: hist.Hist, edges: np.ndarray, indices: np.ndarray, variable: str) -> hist.Hist: + def apply_edges( + h: hist.Hist, + edges: np.ndarray, + indices: np.ndarray, + ) -> hist.Hist: """ - Rebin the content axes determined by *variable* of a given hist histogram *h* to - given *edges* and their *indices*. - The rebinned histogram is returned. - - :param h: hist Histogram that is to be rebinned - :param edges: a array of ascending bin edges - :param indices: a array of indices that define the new bin edges - :param variable: variable name that is rebinned - - :return: rebinned hist histogram + Rebin the content axes of a given hist histogram *h* to given *edges* and their *indices*. The rebinned + histogram is returned. 
""" # sort edges and indices, by default they are sorted ascending_order = np.argsort(edges) @@ -266,11 +246,8 @@ def apply_edges(h: hist.Hist, edges: np.ndarray, indices: np.ndarray, variable: # create new hist and add axes with coresponding edges # define new axes, from old histogram and rebinned variable with new axis - axes = ( - [h.axes[axis] for axis in h.axes.name if axis not in variable] + - [hist.axis.Variable(edges, name=variable, label=f"{variable}-flat-s")] - ) - + variable = h.axes[-1].name + axes = list(h.axes[:-1]) + [hist.axis.Variable(edges, name=variable, label=f"{variable}_flat_s")] new_hist = hist.Hist(*axes, storage=hist.storage.Weight()) # slice the old histogram storage view with new edges @@ -283,90 +260,93 @@ def apply_edges(h: hist.Hist, edges: np.ndarray, indices: np.ndarray, variable: return new_hist - # extract task infos - task_infos = get_task_infos(task) - - # find signal histogram for which you will optimize, only 1 signal process is allowed - signal_proc = None - signal_hist = None - background_hists = {} - for process, j in hists.items(): - if process.has_tag("signal") and (signal_process_name in (process.name, "")): - if signal_proc: - logger.warning("more than one signal process found, use the first one") - else: - signal_proc = process - signal_hist = j - elif process.is_mc: - background_hists[process] = j - - if not signal_proc: - logger.warning("could not find any signal process, return hist unchanged") - return hists - - # 1. preparation - # get the leaf categories (e.g. {etau,mutau}__os__iso) - category_inst = task.config_inst.get_category(task_infos["category_name"]) - leaf_cats = ( - [category_inst] - if category_inst.is_leaf_category - else category_inst.get_leaf_categories() - ) - - # filter categories not existing in histogram - cat_ids_locations = [hist.loc(c.id) for c in leaf_cats if c.id in signal_hist.axes["category"]] - - # sum over different leaf categories - combined_signal_hist = signal_hist[{"category": cat_ids_locations}][{"category": sum}] - combined_signal_hist = combined_signal_hist[{"shift": hist.loc(0)}] - - # same for background - for process, j in background_hists.items(): - combined_background_hist = j[{"category": cat_ids_locations}][{"category": sum}] - combined_background_hist = combined_background_hist[{"shift": hist.loc(0)}] - background_hists[process] = combined_background_hist + # find signal and background histograms + signal_hist: dict[od.Config, hist.Hist] = {} + background_hists: dict[od.Config, dict[od.Process, hist.Hist]] = defaultdict(dict) + for config_inst, proc_hists in hists.items(): + for process_inst, h in proc_hists.items(): + if process_inst.has_tag("signal") and (signal_process_name in (process_inst.name, "")): + if config_inst in signal_hist: + logger.warning("more than one signal histogram found, use the first one") + else: + signal_hist[config_inst] = h + elif process_inst.is_mc: + background_hists[config_inst][process_inst] = h + if config_inst not in signal_hist: + logger.warning(f"could not find any signal process for config {config_inst}, skip flat_s hook") + return hists - # 2. determine bin edges + # extract task infos + task_infos = {config_inst: get_task_infos(task, config_inst) for config_inst in hists} + + # 1. 
select and sum over requested categories + for config_inst in hists: + # get the leaf categories + category_inst = config_inst.get_category(task_infos[config_inst]["category_name"]) + leaf_cats = ( + [category_inst] + if category_inst.is_leaf_category + else category_inst.get_leaf_categories() + ) + # filter categories not existing in histogram + cat_ids_locations = [ + hist.loc(c.name) for c in leaf_cats + if c.name in signal_hist[config_inst].axes["category"] + ] + + # sum over different leaf categories and select the nominal shift + select = lambda h: h[{"category": cat_ids_locations}][{"category": sum, "shift": hist.loc("nominal")}] + signal_hist[config_inst] = select(signal_hist[config_inst]) + for process_inst, h in background_hists[config_inst].items(): + background_hists[config_inst][process_inst] = select(h) + + # 2. determine bin edges, considering signal and background sums over all configs + # note: for signal, this assumes that variable axes have the same name, but they probably always will + signal_sum = sum((signal_hists := list(signal_hist.values()))[1:], signal_hists[0].copy()) + background_sum = sum((list(proc_hists.items()) for proc_hists in background_hists.values()), []) flat_s_edges, flat_s_indices = find_edges( - signal_hist=combined_signal_hist, - background_hists=background_hists, - variable=task_infos["variable_name"], + signal_hist=signal_sum, + background_hists=background_sum, n_bins=n_bins, ) # 3. apply to hists - for process, histogram in hists.items(): - hists[process] = apply_edges( - histogram, - flat_s_edges, - flat_s_indices, - task_infos["variable_name"], - ) - + for config_inst, proc_hists in hists.items(): + for process_inst, h in proc_hists.items(): + proc_hists[process_inst] = apply_edges( + h=h, + edges=flat_s_edges, + indices=flat_s_indices, + ) return hists # some usual binning constraints - def constrain_tt_dy(counts: dict[str, BinCount]) -> bool: - # have at least one tt, one dy, and four total background events - # as well as positive yields + def constrain_tt_dy(counts: dict[str, BinCount], n_tt: int = 1, n_dy: int = 1, n_sum: int = 4) -> bool: + # have at least one tt, one dy, and four total background events as well as positive yields return ( - counts["tt"].num >= 1 and - counts["dy"].num >= 1 and - counts["tt"].num + counts["dy"].num >= 4 and + counts["tt"].num >= n_tt and + counts["dy"].num >= n_dy and + (counts["tt"].num + counts["dy"].num) >= n_sum and counts["tt"].val > 0 and counts["dy"].val > 0 ) # add hooks - config.x.hist_hooks.flats = flat_s - config.x.hist_hooks.flats_kl1_n10 = functools.partial( + analysis_inst.x.hist_hooks.flats = flat_s + analysis_inst.x.hist_hooks.flats_kl1_n10 = functools.partial( flat_s, signal_process_name="hh_ggf_hbb_htt_kl1_kt1", n_bins=10, ) - config.x.hist_hooks.flats_kl1_n10_guarded = functools.partial( + analysis_inst.x.hist_hooks.flats_kl1_n10_guarded = functools.partial( flat_s, signal_process_name="hh_ggf_hbb_htt_kl1_kt1", n_bins=10, constraint=BinningConstraint(["tt", "dy"], constrain_tt_dy), ) + analysis_inst.x.hist_hooks.flats_kl1_n10_guarded5 = functools.partial( + flat_s, + signal_process_name="hh_ggf_hbb_htt_kl1_kt1", + n_bins=10, + constraint=BinningConstraint(["tt", "dy"], functools.partial(constrain_tt_dy, n_tt=5, n_dy=5, n_sum=10)), + ) diff --git a/multilepton/hist_hooks/blinding.py b/multilepton/hist_hooks/blinding.py new file mode 100644 index 00000000..cccda226 --- /dev/null +++ b/multilepton/hist_hooks/blinding.py @@ -0,0 +1,34 @@ +# coding: utf-8 + +""" +Histogram hooks for 
blinding data points. +""" + +from __future__ import annotations + +import law +import order as od + +from columnflow.util import maybe_import + +hist = maybe_import("hist") + + +def add_hooks(analysis_inst: od.Analysis) -> None: + """ + Add histogram hooks to an analysis. + """ + def remove_data_hists( + task: law.Task, + hists: dict[od.Config, dict[od.Process, hist.Hist]], + ) -> dict[od.Config, dict[od.Process, hist.Hist]]: + """ + Remove data histograms from the input histograms. + """ + return { + config_inst: {proc: hist for proc, hist in proc_hists.items() if not proc.is_data} + for config_inst, proc_hists in hists.items() + } + + # add hooks + analysis_inst.x.hist_hooks.blind = remove_data_hists diff --git a/hbt/hist_hooks/qcd.py b/multilepton/hist_hooks/qcd.py similarity index 79% rename from hbt/hist_hooks/qcd.py rename to multilepton/hist_hooks/qcd.py index d4903c83..e48970f7 100644 --- a/hbt/hist_hooks/qcd.py +++ b/multilepton/hist_hooks/qcd.py @@ -5,7 +5,6 @@ """ from __future__ import annotations - from collections import defaultdict import law @@ -13,11 +12,10 @@ import scinum as sn from columnflow.util import maybe_import, DotDict +from columnflow.types import Any np = maybe_import("numpy") hist = maybe_import("hist") - - logger = law.logger.get_logger(__name__) @@ -41,23 +39,34 @@ def integrate_num(num: sn.Number, axis=None) -> sn.Number: ) -def add_hooks(config: od.Config) -> None: +# helper to ensure that a specific category exists on the "category" axis of a histogram +def ensure_category(h: hist.Histogram, category_name: str) -> hist.Histogram: + cat_axis = h.axes["category"] + if category_name in cat_axis: + return h + dummy_fill = {ax.name: ax[0] for ax in h.axes if ax.name != "category"} + h.fill(**dummy_fill, category=category_name, weight=0.0) + return h + + +def add_hooks(analysis_inst: od.Analysis) -> None: """ - Add histogram hooks to a configuration. + Add histogram hooks to an analysis.
""" - def qcd_estimation(task, hists): - if not hists: - return hists - + def qcd_estimation_per_config( + task: law.Task, + config_inst: od.Config, + hists: dict[od.Process, Any], + ) -> dict[od.Process, Any]: # get the qcd process - qcd_proc = config.get_process("qcd", default=None) + qcd_proc = config_inst.get_process("qcd", default=None) if not qcd_proc: return hists - # extract all unique category ids and verify that the axis order is exactly + # extract all unique category names and verify that the axis order is exactly # "category -> shift -> variable" which is needed to insert values at the end CAT_AXIS, SHIFT_AXIS, VAR_AXIS = range(3) - category_ids = set() + category_names = set() for proc, h in hists.items(): # validate axes assert len(h.axes) == 3 @@ -65,13 +74,12 @@ def qcd_estimation(task, hists): assert h.axes[SHIFT_AXIS].name == "shift" # get the category axis cat_ax = h.axes["category"] - for cat_index in range(cat_ax.size): - category_ids.add(cat_ax.value(cat_index)) + category_names.update(list(cat_ax)) # create qcd groups qcd_groups: dict[str, dict[str, od.Category]] = defaultdict(DotDict) - for cat_id in category_ids: - cat_inst = config.get_category(cat_id) + for cat_name in category_names: + cat_inst = config_inst.get_category(cat_name) if cat_inst.has_tag({"os", "iso"}, mode=all): qcd_groups[cat_inst.x.qcd_group].os_iso = cat_inst elif cat_inst.has_tag({"os", "noniso"}, mode=all): @@ -101,10 +109,12 @@ def qcd_estimation(task, hists): for group_name in complete_groups: group = qcd_groups[group_name] - # get the corresponding histograms and convert them to number objects, - # each one storing an array of values with uncertainties + # get the corresponding histograms and convert them to number objects, each one storing an array of values + # with uncertainties # shapes: (SHIFT, VAR) - get_hist = lambda h, region_name: h[{"category": hist.loc(group[region_name].id)}] + def get_hist(h: hist.Histogram, region_name: str) -> hist.Histogram: + h = ensure_category(h, group[region_name].name) + return h[{"category": hist.loc(group[region_name].name)}] os_noniso_mc = hist_to_num(get_hist(mc_hist, "os_noniso"), "os_noniso_mc") ss_noniso_mc = hist_to_num(get_hist(mc_hist, "ss_noniso"), "ss_noniso_mc") ss_iso_mc = hist_to_num(get_hist(mc_hist, "ss_iso"), "ss_iso_mc") @@ -142,14 +152,14 @@ def broadcast_data_num(num: sn.Number) -> None: int_ss_noniso_neg = int_ss_noniso <= 0 if int_ss_iso_neg.any(): shift_ids = list(map(mc_hist.axes["shift"].value, np.where(int_ss_iso_neg)[0])) - shifts = list(map(config.get_shift, shift_ids)) + shifts = list(map(config_inst.get_shift, shift_ids)) logger.warning( f"negative QCD integral in ss_iso region for group {group_name} and shifts: " f"{', '.join(map(str, shifts))}", ) if int_ss_noniso_neg.any(): shift_ids = list(map(mc_hist.axes["shift"].value, np.where(int_ss_noniso_neg)[0])) - shifts = list(map(config.get_shift, shift_ids)) + shifts = list(map(config_inst.get_shift, shift_ids)) logger.warning( f"negative QCD integral in ss_noniso region for group {group_name} and shifts: " f"{', '.join(map(str, shifts))}", @@ -191,17 +201,25 @@ def broadcast_data_num(num: sn.Number) -> None: # insert values into the qcd histogram cat_axis = qcd_hist.axes["category"] for cat_index in range(cat_axis.size): - if cat_axis.value(cat_index) == group.os_iso.id: + if cat_axis.value(cat_index) == group.os_iso.name: qcd_hist.view().value[cat_index, ...] = os_iso_qcd_values qcd_hist.view().variance[cat_index, ...] 
= os_iso_qcd_variances break else: raise RuntimeError( - f"could not find index of bin on 'category' axis of qcd histogram {qcd_hist} " - f"for category {group.os_iso}", + f"could not find index of bin on 'category' axis of qcd histogram {qcd_hist} for category " + f"{group.os_iso}", ) - return hists + def qcd_estimation( + task: law.Task, + hists: dict[od.Config, dict[od.Process, Any]], + ) -> dict[od.Config, dict[od.Process, Any]]: + return { + config_inst: qcd_estimation_per_config(task, config_inst, hists[config_inst]) + for config_inst in hists.keys() + } + # add the hook - config.x.hist_hooks.qcd = qcd_estimation + analysis_inst.x.hist_hooks.qcd = qcd_estimation diff --git a/hbt/inference/__init__.py b/multilepton/histogramming/__init__.py similarity index 100% rename from hbt/inference/__init__.py rename to multilepton/histogramming/__init__.py diff --git a/multilepton/histogramming/default.py b/multilepton/histogramming/default.py new file mode 100644 index 00000000..26e402fe --- /dev/null +++ b/multilepton/histogramming/default.py @@ -0,0 +1,78 @@ +# coding: utf-8 + +""" +Default histogram producers (mostly for event weight generation). +""" + +from columnflow.histogramming import HistProducer +from columnflow.histogramming.default import cf_default +from columnflow.columnar_util import Route +from columnflow.util import maybe_import, pattern_matcher + +ak = maybe_import("awkward") +np = maybe_import("numpy") + + +@cf_default.hist_producer( + # both produced columns and dependent shifts are defined in init below + # options to keep or drop specific weights + keep_weights=None, + drop_weights={"normalization_weight_inclusive"}, +) +def default(self: HistProducer, events: ak.Array, **kwargs) -> ak.Array: + weight = ak.Array(np.ones(len(events), dtype=np.float32)) + + # build the full event weight + if self.dataset_inst.is_mc and len(events): + for column in self.weight_columns: + weight = weight * Route(column).apply(events) + + return events, weight + + +@default.init +def default_init(self: HistProducer) -> None: + # use the config's auxiliary event_weights, drop some of them based on drop_weights, and on this + # weight producer instance, store weight_columns, used columns, and shifts + self.weight_columns = set() + + if self.dataset_inst.is_data: + return + + # helpers to match to kept or dropped weights + do_keep = pattern_matcher(self.keep_weights) if self.keep_weights else (lambda _, /: True) + do_drop = pattern_matcher(self.drop_weights) if self.drop_weights else (lambda _, /: False) + + # collect all possible weight columns and affected shifts + all_weights = self.config_inst.x.event_weights.copy() + all_weights.update(self.dataset_inst.x("event_weights", {})) + for weight_name, shift_insts in all_weights.items(): + if not do_keep(weight_name) or do_drop(weight_name): + continue + + # manually skip pdf and scale weights for samples that do not have lhe info + is_lhe_weight = any(shift_inst.has_tag("lhe_weight") for shift_inst in shift_insts) + if is_lhe_weight and self.dataset_inst.has_tag("no_lhe_weights"): + continue + + self.weight_columns.add(weight_name) + self.uses.add(weight_name) + self.shifts |= {shift_inst.name for shift_inst in shift_insts} + + +normalization_inclusive = default.derive("normalization_inclusive", cls_dict={ + "drop_weights": {"normalization_weight"}, +}) + +normalization_only = default.derive("normalization_only", cls_dict={ + "keep_weights": {"normalization_weight"}, +}) + +normalization_inclusive_only = 
default.derive("normalization_inclusive_only", cls_dict={ + "keep_weights": {"normalization_weight_inclusive"}, + "drop_weights": None, +}) + +no_trigger_weight = default.derive("no_trigger_weight", cls_dict={ + "drop_weights": {"normalization_weight_inclusive", "trigger_weight"}, +}) diff --git a/hbt/ml/__init__.py b/multilepton/inference/__init__.py similarity index 100% rename from hbt/ml/__init__.py rename to multilepton/inference/__init__.py diff --git a/multilepton/inference/base.py b/multilepton/inference/base.py new file mode 100644 index 00000000..9843ec82 --- /dev/null +++ b/multilepton/inference/base.py @@ -0,0 +1,85 @@ +# coding: utf-8 + +""" +Inference base models with common functionality. +""" + +import re +import abc + +import order as od + +from columnflow.inference import InferenceModel + + +class MULTILEPTONInferenceModelBase(InferenceModel): + """ + Base class for statistical models with support for a single or and multiple configs. In the latter case, + each set of processes is created per config and will thus have different names, + resulting in a "stacking" of histograms. + """ + + def __init__(self, *args, **kwargs) -> None: + # members that are set in init_objects + self.single_config: bool + self.campaign_keys: dict[od.Config, str] = {} + self.campaign_key: str + self.proc_map: dict[str, dict[od.Config, str]] = {} + + super().__init__(*args, **kwargs) + + @abc.abstractmethod + def init_proc_map(self) -> None: + # should setup self.proc_map + ... + + @abc.abstractmethod + def init_categories(self) -> None: + # should setup inference model cateogries + ... + + @abc.abstractmethod + def init_processes(self) -> None: + # should setup inference model processes + ... + + @abc.abstractmethod + def init_parameters(self) -> None: + # should setup inference model parameters + ... + + def init_func(self) -> None: + # the default initialization is split into logical parts + self.init_objects() + self.init_categories() + self.init_processes() + self.init_parameters() + self.init_cleanup() + + def inject_era(self, config_inst: od.Config, combine_name: str) -> str: + # helper to inject era info into combine process names + campaign_key = self.campaign_keys[config_inst] + # for HH, inject the key before the ecm value + if (m := re.match(r"^((ggHH|qqHH)_.+)_(13p(0|6)TeV_hbbhtt)$", combine_name)): + return f"{m.group(1)}_{campaign_key}_{m.group(3)}" + # for single H, inject the key before the higgs decay + if (m := re.match(r"^(.+)_(hbb|htt)$", combine_name)): + return f"{m.group(1)}_{campaign_key}_{m.group(2)}" + # for all other processes, just append the campaign key + return f"{combine_name}_{campaign_key}" + + def init_objects(self) -> None: + # gather campaign identifier keys per config + self.single_config = len(self.config_insts) == 1 + for config_inst in self.config_insts: + year2 = config_inst.campaign.x.year % 100 + self.campaign_keys[config_inst] = f"{year2}{config_inst.campaign.x.postfix}" + + # overall campaign key + self.campaign_key = "_".join(self.campaign_keys.values()) + + # setup the process_map + self.init_proc_map() + + def init_cleanup(self) -> None: + self.cleanup(keep_parameters="THU_HH") diff --git a/multilepton/inference/default.py b/multilepton/inference/default.py new file mode 100644 index 00000000..7c65a80e --- /dev/null +++ b/multilepton/inference/default.py @@ -0,0 +1,247 @@ +# coding: utf-8 + +""" +Default inference model. 
+""" +from __future__ import annotations + +import law + +from columnflow.inference import ParameterType, FlowStrategy +from columnflow.config_util import get_datasets_from_process + +from multilepton.inference.base import MULTILEPTONInferenceModelBase + + +logger = law.logger.get_logger(__name__) + + +class default(MULTILEPTONInferenceModelBase): + """ + Default statistical model for the HH -> bbtautau analysis. + """ + + add_qcd = False + fake_data = True + + def init_proc_map(self) -> None: + # mapping of process names in the datacard ("combine name") to configs and process names in a dict + name_map = dict([ + *[ + (f"ggHH_kl_{kl}_kt_1_13p6TeV_hbbhtt", f"hh_ggf_hbb_htt_kl{kl}_kt1") + for kl in ["0", "1", "2p45", "5"] + ], + # ("ttbar", "tt"), + ("ttbarV", "ttv"), + # ("ttbarVV", "ttvv"), + # ("singlet", "st"), + # ("DY", "dy"), + # # ("EWK", "z"), # currently not use + # ("W", "w"), + # ("VV", "vv"), + # ("VVV", "vvv"), + # ("WH_htt", "wh"), + # ("ZH_hbb", "zh"), + # ("ggH_htt", "h_ggf"), + # ("qqH_htt", "h_vbf"), + # ("ttH_hbb", "tth"), + ]) + if self.add_qcd: + name_map["QCD"] = "qcd" + + # insert into proc_map + # (same process name for all configs for now) + for combine_name, proc_name in name_map.items(): + # same process name for all configs for now + for config_inst in self.config_insts: + _combine_name = self.inject_era(config_inst, combine_name) + self.proc_map.setdefault(_combine_name, {})[config_inst] = proc_name + + def init_categories(self) -> None: + for ch in ["etau", "mutau", "tautau"]: + for cat in ["res1b", "res2b", "boosted"]: + # gather fake processes to model data when needed + fake_processes = [] + if self.fake_data: + fake_processes = list(set.union(*( + { + combine_name + for config_inst, proc_name in proc_map.items() + if ( + not config_inst.get_process(proc_name).has_tag("nonresonant_signal") and + proc_name != "qcd" + ) + } + for combine_name, proc_map in self.proc_map.items() + ))) + # add the category + self.add_category( + f"cat_{self.campaign_key}_{ch}_{cat}", + config_data={ + config_inst.name: self.category_config_spec( + category=f"{ch}__{cat}__os__iso", + # variable="res_dnn_hh_fine", + variable="jet1_pt", + data_datasets=["data_*"], + ) + for config_inst in self.config_insts + }, + data_from_processes=fake_processes, + mc_stats=10.0, + flow_strategy=FlowStrategy.move, + ) + + def init_processes(self) -> None: + for combine_name, proc_map in self.proc_map.items(): + for config_inst, proc_name in proc_map.items(): + proc_inst = config_inst.get_process(proc_name) + is_dynamic = proc_name == "qcd" + dataset_names = [] + if not is_dynamic: + dataset_names = [ + dataset.name + for dataset in get_datasets_from_process(config_inst, proc_name, strategy="all") + ] + if not dataset_names: + logger.debug( + f"skipping process {proc_name} in inference model {self.cls_name}, no matching datasets " + f"found in config {config_inst.name}", + ) + continue + self.add_process( + name=combine_name, + config_data={ + config_inst.name: self.process_config_spec( + process=proc_name, + mc_datasets=dataset_names, + ), + }, + is_signal=proc_inst.has_tag("nonresonant_signal"), + is_dynamic=is_dynamic, + ) + + def init_parameters(self) -> None: + # general groups + self.add_parameter_group("experiment") + self.add_parameter_group("theory") + + # groups that contain parameters that solely affect the signal cross section and/or br + self.add_parameter_group("signal_norm_xs") + self.add_parameter_group("signal_norm_xsbr") + + # parameter that is added by the HH physics model, 
representing kl-dependent QCDscale + mtop + # uncertainties on the ggHH cross section + self.add_parameter_to_group("THU_HH", "theory") + self.add_parameter_to_group("THU_HH", "signal_norm_xs") + self.add_parameter_to_group("THU_HH", "signal_norm_xsbr") + + # helper to select processes across multiple configs + def inject_all_eras(*names: str) -> list[str]: + gen = ( + {self.inject_era(config_inst, name) for config_inst in self.config_insts} + for name in names + ) + return list(set.union(*gen)) + + # theory uncertainties + self.add_parameter( + "BR_hbb", + type=ParameterType.rate_gauss, + process=["*_hbb", "*_hbbhtt"], + effect=(0.9874, 1.0124), + group=["theory", "signal_norm_xsbr"], + ) + self.add_parameter( + "BR_htt", + type=ParameterType.rate_gauss, + process=["*_htt", "*_hbbhtt"], + effect=(0.9837, 1.0165), + group=["theory", "signal_norm_xsbr"], + ) + self.add_parameter( + "pdf_gg", # contains alpha_s + type=ParameterType.rate_gauss, + process=inject_all_eras("TT"), + effect=1.042, + group=["theory"], + ) + self.add_parameter( + "pdf_Higgs_ggHH", # contains alpha_s + type=ParameterType.rate_gauss, + process="ggHH_*", + effect=1.023, + group=["theory", "signal_norm_xs", "signal_norm_xsbr"], + ) + # self.add_parameter( + # "pdf_Higgs_qqHH", # contains alpha_s + # type=ParameterType.rate_gauss, + # process="qqHH_*", + # effect=1.027, + # group=["theory", "signal_norm_xs", "signal_norm_xsbr"], + # ) + self.add_parameter( + "QCDscale_ttbar", + type=ParameterType.rate_gauss, + process=inject_all_eras("TT"), + effect=(0.965, 1.024), + group=["theory"], + ) + # self.add_parameter( + # "QCDscale_qqHH", + # type=ParameterType.rate_gauss, + # process="qqHH_*", + # effect=(0.9997, 1.0005), + # group=["theory", "signal_norm_xs", "signal_norm_xsbr"], + # ) + + # lumi + for config_inst in self.config_insts: + ckey = self.campaign_keys[config_inst] + lumi = config_inst.x.luminosity + for unc_name in lumi.uncertainties: + self.add_parameter( + unc_name, + type=ParameterType.rate_gauss, + effect=lumi.get(names=unc_name, direction=("down", "up"), factor=True), + process=[f"*{ckey}*", "!QCD*"], + process_match_mode=all, + group="experiment", + ) + # pileup + # for config_inst in self.config_insts: + # ckey = self.campaign_keys[config_inst] + # self.add_parameter( + # f"CMS_pileup_20{ckey}", + # type=ParameterType.shape, + # config_data={ + # config_inst.name: self.parameter_config_spec(shift_source="minbias_xs"), + # }, + # process=[f"*{ckey}*", "!QCD*"], + # process_match_mode=all, + # group="experiment", + # ) + + # btag + # TODO: adapt for multi-config and jec correlation + # for name in self.config_inst.x.btag_unc_names: + # self.add_parameter( + # f"CMS_btag_{name}", + # type=ParameterType.shape, + # config_data={ + # self.config_inst.name: self.parameter_config_spec(shift_source=f"btag_{name}"), + # }, + # group="experiment", + # ) + + +@default.inference_model +def default_no_shifts(self): + super(default_no_shifts, self).init_func() + + # remove all parameters that require a shift source other than nominal + + for category_name, process_name, parameter in self.iter_parameters(): + if parameter.type.is_shape or any(trafo.from_shape for trafo in parameter.transformations): + self.remove_parameter(parameter.name, process=process_name, category=category_name) + # repeat the cleanup + self.init_cleanup() diff --git a/hbt/production/__init__.py b/multilepton/ml/__init__.py similarity index 100% rename from hbt/production/__init__.py rename to multilepton/ml/__init__.py diff --git a/hbt/ml/test.py 
b/multilepton/ml/test.py similarity index 97% rename from hbt/ml/test.py rename to multilepton/ml/test.py index bce95ea1..9ceadb65 100644 --- a/hbt/ml/test.py +++ b/multilepton/ml/test.py @@ -18,12 +18,10 @@ np = maybe_import("numpy") ak = maybe_import("awkward") - law.contrib.load("tensorflow") class TestModel(MLModel): - def setup(self): # dynamically add variables for the quantities produced by this model if f"{self.cls_name}.kl" not in self.config_inst.variables: @@ -35,7 +33,7 @@ def setup(self): ) def sandbox(self, task: law.Task) -> str: - return dev_sandbox("bash::$HBT_BASE/sandboxes/venv_columnar_tf.sh") + return dev_sandbox("bash::$MULTILEPTON_BASE/sandboxes/venv_multilepton.sh") def datasets(self, config_inst: od.Config) -> set[od.Dataset]: return { @@ -106,6 +104,5 @@ def evaluate( return events - # usable derivations test_model = TestModel.derive("test_model", cls_dict={"folds": 2}) diff --git a/multilepton/ml/tf_evaluator.py b/multilepton/ml/tf_evaluator.py new file mode 100644 index 00000000..6362e02c --- /dev/null +++ b/multilepton/ml/tf_evaluator.py @@ -0,0 +1,236 @@ +# coding: utf-8 + +""" +Generic interface for loading and evaluating TensorFlow models in a separate process. +Data exchange is handled through multiprocessing pipes. +""" + +from __future__ import annotations + +import os +import time +import pathlib +from multiprocessing import Process, Pipe +from multiprocessing.connection import Connection +from dataclasses import dataclass +from typing import Any + + +STOP_SIGNAL = "STOP" + + +class TFEvaluator: + """ + TensorFlow model evaluator that runs in a separate process with support for multiple models. + + .. code-block:: python + + evaluator = TFEvaluator() + evaluator.add_model("model_name", "path/to/model") + with evaluator: + result = evaluator("model_name", input_data) + """ + + @dataclass + class Model: + name: str + path: str + pipe: Connection | None = None + signature_key: str = "" + + def __init__(self) -> None: + super().__init__() + + self._models: dict[str, TFEvaluator.Model] = {} + self._p: Process | None = None + + self.delay = 0.2 + self.silent = False + + def __enter__(self) -> TFEvaluator: + self.start() + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.stop() + + def __del__(self) -> None: + self.stop() + + def __call__(self, *args, **kwargs) -> Any: + return self.evaluate(*args, **kwargs) + + @property + def running(self) -> bool: + return self._p is not None + + def add_model(self, name: str, path: str | pathlib.Path, signature_key: str = "") -> None: + if self.running: + raise ValueError("cannot add models while running") + if name in self._models: + raise ValueError(f"model with name '{name}' already exists") + + # normalize path + path = str(path) + path = os.path.expandvars(os.path.expanduser(path)) + path = os.path.abspath(os.path.abspath(path)) + + # add it + self._models[name] = TFEvaluator.Model(name=name, path=path, signature_key=signature_key) + + def start(self) -> None: + if self.running: + raise ValueError("process already started") + + # build the subprocess config + config = [] + for model in self._models.values(): + parent_pipe, child_pipe = Pipe() + model.pipe = parent_pipe + config.append({"name": model.name, "path": model.path, "pipe": child_pipe}) + + # create and start the process + self._p = Process( + target=_tf_evaluate, + args=(config,), + kwargs={"delay": self.delay, "silent": self.silent}, + ) + self._p.start() + + def evaluate(self, name: str, *args, **kwargs) -> 
Any: + if not self.running: + raise ValueError("process not started") + + # get the model + if name not in self._models: + raise ValueError(f"model with name '{name}' does not exist") + model = self._models[name] + + # evaluate and send back result + model.pipe.send((args, kwargs)) # type: ignore[union-attr] + return model.pipe.recv() # type: ignore[union-attr] + + def stop(self, timeout: int | float = 5) -> None: + # stop and remove model pipes + for model in self._models.values(): + if model.pipe is not None: + model.pipe.send(STOP_SIGNAL) + model.pipe.close() + model.pipe = None + + # nothing to do when not running + if not self.running: + return + + # join to wait for normal termination + if self._p.is_alive(): + self._p.join(timeout) + + # kill if still alive + if self._p.is_alive(): + self._p.kill() + + # reset + self._p = None + + +def _tf_evaluate( + config: list[dict[str, Any]], + /, + *, + delay: int | float = 0.2, + silent: bool = False, +) -> None: + _print = (lambda *args, **kwargs: None) if silent else print + + _print("importing tensorflow ...") + import numpy as np + import tensorflow as tf # type: ignore[import-not-found,import-untyped] + _print("done") + + @dataclass + class Model: + name: str + path: str + pipe: Connection + signature_key: str = "" + model: Any = None + + @classmethod + def new(cls, config: dict[str, Any], /) -> Model: + for attr in ("name", "path", "pipe"): + if attr not in config: + raise ValueError(f"missing field '{attr}' in model config") + if not os.path.exists(config["path"]): + raise FileNotFoundError(f"model file '{config['path']}' does not exist") + if not isinstance(config["pipe"], Connection): + raise TypeError(f"'pipe' {config['pipe']} not of type '{Connection}'") + return cls( + name=config["name"], + path=config["path"], + pipe=config["pipe"], + signature_key=config.get("signature_key", ""), + ) + + def load(self) -> None: + sig_msg = f" (signature '{self.signature_key}')" if self.signature_key else "" + _print(f"loading model '{self.name}'{sig_msg} from {self.path} ...") + + model = tf.saved_model.load(self.path) + self.model = model if not self.signature_key else model.signatures[self.signature_key] + + _print("done") + + def evaluate(self, *args, **kwargs) -> np.ndarray: + return self.model(*args, **kwargs).numpy() + + def clear(self) -> None: + _print(f"clearing model '{self.name}'") + self.model = None + self.pipe.close() + + # convert to model objects + models = [Model.new(item) for item in config] + + # load model objects + for model in models: + model.load() + + # helper for gracefully shutting down + def shutdown() -> None: + for model in models: + model.clear() + models.clear() + + # start loop listening for data + while models: + remove_models: list[int] = [] + for i, model in enumerate(models): + # skip if there is no data to process + if not model.pipe.poll(): + continue + + # get data and process + data = model.pipe.recv() + if isinstance(data, tuple) and len(data) == 2: + # evaluate + try: + args, kwargs = data + result = model.evaluate(*args, **kwargs) + except: + shutdown() + raise + # send back result + model.pipe.send(result) + + elif data == STOP_SIGNAL: + # remove model + model.clear() + remove_models.append(i) + + else: + raise ValueError(f"unexpected data type {type(data)}") + + # reduce models and sleep + models = [model for i, model in enumerate(models) if i not in remove_models] + time.sleep(delay) diff --git a/hbt/selection/__init__.py b/multilepton/production/__init__.py similarity index 100% rename from 
hbt/selection/__init__.py rename to multilepton/production/__init__.py diff --git a/hbt/production/btag.py b/multilepton/production/btag.py similarity index 76% rename from hbt/production/btag.py rename to multilepton/production/btag.py index 14edd9b8..4b8a5662 100644 --- a/hbt/production/btag.py +++ b/multilepton/production/btag.py @@ -8,14 +8,17 @@ import functools +import law + from columnflow.production import Producer, producer from columnflow.production.cms.btag import btag_weights -from columnflow.util import maybe_import, safe_div, InsertableDict +from columnflow.util import maybe_import, safe_div from columnflow.columnar_util import set_ak_column - +from columnflow.types import Any np = maybe_import("numpy") ak = maybe_import("awkward") +hist = maybe_import("hist") # helper set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32) @@ -35,7 +38,7 @@ @producer( - uses={"process_id", "Jet.pt"}, + uses={"process_id", "Jet.{mass,pt,phi,eta}"}, # only run on mc mc_only=True, # configurable weight producer class @@ -72,13 +75,10 @@ def _normalized_btag_weights(self: Producer, events: ak.Array, **kwargs) -> ak.A return events -@_normalized_btag_weights.init -def _normalized_btag_weights_init(self: Producer) -> None: +@_normalized_btag_weights.post_init +def _normalized_btag_weights_post_init(self: Producer, **kwargs) -> None: assert self.btag_weights_cls, "btag_weights_cls must be set" - if not getattr(self, "dataset_inst", None): - return - # reuse the weight and tagger names self.weight_name = self.btag_weights_cls.weight_name self.tagger_name = self.btag_weights_cls.tagger_name @@ -91,41 +91,42 @@ def _normalized_btag_weights_init(self: Producer) -> None: @_normalized_btag_weights.requires -def _normalized_btag_weights_requires(self: Producer, reqs: dict) -> None: +def _normalized_btag_weights_requires(self: Producer, task: law.Task, reqs: dict, **kwargs) -> None: from columnflow.tasks.selection import MergeSelectionStats reqs["selection_stats"] = MergeSelectionStats.req_different_branching( - self.task, - branch=-1 if self.task.is_workflow() else 0, + task, + branch=-1 if task.is_workflow() else 0, ) @_normalized_btag_weights.setup -def _normalized_btag_weights_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None: - # load the selection stats - selection_stats = self.task.cached_value( - key="selection_stats", - func=lambda: inputs["selection_stats"]["stats"].load(formatter="json"), +def _normalized_btag_weights_setup( + self: Producer, + task: law.Task, + inputs: dict[str, Any], + **kwargs, +) -> None: + # load the selection hists + hists = task.cached_value( + key="selection_hists", + func=lambda: inputs["selection_stats"]["hists"].load(formatter="pickle"), ) # get the unique process ids in that dataset - key = f"sum_btag_weight_{self.tagger_name}_selected_nob_{self.tagger_name}_per_process_and_njet" - self.unique_process_ids = list(map(int, selection_stats[key].keys())) + key = f"sum_btag_weight_{self.tagger_name}_selected_nob_{self.tagger_name}" + self.unique_process_ids = list(hists[key].axes["process"]) # get the maximum numbers of jets - max_n_jets = max(map(int, sum((list(d.keys()) for d in selection_stats[key].values()), []))) + max_n_jets = max(list(hists[key].axes["n_jets"])) # helper to get sums of mc weights per pid and njet, with an optional weight name - def sum_per_pid(pid, weight_name="", /): - if weight_name: - weight_name += "_" - key = f"sum_mc_weight_{weight_name}selected_nob_{self.tagger_name}_per_process" - 
return selection_stats[key].get(str(pid), 0.0) - - def sum_per_pid_njet(pid, n_jets, weight_name="", /): + def get_sum(pid, n_jets, weight_name="", /) -> float: if weight_name: weight_name += "_" - key = f"sum_mc_weight_{weight_name}selected_nob_{self.tagger_name}_per_process_and_njet" - return selection_stats[key].get(str(pid), {}).get(str(n_jets), 0.0) + if n_jets != sum: + n_jets = hist.loc(n_jets) + key = f"sum_mc_weight_{weight_name}selected_nob_{self.tagger_name}" + return hists[key][{"process": hist.loc(pid), "n_jets": n_jets}].value # ratio per weight and pid # extract the ratio per weight, pid and also the jet multiplicity, using the latter as in index @@ -137,13 +138,13 @@ def sum_per_pid_njet(pid, n_jets, weight_name="", /): continue # normal ratio self.ratio_per_pid[weight_name] = { - pid: safe_div(sum_per_pid(pid), sum_per_pid(pid, weight_name)) + pid: safe_div(get_sum(pid, sum), get_sum(pid, sum, weight_name)) for pid in self.unique_process_ids } # per jet multiplicity ratio self.ratio_per_pid_njet[weight_name] = { pid: np.array([ - safe_div(sum_per_pid_njet(pid, n_jets), sum_per_pid_njet(pid, n_jets, weight_name)) + safe_div(get_sum(pid, n_jets), get_sum(pid, n_jets, weight_name)) for n_jets in range(max_n_jets + 1) ]) for pid in self.unique_process_ids diff --git a/hbt/production/default.py b/multilepton/production/default.py similarity index 55% rename from hbt/production/default.py rename to multilepton/production/default.py index a889f336..5de32e57 100644 --- a/hbt/production/default.py +++ b/multilepton/production/default.py @@ -9,28 +9,34 @@ from columnflow.production.categories import category_ids from columnflow.production.cms.electron import electron_weights from columnflow.production.cms.muon import muon_weights -from columnflow.production.cms.top_pt_weight import top_pt_weight +from columnflow.production.cms.top_pt_weight import top_pt_weight as cf_top_pt_weight +from columnflow.production.cms.dy import dy_weights from columnflow.util import maybe_import +from columnflow.columnar_util import attach_coffea_behavior, default_coffea_collections -from hbt.production.weights import ( - normalized_pu_weight, normalized_pdf_weight, normalized_murmuf_weight, +from multilepton.production.weights import ( + normalized_pu_weight, normalized_pdf_weight, normalized_murmuf_weight, normalized_ps_weights, ) -from hbt.production.btag import normalized_btag_weights_deepjet, normalized_btag_weights_pnet -from hbt.production.tau import tau_weights, trigger_weights -from hbt.util import IF_DATASET_HAS_LHE_WEIGHTS, IF_RUN_3 + +from multilepton.production.btag import normalized_btag_weights_deepjet, normalized_btag_weights_pnet +from multilepton.production.tau import tau_weights +from multilepton.production.trigger_sf import trigger_weight +from multilepton.util import IF_DATASET_HAS_LHE_WEIGHTS, IF_RUN_3 + ak = maybe_import("awkward") +top_pt_weight = cf_top_pt_weight.derive("top_pt_weight", cls_dict={"require_dataset_tag": None}) @producer( uses={ - category_ids, stitched_normalization_weights, normalized_pu_weight, + category_ids, stitched_normalization_weights, normalized_pu_weight, normalized_ps_weights, normalized_btag_weights_deepjet, IF_RUN_3(normalized_btag_weights_pnet), IF_DATASET_HAS_LHE_WEIGHTS(normalized_pdf_weight, normalized_murmuf_weight), # weight producers added dynamically if produce_weights is set }, produces={ - category_ids, stitched_normalization_weights, normalized_pu_weight, + category_ids, stitched_normalization_weights, normalized_pu_weight, 
normalized_ps_weights, normalized_btag_weights_deepjet, IF_RUN_3(normalized_btag_weights_pnet), IF_DATASET_HAS_LHE_WEIGHTS(normalized_pdf_weight, normalized_murmuf_weight), # weight producers added dynamically if produce_weights is set @@ -40,6 +46,10 @@ ) def default(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # category ids + events = attach_coffea_behavior( + events, + collections={"Jet": default_coffea_collections["Jet"]}, + ) events = self[category_ids](events, **kwargs) # mc-only weights @@ -58,44 +68,50 @@ def default(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # normalized pu weights events = self[normalized_pu_weight](events, **kwargs) - # btag weights - events = self[normalized_btag_weights_deepjet](events, **kwargs) - if self.has_dep(normalized_btag_weights_pnet): - events = self[normalized_btag_weights_pnet](events, **kwargs) + # normalized parton shower weights + events = self[normalized_ps_weights](events, **kwargs) - # tau weights - if self.has_dep(tau_weights): - events = self[tau_weights](events, **kwargs) + # # btag weights + # events = self[normalized_btag_weights_deepjet](events, **kwargs) + # if self.has_dep(normalized_btag_weights_pnet): + # events = self[normalized_btag_weights_pnet](events, **kwargs) - # electron weights - if self.has_dep(electron_weights): - events = self[electron_weights](events, **kwargs) + # # tau weights + # if self.has_dep(tau_weights): + # events = self[tau_weights](events, **kwargs) - # muon weights - if self.has_dep(muon_weights): - events = self[muon_weights](events, **kwargs) + # # electron weights + # if self.has_dep(electron_weights): + # events = self[electron_weights](events, **kwargs) - # trigger weights - if self.has_dep(trigger_weights): - events = self[trigger_weights](events, **kwargs) + # # muon weights + # if self.has_dep(muon_weights): + # events = self[muon_weights](events, **kwargs) + + # # trigger weight + # if self.has_dep(trigger_weight): + # events = self[trigger_weight](events, **kwargs) # top pt weight if self.has_dep(top_pt_weight): events = self[top_pt_weight](events, **kwargs) + # dy weights + if self.has_dep(dy_weights): + events = self[dy_weights](events, **kwargs) + return events @default.init -def default_init(self: Producer) -> None: +def default_init(self: Producer, **kwargs) -> None: if self.produce_weights: - weight_producers = {tau_weights, electron_weights, muon_weights, trigger_weights} - - if (dataset_inst := getattr(self, "dataset_inst", None)) and dataset_inst.has_tag("ttbar"): + weight_producers = {tau_weights, electron_weights, muon_weights, trigger_weight} + if self.dataset_inst.has_tag("ttbar"): weight_producers.add(top_pt_weight) - + if self.dataset_inst.has_tag("dy"): + weight_producers.add(dy_weights) self.uses |= weight_producers self.produces |= weight_producers - empty = default.derive("empty", cls_dict={"produce_weights": False}) diff --git a/hbt/production/features.py b/multilepton/production/features.py similarity index 93% rename from hbt/production/features.py rename to multilepton/production/features.py index 9c4dbe7a..19a447e6 100644 --- a/hbt/production/features.py +++ b/multilepton/production/features.py @@ -24,17 +24,16 @@ @producer( uses={ # nano columns - "Electron.pt", "Muon.pt", "Jet.pt", "HHBJet.pt", + "Electron.pt", "Muon.pt", "Jet.pt", }, produces={ # new columns - "n_electron", "ht", "n_jet", "n_hhbtag", "n_electron", "n_muon", + "n_electron", "ht", "n_jet", "n_electron", "n_muon", }, ) def features(self: Producer, events: ak.Array, **kwargs) -> 
ak.Array: events = set_ak_column_f32(events, "ht", ak.sum(events.Jet.pt, axis=1)) events = set_ak_column_i32(events, "n_jet", ak.num(events.Jet.pt, axis=1)) - events = set_ak_column_i32(events, "n_hhbtag", ak.num(events.HHBJet.pt, axis=1)) events = set_ak_column_i32(events, "n_electron", ak.num(events.Electron.pt, axis=1)) events = set_ak_column_i32(events, "n_muon", ak.num(events.Muon.pt, axis=1)) return events diff --git a/hbt/production/hhbtag.py b/multilepton/production/hhbtag.py similarity index 57% rename from hbt/production/hhbtag.py rename to multilepton/production/hhbtag.py index 6539fbc6..0ce3101d 100644 --- a/hbt/production/hhbtag.py +++ b/multilepton/production/hhbtag.py @@ -9,24 +9,24 @@ import law from columnflow.production import Producer, producer -from columnflow.util import maybe_import, dev_sandbox, InsertableDict -from columnflow.columnar_util import EMPTY_FLOAT, layout_ak_array +from columnflow.util import maybe_import, dev_sandbox, DotDict +from columnflow.columnar_util import EMPTY_FLOAT, layout_ak_array, set_ak_column, full_like, flat_np_view +from columnflow.types import Any -from hbt.util import IF_RUN_2 +from multilepton.util import IF_RUN_2, MET_COLUMN, IF_NOT_NANO_V15 np = maybe_import("numpy") ak = maybe_import("awkward") - logger = law.logger.get_logger(__name__) @producer( uses={ "event", "channel_id", - "Jet.{pt,eta,phi,mass,jetId,btagDeepFlavB}", IF_RUN_2("Jet.puId"), - # dynamic MET columns added in init + "Jet.{pt,eta,phi,mass,btagDeepFlavB}", IF_NOT_NANO_V15("Jet.jetId"), IF_RUN_2("Jet.puId"), + MET_COLUMN("{pt,phi}"), }, - sandbox=dev_sandbox("bash::$HBT_BASE/sandboxes/venv_columnar_tf.sh"), + sandbox=dev_sandbox("bash::$MULTILEPTON_BASE/sandboxes/venv_multilepton.sh"), ) def hhbtag( self: Producer, @@ -100,11 +100,11 @@ def split(where): even_mask = ak.to_numpy((events[event_mask].event % 2) == 0) if ak.sum(even_mask): input_features_even = split(even_mask) - scores_even = self.hhbtag_model_even(input_features_even).numpy() + scores_even = self.evaluator("hhbtag_even", input_features_even) scores[even_mask] = scores_even if ak.sum(~even_mask): input_features_odd = split(~even_mask) - scores_odd = self.hhbtag_model_odd(input_features_odd).numpy() + scores_odd = self.evaluator("hhbtag_odd", input_features_odd) scores[~even_mask] = scores_odd # remove the scores of padded jets @@ -125,19 +125,49 @@ def split(where): jet_mask = ak.fill_none(jet_mask, False, axis=-1) # insert scores into an array with same shape as input jets (without jet_mask and event_mask) - all_scores = ak.fill_none(ak.full_like(events.Jet.pt, EMPTY_FLOAT, dtype=np.float32), EMPTY_FLOAT, axis=-1) - np.asarray(ak.flatten(all_scores))[ak.flatten(jet_mask & event_mask, axis=1)] = np.asarray(ak.flatten(scores)) - - return all_scores + all_scores = ak.fill_none(full_like(events.Jet.pt, EMPTY_FLOAT, dtype=np.float32), EMPTY_FLOAT, axis=-1) + flat_np_view(all_scores, axis=1)[ak.flatten(jet_mask & event_mask, axis=1)] = flat_np_view(scores) + + events = set_ak_column(events, "hhbtag_score", all_scores) + + if self.config_inst.x.sync: + # for sync save input variables as additional columns in the sync collection + input_feature_names = [ + "jet_shape", "jets_pt", "jets_eta", + "jets_ratio_mass_to_pt", "jets_ratio_energy_to_pt", + "delta_eta_jets_to_htt", "pnet_btag_score", + "delta_phi_jets_to_htt", "campaign", + "channel_id", "htt_pt", + "htt_eta", "delta_phi_htt_to_met", + "ratio_pt_met_to_htt", "all_lepton_pt", + ] + store_sync_columns = dict(zip(input_feature_names, input_features)) + + # 
store inputs + for column, values in store_sync_columns.items(): + # create empty multi dim placeholder + value_placeholder = ak.fill_none( + ak.full_like(events.Jet.pt, EMPTY_FLOAT, dtype=np.float32), EMPTY_FLOAT, axis=-1, + ) + values = ak.concatenate([values, scores_ext], axis=1) + # fill placeholder + np.asarray(ak.flatten(value_placeholder))[ak.flatten(jet_mask & event_mask, axis=1)] = ( + np.asarray(ak.flatten(values)) + ) + events = set_ak_column(events, f"sync_hhbtag_{column}", value_placeholder) + + return events @hhbtag.init def hhbtag_init(self: Producer, **kwargs) -> None: - self.uses.add(f"{self.config_inst.x.met_name}.{{pt,phi}}") + # produce input columns + if self.config_inst.x.sync: + self.produces.add("sync_*") @hhbtag.requires -def hhbtag_requires(self: Producer, reqs: dict) -> None: +def hhbtag_requires(self: Producer, task: law.Task, reqs: dict, **kwargs) -> None: """ Add the external files bundle to requirements. """ @@ -145,42 +175,31 @@ def hhbtag_requires(self: Producer, reqs: dict) -> None: return from columnflow.tasks.external import BundleExternalFiles - reqs["external_files"] = BundleExternalFiles.req(self.task) + reqs["external_files"] = BundleExternalFiles.req(task) @hhbtag.setup -def hhbtag_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None: +def hhbtag_setup( + self: Producer, + task: law.Task, + reqs: dict[str, DotDict[str, Any]], + **kwargs, +) -> None: """ Sets up the two HHBtag TF models. """ - import os - os.environ["CUDA_VISIBLE_DEVICES"] = "-1" - import tensorflow as tf - - tf.config.threading.set_inter_op_parallelism_threads(1) - tf.config.threading.set_intra_op_parallelism_threads(1) + from multilepton.ml.tf_evaluator import TFEvaluator - # unpack the external files bundle, create a subdiretory and unpack the hhbtag repo in it + # unpack the external files bundle and setup the evaluator bundle = reqs["external_files"] + self.evaluator = TFEvaluator() + self.evaluator.add_model("hhbtag_even", bundle.files.hh_btag_repo.even.abspath) + self.evaluator.add_model("hhbtag_odd", bundle.files.hh_btag_repo.odd.abspath) - # unpack repo - repo_dir = bundle.files_dir.child("hh-btag-repo", type="d") - arc = bundle.files.hh_btag_repo - arc.load(repo_dir, formatter="tar") - - # get the version of the external file - self.hhbtag_version = self.config_inst.x.external_files["hh_btag_repo"][1] + # get the model version (coincides with the external file version) + self.hhbtag_version = self.config_inst.x.external_files.hh_btag_repo.version - # define the model path - model_dir = repo_dir.child("hh-btag-master/models") - model_path = f"HHbtag_{self.hhbtag_version}_par" - # save both models (even and odd event numbers) - with self.task.publish_step("loading hhbtag models ..."): - self.hhbtag_model_even = tf.saved_model.load(model_dir.child(f"{model_path}_0").path) - self.hhbtag_model_odd = tf.saved_model.load(model_dir.child(f"{model_path}_1").path) - - # prepare mappings for the HHBtag model - # (see links above for mapping information) + # prepare mappings for the HHBtag model (see links above for mapping information) channel_map = { self.config_inst.channels.n.etau.id: 1 if self.hhbtag_version == "v3" else 0, self.config_inst.channels.n.mutau.id: 0 if self.hhbtag_version == "v3" else 1, @@ -190,8 +209,33 @@ def hhbtag_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: Inser self.config_inst.channels.n.ee.id: 4 if self.hhbtag_version == "v3" else 0, self.config_inst.channels.n.mumu.id: 3 if self.hhbtag_version == "v3" 
else 1, self.config_inst.channels.n.emu.id: 5 if self.hhbtag_version == "v3" else 0, + # for now, for multilepton we can remove the hh btag + self.config_inst.channels.n.c3e.id: 0, + self.config_inst.channels.n.c2emu.id: 0, + self.config_inst.channels.n.ce2mu.id: 0, + self.config_inst.channels.n.c3mu.id: 0, + self.config_inst.channels.n.c4e.id: 0, + self.config_inst.channels.n.c3emu.id: 0, + self.config_inst.channels.n.c2e2mu.id: 0, + self.config_inst.channels.n.ce3mu.id: 0, + self.config_inst.channels.n.c4mu.id: 0, + # self.config_inst.channels.n.ceormu.id: 0, + self.config_inst.channels.n.c3etau.id: 0, + self.config_inst.channels.n.c2emutau.id: 0, + self.config_inst.channels.n.ce2mutau.id: 0, + self.config_inst.channels.n.c3mutau.id: 0, + self.config_inst.channels.n.c2e2tau.id: 0, + self.config_inst.channels.n.cemu2tau.id: 0, + self.config_inst.channels.n.c2mu2tau.id: 0, + self.config_inst.channels.n.ce3tau.id: 0, + self.config_inst.channels.n.cmu3tau.id: 0, + self.config_inst.channels.n.c4tau.id: 0, + self.config_inst.channels.n.c2e0or1tau.id: 0, + self.config_inst.channels.n.cemu0or1tau.id: 0, + self.config_inst.channels.n.c2mu0or1tau.id: 0, } - # convert to + + # convert self.hhbtag_channel_map = np.array([ channel_map.get(cid, np.nan) for cid in range(max(channel_map.keys()) + 1) @@ -221,3 +265,14 @@ def hhbtag_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: Inser f"hhbtag model {self.hhbtag_version} uses {hhbtag_met_name}, but config requests " f"{self.config_inst.x.met_name}", ) + # start the evaluator + self.evaluator.start() + + +@hhbtag.teardown +def hhbtag_teardown(self: Producer, **kwargs) -> None: + """ + Stops the TF evaluator. + """ + if (evaluator := getattr(self, "evaluator", None)) is not None: + evaluator.stop() diff --git a/multilepton/production/jet.py b/multilepton/production/jet.py new file mode 100644 index 00000000..43e454e7 --- /dev/null +++ b/multilepton/production/jet.py @@ -0,0 +1,103 @@ +# coding: utf-8 + +""" +Jet scale factor production. +""" + +from __future__ import annotations + +import functools + +import law + +from columnflow.production import Producer, producer +from columnflow.util import maybe_import, load_correction_set +from columnflow.columnar_util import set_ak_column, flat_np_view, layout_ak_array + + +ak = maybe_import("awkward") +np = maybe_import("numpy") +set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32) + + +@producer( + uses={ + "channel_id", "Jet.{pt,eta,phi,mass}", + }, + mc_only=True, + get_jet_file=(lambda self, external_files: external_files.trigger_sf.jet), + get_jet_corrector=(lambda self: self.config_inst.x.jet_trigger_corrector), + efficiency_name="jet_trigger_eff", +) +def jet_trigger_efficiencies( + self: Producer, + events: ak.Array, + jet_mask: ak.Array | type(Ellipsis) = Ellipsis, + **kwargs, +) -> ak.Array: + """ + Producer for jet trigger efficiencies derived by the CCLUB group at object level. Requires an external file in the + config under ``trigger_sf.jet``. + + *get_jet_file* can be adapted in a subclass in case it is stored differently in the external files. A correction + set named ``"jet_trigger_corrector"`` is extracted from it. 
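+
+    A minimal config sketch (the file path and corrector name below are placeholders, not the values actually used
+    in this analysis):
+
+        cfg.x.external_files = DotDict.wrap({
+            "trigger_sf": {"jet": "/path/to/jet_trigger_sf.json.gz"},
+        })
+        cfg.x.jet_trigger_corrector = "name_of_the_jet_correction_set"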
+ + Resources: + https://gitlab.cern.ch/cclubbtautau/AnalysisCore/-/tree/cclub_cmssw15010/data/TriggerScaleFactors?ref_type=heads + """ + + # flat absolute eta and pt views + abs_eta = flat_np_view(abs(events.Jet.eta[jet_mask]), axis=1) + pt = flat_np_view(events.Jet.pt[jet_mask], axis=1) + variable_map = { + "pt": pt, + "abseta": abs_eta, + } + + for kind in ["data", "mc"]: + for syst, postfix in [ + ("nom", ""), + ("up", "_up"), + ("down", "_down"), + ]: + variable_map_syst = { + **variable_map, + "syst": syst, + "data_or_mc": kind, + } + inputs = [variable_map_syst[inp.name] for inp in self.jet_trig_corrector.inputs] + sf_flat = self.jet_trig_corrector(*inputs) + sf = layout_ak_array(sf_flat, events.Jet.pt[jet_mask]) + events = set_ak_column(events, f"{self.efficiency_name}_{kind}{postfix}", sf, value_type=np.float32) + return events + + +@jet_trigger_efficiencies.init +def jet_trigger_efficiencies_init(self: Producer, **kwargs) -> None: + # add the product of nominal and up/down variations to produced columns + self.produces.add(f"{self.efficiency_name}_{{data,mc}}{{,_up,_down}}") + + +@jet_trigger_efficiencies.requires +def jet_trigger_efficiencies_requires(self: Producer, task: law.Task, reqs: dict) -> None: + from columnflow.tasks.external import BundleExternalFiles + if "external_files" in reqs: + return + reqs["external_files"] = BundleExternalFiles.req(task) + + +@jet_trigger_efficiencies.setup +def jet_trigger_efficiencies_setup( + self: Producer, + task: law.Task, + reqs: dict, + inputs: dict, + reader_targets: law.util.InsertableDict, +) -> None: + bundle = reqs["external_files"] + + # create the trigger and id correctors + correction_set = load_correction_set(self.get_jet_file(bundle.files)) + #print("Available keys:", list(correction_set.keys())) + self.jet_trig_corrector = correction_set[self.get_jet_corrector()] + #assert self.jet_trig_corrector.version in [0, 1] diff --git a/hbt/production/minimal.py b/multilepton/production/minimal.py similarity index 100% rename from hbt/production/minimal.py rename to multilepton/production/minimal.py diff --git a/hbt/production/patches.py b/multilepton/production/patches.py similarity index 100% rename from hbt/production/patches.py rename to multilepton/production/patches.py diff --git a/hbt/production/processes.py b/multilepton/production/processes.py similarity index 96% rename from hbt/production/processes.py rename to multilepton/production/processes.py index 200bd646..c13e1b40 100644 --- a/hbt/production/processes.py +++ b/multilepton/production/processes.py @@ -12,10 +12,10 @@ import order from columnflow.production import Producer -from columnflow.util import maybe_import, InsertableDict +from columnflow.util import maybe_import from columnflow.columnar_util import set_ak_column, Route -from hbt.util import IF_DATASET_IS_DY, IF_DATASET_IS_W_LNU +from multilepton.util import IF_DATASET_IS_DY, IF_DATASET_IS_W_LNU np = maybe_import("numpy") ak = maybe_import("awkward") @@ -57,7 +57,7 @@ def cross_check_translation_dict(self) -> dict[str, str]: # must be overwritten by inheriting classes ... - def init_func(self, *args, **kwargs): + def init_func(self, **kwargs) -> None: # if there is a include_condition set, apply it to both used and produced columns cond = lambda args: {self.include_condition(*args)} if self.include_condition else {*args} self.uses |= cond(self.stitching_columns or []) @@ -147,12 +147,7 @@ def leaf_processes(self) -> list[order.Process]: # must be overwritten by inheriting classes ... 
- def setup_func( - self, - reqs: dict, - inputs: dict, - reader_targets: InsertableDict, - ) -> None: + def setup_func(self, task: law.Task, **kwargs) -> None: # define stitching ranges for the DY datasets covered by this producer's dy_inclusive_dataset stitching_ranges: dict[NJetsRange, list[PtRange]] = {} for proc in self.leaf_processes: diff --git a/hbt/production/res_networks.py b/multilepton/production/res_networks.py similarity index 83% rename from hbt/production/res_networks.py rename to multilepton/production/res_networks.py index 46f7dc88..646137b6 100644 --- a/hbt/production/res_networks.py +++ b/multilepton/production/res_networks.py @@ -16,7 +16,8 @@ from columnflow.columnar_util import ( set_ak_column, attach_behavior, flat_np_view, EMPTY_FLOAT, default_coffea_collections, ) -from columnflow.util import maybe_import, dev_sandbox, InsertableDict, DotDict +from columnflow.util import maybe_import, dev_sandbox, DotDict +from columnflow.types import Any np = maybe_import("numpy") ak = maybe_import("awkward") @@ -48,7 +49,7 @@ # limited chunk size to avoid memory issues max_chunk_size=5_000, # produced columns are added in the deferred init below - sandbox=dev_sandbox("bash::$HBT_BASE/sandboxes/venv_columnar_tf.sh"), + sandbox=dev_sandbox("bash::$MULTILEPTON_BASE/sandboxes/venv_multilepton.sh"), # not exposed to be called from the command line exposed=False, ) @@ -65,8 +66,6 @@ def _res_dnn_evaluation( correct order can be found in the tautauNN repo: https://github.com/uhh-cms/tautauNN/blob/f1ca194/evaluation/interface.py#L67 """ - import tensorflow as tf - # ensure coffea behavior events = self[attach_coffea_behavior]( events, @@ -222,7 +221,7 @@ def mask_values(mask, value, *fields): # build continous inputs # (order exactly as documented in link above) continous_inputs = [ - t[..., None] for t in [ + np.asarray(t[..., None], dtype=np.float32) for t in [ f.met_px, f.met_py, f.met_cov00, f.met_cov01, f.met_cov11, f.vis_tau1_px, f.vis_tau1_py, f.vis_tau1_pz, f.vis_tau1_e, f.vis_tau2_px, f.vis_tau2_py, f.vis_tau2_pz, f.vis_tau2_e, @@ -252,10 +251,13 @@ def mask_values(mask, value, *fields): ] # evaluate the model - scores = self.res_model( - cont_input=tf.concat(continous_inputs, axis=1), - cat_input=tf.concat(categorical_inputs, axis=1), - )["hbt_ensemble"].numpy() + scores = self.evaluator( + "res", + inputs=[ + np.concatenate(continous_inputs, axis=1), + np.concatenate(categorical_inputs, axis=1), + ], + ) # in very rare cases (1 in 25k), the network output can be none, likely for numerical reasons, # so issue a warning and set them to a default value @@ -272,53 +274,71 @@ def mask_values(mask, value, *fields): values = EMPTY_FLOAT * np.ones(len(events), dtype=np.float32) values[event_mask] = scores[:, i] events = set_ak_column_f32(events, column, values) + if self.config_inst.x.sync: + # store input columns for sync + cont_inputs_names = [ + "met_px", "met_py", "met_cov00", "met_cov01", "met_cov11", + "vis_tau1_px", "vis_tau1_py", "vis_tau1_pz", "vis_tau1_e", + "vis_tau2_px", "vis_tau2_py", "vis_tau2_pz", "vis_tau2_e", + "bjet1_px", "bjet1_py", "bjet1_pz", "bjet1_e", "bjet1_btag_df", "bjet1_cvsb", "bjet1_cvsl", "bjet1_hhbtag", + "bjet2_px", "bjet2_py", "bjet2_pz", "bjet2_e", "bjet2_btag_df", "bjet2_cvsb", "bjet2_cvsl", "bjet2_hhbtag", + "fatjet_px", "fatjet_py", "fatjet_pz", "fatjet_e", + "htt_e", "htt_px", "htt_py", "htt_pz", + "hbb_e", "hbb_px", "hbb_py", "hbb_pz", + "htthbb_e", "htthbb_px", "htthbb_py", "htthbb_pz", + "httfatjet_e", "httfatjet_px", "httfatjet_py", 
"httfatjet_pz", + ] + + cat_inputs_names = [ + "pair_type", "dm1", "dm2", "vis_tau1_charge", "vis_tau2_charge", "has_jet_pair", "has_fatjet", + ] + for column, values in zip( + cont_inputs_names + cat_inputs_names, + continous_inputs + categorical_inputs, + ): + values_placeholder = EMPTY_FLOAT * np.ones(len(events), dtype=np.float32) + values_placeholder[event_mask] = ak.flatten(values) + events = set_ak_column_f32(events, "sync_res_dnn_" + column, values_placeholder) return events @_res_dnn_evaluation.init -def _res_dnn_evaluation_init(self: Producer) -> None: +def _res_dnn_evaluation_init(self: Producer, **kwargs) -> None: self.uses.add(f"{self.config_inst.x.met_name}.{{pt,phi,covXX,covXY,covYY}}") @_res_dnn_evaluation.requires -def _res_dnn_evaluation_requires(self: Producer, reqs: dict) -> None: +def _res_dnn_evaluation_requires(self: Producer, task: law.Task, reqs: dict, **kwargs) -> None: if "external_files" in reqs: return from columnflow.tasks.external import BundleExternalFiles - reqs["external_files"] = BundleExternalFiles.req(self.task) + reqs["external_files"] = BundleExternalFiles.req(task) @_res_dnn_evaluation.setup def _res_dnn_evaluation_setup( self: Producer, - reqs: dict, - inputs: dict, - reader_targets: InsertableDict, + task: law.Task, + reqs: dict[str, DotDict[str, Any]], + **kwargs, ) -> None: - import os - os.environ["CUDA_VISIBLE_DEVICES"] = "-1" - import tensorflow as tf + from multilepton.ml.tf_evaluator import TFEvaluator # some checks if not isinstance(self.parametrized, bool): raise AttributeError("'parametrized' must be set in the producer configuration") - # constrain tf to use only one core - tf.config.threading.set_inter_op_parallelism_threads(1) - tf.config.threading.set_intra_op_parallelism_threads(1) - # unpack the model archive bundle = reqs["external_files"] bundle.files model_dir = bundle.files_dir.child(self.cls_name, type="d") getattr(bundle.files, self.cls_name).load(model_dir, formatter="tar") - # load the model - with self.task.publish_step(f"loading resonant model '{self.cls_name}' ..."): - saved_model = tf.saved_model.load(model_dir.child("model_fold0").abspath) - self.res_model = saved_model.signatures["serving_default"] + # setup the evaluator + self.evaluator = TFEvaluator() + self.evaluator.add_model("res", model_dir.child("model_fold0").abspath, signature_key="serving_default") # categorical values handled by the network # (names and values from training code that was aligned to KLUB notation) @@ -359,12 +379,24 @@ def _res_dnn_evaluation_setup( (2023, "BPix"): 3, }[(self.config_inst.campaign.x.year, self.config_inst.campaign.x.postfix)] + # start the evaluator + self.evaluator.start() + + +@_res_dnn_evaluation.teardown +def _res_dnn_evaluation_teardown(self: Producer, **kwargs) -> None: + """ + Stops the TF evaluator. 
+ """ + if (evaluator := getattr(self, "evaluator", None)) is not None: + evaluator.stop() # # parameterized network # trained with Radion (spin 0) and Graviton (spin 2) samples up to mX = 3000 GeV in all run 2 eras # + res_pdnn = _res_dnn_evaluation.derive("res_pdnn", cls_dict={ "parametrized": True, "exposed": True, @@ -374,8 +406,8 @@ def _res_dnn_evaluation_setup( @res_pdnn.init -def res_pdnn_init(self: Producer) -> None: - super(res_pdnn, self).init_func() +def res_pdnn_init(self: Producer, **kwargs) -> None: + super(res_pdnn, self).init_func(**kwargs) # check spin value and mass values if self.spin not in {0, 2}: @@ -405,8 +437,8 @@ def res_pdnn_init(self: Producer) -> None: @res_dnn.init -def res_dnn_init(self: Producer) -> None: - super(res_dnn, self).init_func() +def res_dnn_init(self: Producer, **kwargs) -> None: + super(res_dnn, self).init_func(**kwargs) # output column names (in this order) self.output_columns = [ @@ -416,6 +448,8 @@ def res_dnn_init(self: Producer) -> None: # update produced columns self.produces |= set(self.output_columns) + if self.config_inst.x.sync: + self.produces.add("sync_*") # diff --git a/hbt/production/tau.py b/multilepton/production/tau.py similarity index 58% rename from hbt/production/tau.py rename to multilepton/production/tau.py index 3ade596e..c430bb1c 100644 --- a/hbt/production/tau.py +++ b/multilepton/production/tau.py @@ -3,18 +3,17 @@ """ Tau scale factor production. """ - +import law import functools from columnflow.production import Producer, producer -from columnflow.util import maybe_import, InsertableDict +from columnflow.util import maybe_import, load_correction_set, DotDict from columnflow.columnar_util import set_ak_column, flat_np_view, layout_ak_array - +from columnflow.tasks.external import BundleExternalFiles +from columnflow.types import Any ak = maybe_import("awkward") np = maybe_import("numpy") - -# helper set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32) @@ -23,7 +22,7 @@ # custom columns created upstream, probably by a selector "single_triggered", "cross_triggered", # nano columns - "Tau.{pt,eta,genPartFlav,decayMode}", + "Tau.{mass,pt,eta,phi,decayMode,genPartFlav}", }, produces={ "tau_weight", @@ -35,38 +34,29 @@ "mu_0p0To0p4", "mu_0p4To0p8", "mu_0p8To1p2", "mu_1p2To1p7", "mu_1p7To2p3", ] }, - # only run on mc mc_only=True, - # function to determine the correction file get_tau_file=(lambda self, external_files: external_files.tau_sf), - # function to determine the tau tagger name get_tau_tagger=(lambda self: self.config_inst.x.tau_tagger), ) def tau_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: """ Producer for tau ID weights. Requires an external file in the config under ``tau_sf``: - .. code-block:: python - cfg.x.external_files = DotDict.wrap({ "tau_sf": "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c/POG/TAU/2017_UL/tau.json.gz", # noqa }) - *get_tau_file* can be adapted in a subclass in case it is stored differently in the external files. The name of the tagger should be given as an auxiliary entry in the config: - .. code-block:: python - cfg.x.tau_tagger = "DeepTau2017v2p1" It is used to extract correction set names such as "DeepTau2017v2p1VSjet". *get_tau_tagger* can be adapted in a subclass in case it is stored differently in the config. 
- Resources: - https://twiki.cern.ch/twiki/bin/view/CMS/TauIDRecommendationForRun2?rev=113 - https://gitlab.cern.ch/cms-nanoAOD/jsonpog-integration/-/blob/849c6a6efef907f4033715d52290d1a661b7e8f9/POG/TAU + - https://twiki.cern.ch/twiki/bin/view/CMS/TauIDRecommendationForRun2?rev=113 + - https://gitlab.cern.ch/cms-nanoAOD/jsonpog-integration/-/blob/849c6a6efef907f4033715d52290d1a661b7e8f9/POG/TAU """ # helper to bring a flat sf array into the shape of taus, and multiply across the tau axis reduce_mul = lambda sf: ak.prod(layout_ak_array(sf, events.Tau.pt), axis=1, mask_identity=False) @@ -90,7 +80,6 @@ def tau_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # # compute nominal ID weights # - # start with ones sf_nom = np.ones_like(pt, dtype=np.float32) wp_config = self.config_inst.x.tau_trigger_working_points @@ -113,11 +102,11 @@ def tau_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: raise NotImplementedError mu_args = lambda mask, wp, syst: (abseta[mask], match[mask], wp, syst) - + # genuine taus tau_mask = flat_np_view(dm_mask & (events.Tau.genPartFlav == 5), axis=1) sf_nom[tau_mask] = self.id_vs_jet_corrector(*tau_args(tau_mask, "nom")) - + # electrons faking taus e_mask = ((events.Tau.genPartFlav == 1) | (events.Tau.genPartFlav == 3)) if self.config_inst.campaign.x.run == 3: @@ -140,21 +129,27 @@ def tau_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # # compute varied ID weights # - for direction in ["up", "down"]: # genuine taus -> split into decay modes sf_tau_dm0 = sf_nom.copy() sf_tau_dm1 = sf_nom.copy() sf_tau_dm10 = sf_nom.copy() + sf_tau_dm11 = sf_nom.copy() + tau_dm0_mask = tau_mask & (dm == 0) tau_dm1_mask = tau_mask & (dm == 1) - tau_dm10_mask = tau_mask & ((dm == 10) | (dm == 11)) + tau_dm10_mask = tau_mask & (dm == 10) + tau_dm11_mask = tau_mask & (dm == 11) + sf_tau_dm0[tau_dm0_mask] = self.id_vs_jet_corrector(*tau_args(tau_dm0_mask, direction)) sf_tau_dm1[tau_dm1_mask] = self.id_vs_jet_corrector(*tau_args(tau_dm1_mask, direction)) sf_tau_dm10[tau_dm10_mask] = self.id_vs_jet_corrector(*tau_args(tau_dm10_mask, direction)) + sf_tau_dm11[tau_dm11_mask] = self.id_vs_jet_corrector(*tau_args(tau_dm11_mask, direction)) + events = set_ak_column_f32(events, f"tau_weight_jet_dm0_{direction}", reduce_mul(sf_tau_dm0)) events = set_ak_column_f32(events, f"tau_weight_jet_dm1_{direction}", reduce_mul(sf_tau_dm1)) events = set_ak_column_f32(events, f"tau_weight_jet_dm10_{direction}", reduce_mul(sf_tau_dm10)) + events = set_ak_column_f32(events, f"tau_weight_jet_dm11_{direction}", reduce_mul(sf_tau_dm11)) # electron fakes -> split into 2 eta regions for region, region_mask in [ @@ -190,30 +185,29 @@ def tau_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: *mu_args(mu_cross_region_mask, wp_config.id_vs_mu_cross, direction), ) events = set_ak_column_f32(events, f"tau_weight_mu_{region}_{direction}", reduce_mul(sf_mu)) - return events @tau_weights.requires -def tau_weights_requires(self: Producer, reqs: dict) -> None: +def tau_weights_requires(self: Producer, task: law.Task, reqs: dict, **kwargs) -> None: if "external_files" in reqs: return - - from columnflow.tasks.external import BundleExternalFiles - reqs["external_files"] = BundleExternalFiles.req(self.task) + reqs["external_files"] = BundleExternalFiles.req(task) @tau_weights.setup -def tau_weights_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None: - bundle = reqs["external_files"] +def tau_weights_setup( + self: Producer, + task: 
law.Task, + reqs: dict[str, DotDict[str, Any]], + **kwargs, +) -> None: # create the trigger and id correctors - import correctionlib - correctionlib.highlevel.Correction.__call__ = correctionlib.highlevel.Correction.evaluate - correction_set = correctionlib.CorrectionSet.from_string( - self.get_tau_file(bundle.files).load(formatter="gzip").decode("utf-8"), - ) + tau_file = self.get_tau_file(reqs["external_files"].files) + correction_set = load_correction_set(tau_file) tagger_name = self.get_tau_tagger() + self.id_vs_jet_corrector = correction_set[f"{tagger_name}VSjet"] self.id_vs_e_corrector = correction_set[f"{tagger_name}VSe"] self.id_vs_mu_corrector = correction_set[f"{tagger_name}VSmu"] @@ -226,46 +220,58 @@ def tau_weights_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: @producer( uses={ - "channel_id", "single_triggered", "cross_triggered", + "channel_id", "single_triggered", "cross_triggered", "matched_trigger_ids", "Tau.{pt,decayMode}", }, produces={ - "tau_trigger_weight", - } | { - f"tau_trigger_weight_{ch}_{direction}" - for direction in ["up", "down"] - for ch in ["etau", "mutau", "tautau"] # TODO: add tautauvbf when existing + "tau_trigger_eff_{data,mc}_{etau,mutau,tautau,tautaujet}", + "tau_trigger_eff_{data,mc}_{etau,mutau,tautau,tautaujet}_dm{0,1,10,11}_{up,down}", }, - # only run on mc mc_only=True, - # function to determine the correction file get_tau_file=(lambda self, external_files: external_files.tau_sf), + get_tau_corrector=(lambda self: self.config_inst.x.tau_trigger_corrector), ) -def trigger_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: +def tau_trigger_efficiencies(self: Producer, events: ak.Array, **kwargs) -> ak.Array: """ - Producer for trigger scale factors derived by the TAU POG. Requires an external file in the + Producer for trigger scale factors derived by the TAU POG at object level. Requires an external file in the config under ``tau_sf``: - .. code-block:: python cfg.x.external_files = DotDict.wrap({ "tau_sf": "/afs/cern.ch/work/m/mrieger/public/mirrors/jsonpog-integration-9ea86c4c/POG/TAU/2017_UL/tau.json.gz", # noqa }) - *get_tau_file* can be adapted in a subclass in case it is stored differently in the external files. A correction set named ``"tau_trigger"`` is extracted from it. 
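+    In this version the name of the extracted correction set is not hard-coded but resolved via *get_tau_corrector*,
+    e.g. (placeholder value):
+
+        cfg.x.tau_trigger_corrector = "tau_trigger"
+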
Resources: - https://twiki.cern.ch/twiki/bin/view/CMS/TauIDRecommendationForRun2?rev=113 - https://gitlab.cern.ch/cms-nanoAOD/jsonpog-integration/-/blob/849c6a6efef907f4033715d52290d1a661b7e8f9/POG/TAU + - https://twiki.cern.ch/twiki/bin/view/CMS/TauIDRecommendationForRun2?rev=113 + - https://gitlab.cern.ch/cms-nanoAOD/jsonpog-integration/-/blob/849c6a6efef907f4033715d52290d1a661b7e8f9/POG/TAU """ # get channels from the config - ch_etau = self.config_inst.get_channel("etau") - ch_mutau = self.config_inst.get_channel("mutau") - ch_tautau = self.config_inst.get_channel("tautau") - - # helper to bring a flat sf array into the shape of taus, and multiply across the tau axis - reduce_mul = lambda sf: ak.prod(layout_ak_array(sf, events.Tau.pt), axis=1, mask_identity=False) + ch_etau = self.config_inst.get_channel("cetau") + ch_mutau = self.config_inst.get_channel("cmutau") + ch_tautau = self.config_inst.get_channel("ctautau") + + # find out which tautau triggers are passed + tautau_trigger_passed = ak.zeros_like(events.channel_id, dtype=np.bool) + tautaujet_trigger_passed = ak.zeros_like(events.channel_id, dtype=np.bool) + tautauvbf_trigger_passed = ak.zeros_like(events.channel_id, dtype=np.bool) + for trigger in self.config_inst.x.triggers: + if trigger.has_tag("cross_tau_tau"): + tautau_trigger_passed = ( + tautau_trigger_passed | + np.any(events.matched_trigger_ids == trigger.id, axis=-1) + ) + if trigger.has_tag("cross_tau_tau_jet"): + tautaujet_trigger_passed = ( + tautaujet_trigger_passed | + np.any(events.matched_trigger_ids == trigger.id, axis=-1) + ) + if trigger.has_tag("cross_tau_tau_vbf"): + tautauvbf_trigger_passed = ( + tautauvbf_trigger_passed | + np.any(events.matched_trigger_ids == trigger.id, axis=-1) + ) # the correction tool only supports flat arrays, so convert inputs to flat np view first pt = flat_np_view(events.Tau.pt, axis=1) @@ -274,82 +280,96 @@ def trigger_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: # # compute nominal trigger weight # - # define channel / trigger dependent masks channel_id = events.channel_id - single_triggered = events.single_triggered - dm_mask = ( - (events.Tau.decayMode == 0) | - (events.Tau.decayMode == 1) | - (events.Tau.decayMode == 10) | - (events.Tau.decayMode == 11) - ) - tautau_mask = flat_np_view( - dm_mask & (events.Tau.pt >= 40.0) & (channel_id == ch_tautau.id), - axis=1, - ) - # not existing yet - # tautauvbf_mask = flat_np_view(dm_mask & (channel_id == ch_tautau.id), axis=1) - etau_mask = flat_np_view( - dm_mask & (channel_id == ch_etau.id) & single_triggered & (events.Tau.pt >= 25.0), - axis=1, - ) - mutau_mask = flat_np_view( - dm_mask & (channel_id == ch_mutau.id) & single_triggered & (events.Tau.pt >= 25.0), - axis=1, + cross_triggered = events.cross_triggered + default_tautau_mask = ( + (channel_id == ch_tautau.id) & + ((ak.local_index(events.Tau) == 0) | (ak.local_index(events.Tau) == 1)) ) + tautau_mask = default_tautau_mask & tautau_trigger_passed + flat_tautau_mask = flat_np_view(tautau_mask, axis=1) + tautaujet_mask = default_tautau_mask & tautaujet_trigger_passed + flat_tautaujet_mask = flat_np_view(tautaujet_mask, axis=1) + # TODO: add additional phase space requirements for tautauvbf + # tautauvbf_mask = flat_np_view(default_tautau_mask & tautauvbf_trigger_passed, axis=1) + etau_mask = (channel_id == ch_etau.id) & cross_triggered & (ak.local_index(events.Tau) == 0) + flat_etau_mask = flat_np_view(etau_mask, axis=1) + mutau_mask = (channel_id == ch_mutau.id) & cross_triggered & 
(ak.local_index(events.Tau) == 0) + flat_mutau_mask = flat_np_view(mutau_mask, axis=1) # start with flat ones - sf_nom = np.ones_like(pt, dtype=np.float32) - wp_config = self.config_inst.x.tau_trigger_working_points - eval_args = lambda mask, ch, syst: (pt[mask], dm[mask], ch, wp_config.trigger_corr, "sf", syst) - sf_nom[etau_mask] = self.trigger_corrector(*eval_args(etau_mask, "etau", "nom")) - sf_nom[mutau_mask] = self.trigger_corrector(*eval_args(mutau_mask, "mutau", "nom")) - sf_nom[tautau_mask] = self.trigger_corrector(*eval_args(tautau_mask, "ditau", "nom")) - - # create and store weights - events = set_ak_column_f32(events, "tau_trigger_weight", reduce_mul(sf_nom)) - - # - # compute varied trigger weights - # - - for direction in ["up", "down"]: + for kind in ["data", "mc"]: + wp_config = self.config_inst.x.tau_trigger_working_points + eval_args = lambda mask, ch, syst: (pt[mask], dm[mask], ch, wp_config.trigger_corr, f"eff_{kind}", syst) + for corr_channel in ["etau", "mutau", "tautau", "tautaujet"]: # TODO: add tautauvbf + if corr_channel == "etau": + mask = flat_etau_mask + corr_channel_arg = corr_channel + elif corr_channel == "mutau": + mask = flat_mutau_mask + corr_channel_arg = corr_channel + elif corr_channel == "tautau": + mask = flat_tautau_mask + corr_channel_arg = "ditau" + elif corr_channel == "tautaujet": + mask = flat_tautaujet_mask + corr_channel_arg = "ditaujet" + else: + raise ValueError(f"Unknown channel {corr_channel}") + sf_nom = np.ones_like(pt, dtype=np.float32) + sf_nom[mask] = self.tau_trig_corrector(*eval_args(mask, corr_channel_arg, "nom")) + # create and store weights + events = set_ak_column_f32( + events, + f"tau_trigger_eff_{kind}_{corr_channel}", + layout_ak_array(sf_nom, events.Tau.pt), + ) + # + # compute varied trigger weights + # for ch, ch_corr, mask in [ ("etau", "etau", etau_mask), ("mutau", "mutau", mutau_mask), ("tautau", "ditau", tautau_mask), + ("tautaujet", "ditaujet", tautaujet_mask), # ("tautauvbf", "ditauvbf", tautauvbf_mask), ]: - sf_unc = sf_nom.copy() - sf_unc[mask] = self.trigger_corrector(*eval_args(mask, ch_corr, direction)) - events = set_ak_column_f32(events, f"tau_trigger_weight_{ch}_{direction}", reduce_mul(sf_unc)) - + for decay_mode in [0, 1, 10, 11]: + decay_mode_mask = mask & (events.Tau.decayMode == decay_mode) + flat_decay_mode_mask = flat_np_view(decay_mode_mask, axis=1) + for direction in ["up", "down"]: + # only possible with object-level information + sf_unc = ak.copy(events[f"tau_trigger_eff_{kind}_{ch}"]) + sf_unc_flat = flat_np_view(sf_unc, axis=1) + sf_unc_flat[flat_decay_mode_mask] = self.tau_trig_corrector( + *eval_args(flat_decay_mode_mask, ch_corr, direction), + ) + events = set_ak_column_f32( + events, + f"tau_trigger_eff_{kind}_{ch}_dm{decay_mode}_{direction}", + sf_unc, + ) return events -@trigger_weights.requires -def trigger_weights_requires(self: Producer, reqs: dict) -> None: +@tau_trigger_efficiencies.requires +def tau_trigger_efficiencies_requires(self: Producer, task: law.Task, reqs: dict, **kwargs) -> None: if "external_files" in reqs: return - - from columnflow.tasks.external import BundleExternalFiles - reqs["external_files"] = BundleExternalFiles.req(self.task) + reqs["external_files"] = BundleExternalFiles.req(task) -@trigger_weights.setup -def trigger_weights_setup(self: Producer, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None: - bundle = reqs["external_files"] +@tau_trigger_efficiencies.setup +def tau_trigger_efficiencies_setup( + self: Producer, + task: law.Task, + reqs: 
dict[str, DotDict[str, Any]], + **kwargs, +) -> None: # create the trigger and id correctors - import correctionlib - correctionlib.highlevel.Correction.__call__ = correctionlib.highlevel.Correction.evaluate - - # load the correction set - correction_set = correctionlib.CorrectionSet.from_string( - self.get_tau_file(bundle.files).load(formatter="gzip").decode("utf-8"), - ) - self.trigger_corrector = correction_set["tau_trigger"] - - # check versions - assert self.trigger_corrector.version in [0, 1] + tau_file = self.get_tau_file(reqs["external_files"].files) + corrector_name = self.get_tau_corrector() + self.tau_trig_corrector = load_correction_set(tau_file)[corrector_name] + assert self.tau_trig_corrector.version in [0, 1] diff --git a/multilepton/production/trigger_sf.py b/multilepton/production/trigger_sf.py new file mode 100644 index 00000000..58ca4dd3 --- /dev/null +++ b/multilepton/production/trigger_sf.py @@ -0,0 +1,726 @@ +# coding: utf-8 + +""" +Custom trigger scale factor production. + +Note : The trigger weight producers multiply the sfs for all objects in an event to get the total +trigger scale factor of the event. Since we might want to use different objects in different channels, +we will derive the trigger weight producers for each channel separately to apply the correct masks. +""" + +import functools + +import order as od + +from columnflow.production import Producer, producer +from columnflow.util import maybe_import +from columnflow.columnar_util import set_ak_column +from columnflow.production.cms.muon import muon_trigger_weights as cf_muon_trigger_weight +from columnflow.production.cms.electron import electron_trigger_weights as cf_electron_trigger_weight + +from multilepton.production.tau import tau_trigger_efficiencies +from multilepton.production.jet import jet_trigger_efficiencies + +ak = maybe_import("awkward") +np = maybe_import("numpy") + + +# helper +set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32) + +# subclass the electron trigger weight producer to create the electron trigger weight +electron_trigger_weight = cf_electron_trigger_weight.derive( + "electron_trigger_weight", + cls_dict={ + "get_electron_file": (lambda self, external_files: external_files.trigger_sf.electron), + }, +) +muon_trigger_weight = cf_muon_trigger_weight.derive( + "muon_trigger_weight", + cls_dict={ + "get_muon_file": (lambda self, external_files: external_files.trigger_sf.muon), + }, +) + +# subclass the electron weight producer to create the electron efficiencies +single_trigger_electron_data_effs = electron_trigger_weight.derive( + "single_trigger_electron_data_effs", + cls_dict={ + "get_electron_config": (lambda self: self.config_inst.x.single_trigger_electron_data_effs_cfg), + "weight_name": "single_trigger_e_data_effs", + }, +) + +single_trigger_electron_mc_effs = electron_trigger_weight.derive( + "single_trigger_electron_mc_effs", + cls_dict={ + "get_electron_config": (lambda self: self.config_inst.x.single_trigger_electron_mc_effs_cfg), + "weight_name": "single_trigger_e_mc_effs", + }, +) + +cross_trigger_electron_data_effs = electron_trigger_weight.derive( + "cross_trigger_electron_data_effs", + cls_dict={ + "get_electron_file": (lambda self, external_files: external_files.trigger_sf.cross_electron), + "get_electron_config": (lambda self: self.config_inst.x.cross_trigger_electron_data_effs_cfg), + "weight_name": "cross_trigger_e_data_effs", + }, +) + +cross_trigger_electron_mc_effs = electron_trigger_weight.derive( + "cross_trigger_electron_mc_effs", 
+ cls_dict={ + "get_electron_file": (lambda self, external_files: external_files.trigger_sf.cross_electron), + "get_electron_config": (lambda self: self.config_inst.x.cross_trigger_electron_mc_effs_cfg), + "weight_name": "cross_trigger_e_mc_effs", + }, +) + +# subclass the muon weight producer to create the muon efficiencies +single_trigger_muon_data_effs = muon_trigger_weight.derive( + "single_trigger_muon_data_effs", + cls_dict={ + "get_muon_config": (lambda self: self.config_inst.x.single_trigger_muon_data_effs_cfg), + "weight_name": "single_trigger_mu_data_effs", + }, +) + +single_trigger_muon_mc_effs = muon_trigger_weight.derive( + "single_trigger_muon_mc_effs", + cls_dict={ + "get_muon_config": (lambda self: self.config_inst.x.single_trigger_muon_mc_effs_cfg), + "weight_name": "single_trigger_mu_mc_effs", + }, +) + +cross_trigger_muon_data_effs = muon_trigger_weight.derive( + "cross_trigger_muon_data_effs", + cls_dict={ + "get_muon_file": (lambda self, external_files: external_files.trigger_sf.cross_muon), + "get_muon_config": (lambda self: self.config_inst.x.cross_trigger_muon_data_effs_cfg), + "weight_name": "cross_trigger_mu_data_effs", + }, +) + +cross_trigger_muon_mc_effs = muon_trigger_weight.derive( + "cross_trigger_muon_mc_effs", + cls_dict={ + "get_muon_file": (lambda self, external_files: external_files.trigger_sf.cross_muon), + "get_muon_config": (lambda self: self.config_inst.x.cross_trigger_muon_mc_effs_cfg), + "weight_name": "cross_trigger_mu_mc_effs", + }, +) + +# subclass the tau weight producer to use the cclub tau efficiencies +tau_trigger_effs_cclub = tau_trigger_efficiencies.derive( + "tau_trigger_effs_cclub", + cls_dict={ + "get_tau_file": (lambda self, external_files: external_files.trigger_sf.tau), + "get_tau_corrector": (lambda self: self.config_inst.x.tau_trigger_corrector_cclub), + }, +) + +ee_trigger_weight = electron_trigger_weight.derive( + "ee_trigger_weight", + cls_dict={ + "weight_name": "ee_trigger_weight", + }, +) + +mumu_trigger_weight = muon_trigger_weight.derive( + "mumu_trigger_weight", + cls_dict={ + "weight_name": "mumu_trigger_weight", + }, +) + +emu_e_trigger_weight = electron_trigger_weight.derive( + "emu_e_trigger_weight", + cls_dict={ + "weight_name": "emu_e_trigger_weight", + }, +) + +emu_mu_trigger_weight = muon_trigger_weight.derive( + "emu_mu_trigger_weight", + cls_dict={ + "weight_name": "emu_mu_trigger_weight", + }, +) + + +def reshape_masked_to_oneslike_original(masked_array: ak.Array, mask: ak.Array) -> ak.Array: + """ + Reshape a masked array to a numpy.ones_like array of the original shape. + """ + oneslike_original = np.ones_like(mask) + oneslike_original[mask] = masked_array + return oneslike_original + + +def calculate_correlated_ditrigger_efficiency( + first_trigger_matched: ak.Array, + second_trigger_matched: ak.Array, + first_trigger_effs: ak.Array, + second_trigger_common_object_effs: ak.Array, + second_trigger_other_object_effs: ak.Array, +) -> ak.Array: + """ + Calculate the combination of the single and cross trigger efficiencies. 
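+
+    Schematically, with ``m1``/``m2`` denoting the matching flags of the first and second trigger, the combination
+    implemented below reads:
+
+        eff = (
+            m1 * eff_first +
+            m2 * eff_second_common * eff_second_other -
+            m1 * m2 * eff_second_other * min(eff_first, eff_second_common)
+        )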
+ """ + trigger_efficiency = ( + (first_trigger_effs * first_trigger_matched) + + (second_trigger_other_object_effs * second_trigger_common_object_effs * second_trigger_matched) - + ( + first_trigger_matched * + second_trigger_matched * + second_trigger_other_object_effs * + np.minimum( + first_trigger_effs, + second_trigger_common_object_effs, + ) + ) + ) + return trigger_efficiency + + +def create_trigger_weight( + events: ak.Array, + first_trigger_eff_data: ak.Array, + first_trigger_eff_mc: ak.Array, + second_trigger_common_object_eff_data: ak.Array, + second_trigger_common_object_eff_mc: ak.Array, + second_trigger_other_object_eff_data: ak.Array, + second_trigger_other_object_eff_mc: ak.Array, + channel: od.Channel, + first_trigger_matched: ak.Array, + second_trigger_matched: ak.Array, +) -> ak.Array: + """ + Create the trigger weight for a given channel. + """ + trigger_eff_data = calculate_correlated_ditrigger_efficiency( + first_trigger_matched, + second_trigger_matched, + first_trigger_eff_data, + second_trigger_common_object_eff_data, + second_trigger_other_object_eff_data, + ) + trigger_eff_mc = calculate_correlated_ditrigger_efficiency( + first_trigger_matched, + second_trigger_matched, + first_trigger_eff_mc, + second_trigger_common_object_eff_mc, + second_trigger_other_object_eff_mc, + ) + + # calculate the ratio + trigger_weight = trigger_eff_data / trigger_eff_mc + + # nan happens for all events not in the specific channel, due to efficiency == 0 + # add a failsafe here in case of efficiency 0 for an event actually in the channel + nan_mask = np.isnan(trigger_weight) + if np.any(nan_mask & (events.channel_id == channel.id) & (first_trigger_matched | second_trigger_matched)): + raise ValueError(f"Found nan in {channel.name} trigger weight") + trigger_weight_no_nan = np.nan_to_num(trigger_weight, nan=1.0) + + return trigger_weight_no_nan + + +@producer( + uses={ + "channel_id", "single_triggered", "cross_triggered", # "matched_trigger_ids" + single_trigger_electron_data_effs, cross_trigger_electron_data_effs, + single_trigger_electron_mc_effs, cross_trigger_electron_mc_effs, + single_trigger_muon_data_effs, cross_trigger_muon_data_effs, + single_trigger_muon_mc_effs, cross_trigger_muon_mc_effs, + tau_trigger_effs_cclub, + }, + produces={ + "{e,mu}tau_trigger_weight", + "etau_trigger_weight_e_{up,down}", + "mutau_trigger_weight_mu_{up,down}", + "{e,mu}tau_trigger_weight_tau_dm{0,1,10,11}_{up,down}", + }, +) +def etau_mutau_trigger_weight( + self: Producer, + events: ak.Array, + **kwargs, +) -> ak.Array: + """ + Produces trigger weight for events that fall into the etau and mutau categories. Requires several external files + and configs in the analysis config. 
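+
+    Schematically, per channel the weight is the ratio of the combined data and MC efficiencies,
+
+        weight = eff_data(single, cross) / eff_mc(single, cross)
+
+    with the combination taken from ``calculate_correlated_ditrigger_efficiency`` (single-lepton trigger as first
+    leg, cross trigger as second leg) and a value of 1.0 for events not falling into the channel.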
+ """ + # create e/mu object-level masks for events in the etau and mutau channel, selecting only the leading lepton for + # which the trigger efficiencies were initially calculated (we used the same lepton for matching in the selection) + single_electron_triggered = ( + (events.channel_id == self.config_inst.channels.n.etau.id) & + events.single_triggered & + (ak.local_index(events.Electron) == 0) + ) + single_muon_triggered = ( + (events.channel_id == self.config_inst.channels.n.mutau.id) & + events.single_triggered & + (ak.local_index(events.Muon) == 0) + ) + cross_electron_triggered = ( + (events.channel_id == self.config_inst.channels.n.etau.id) & + events.cross_triggered & + (ak.local_index(events.Electron) == 0) + ) + cross_muon_triggered = ( + (events.channel_id == self.config_inst.channels.n.mutau.id) & + events.cross_triggered & + (ak.local_index(events.Muon) == 0) + ) + + # get efficiencies from the correctionlib producers + + # first, create the efficiencies for the leptons in data + events = self[single_trigger_muon_data_effs](events, single_muon_triggered, **kwargs) + events = self[cross_trigger_muon_data_effs](events, cross_muon_triggered, **kwargs) + events = self[single_trigger_electron_data_effs](events, single_electron_triggered, **kwargs) + events = self[cross_trigger_electron_data_effs](events, cross_electron_triggered, **kwargs) + + # do the same for MC efficiencies + events = self[single_trigger_muon_mc_effs](events, single_muon_triggered, **kwargs) + events = self[cross_trigger_muon_mc_effs](events, cross_muon_triggered, **kwargs) + events = self[single_trigger_electron_mc_effs](events, single_electron_triggered, **kwargs) + events = self[cross_trigger_electron_mc_effs](events, cross_electron_triggered, **kwargs) + + # create all tau efficiencies at object-level + events = self[tau_trigger_effs_cclub](events, **kwargs) + + # create the nominal case + for lepton, channel_name in [("e", "etau"), ("mu", "mutau")]: + channel = self.config_inst.get_channel(channel_name) + single_trigger_lepton_data_effs = events[f"single_trigger_{lepton}_data_effs"] + cross_trigger_lepton_data_effs = events[f"cross_trigger_{lepton}_data_effs"] + single_trigger_lepton_mc_effs = events[f"single_trigger_{lepton}_mc_effs"] + cross_trigger_lepton_mc_effs = events[f"cross_trigger_{lepton}_mc_effs"] + + # make tau efficiencies to event level quantity + cross_trigger_tau_data_effs = ak.prod( + events[f"tau_trigger_eff_data_{channel_name}"], + axis=1, + mask_identity=False, + ) + cross_trigger_tau_mc_effs = ak.prod( + events[f"tau_trigger_eff_mc_{channel_name}"], + axis=1, + mask_identity=False, + ) + + trigger_weight = create_trigger_weight( + events, + single_trigger_lepton_data_effs, + single_trigger_lepton_mc_effs, + cross_trigger_lepton_data_effs, + cross_trigger_lepton_mc_effs, + cross_trigger_tau_data_effs, + cross_trigger_tau_mc_effs, + channel, + ((events.channel_id == channel.id) & events.single_triggered), + ((events.channel_id == channel.id) & events.cross_triggered), + ) + events = set_ak_column_f32(events, f"{channel_name}_trigger_weight", trigger_weight) + + # create the variations + for direction in ["up", "down"]: + for lepton, channel_name in [("e", "etau"), ("mu", "mutau")]: + # e and mu variations + + channel = self.config_inst.get_channel(channel_name) + single_triggered = (events.channel_id == channel.id) & events.single_triggered + cross_triggered = (events.channel_id == channel.id) & events.cross_triggered + + single_trigger_lepton_data_effs = 
events[f"single_trigger_{lepton}_data_effs_{direction}"] + cross_trigger_lepton_data_effs = events[f"cross_trigger_{lepton}_data_effs_{direction}"] + single_trigger_lepton_mc_effs = events[f"single_trigger_{lepton}_mc_effs_{direction}"] + cross_trigger_lepton_mc_effs = events[f"cross_trigger_{lepton}_mc_effs_{direction}"] + cross_trigger_tau_data_effs = events[f"tau_trigger_eff_data_{channel_name}"] + cross_trigger_tau_mc_effs = events[f"tau_trigger_eff_mc_{channel_name}"] + + # make tau efficiencies to event level quantity + cross_trigger_tau_data_effs = ak.prod( + cross_trigger_tau_data_effs, + axis=1, + mask_identity=False, + ) + cross_trigger_tau_mc_effs = ak.prod( + cross_trigger_tau_mc_effs, + axis=1, + mask_identity=False, + ) + + trigger_weight = create_trigger_weight( + events, + single_trigger_lepton_data_effs, + single_trigger_lepton_mc_effs, + cross_trigger_lepton_data_effs, + cross_trigger_lepton_mc_effs, + cross_trigger_tau_data_effs, + cross_trigger_tau_mc_effs, + channel, + single_triggered, + cross_triggered, + ) + events = set_ak_column_f32(events, f"{channel.name}_trigger_weight_{lepton}_{direction}", trigger_weight) + + # tau variations + single_trigger_lepton_data_effs = events[f"single_trigger_{lepton}_data_effs"] + cross_trigger_lepton_data_effs = events[f"cross_trigger_{lepton}_data_effs"] + single_trigger_lepton_mc_effs = events[f"single_trigger_{lepton}_mc_effs"] + cross_trigger_lepton_mc_effs = events[f"cross_trigger_{lepton}_mc_effs"] + + for dm in [0, 1, 10, 11]: + trigger_weight = create_trigger_weight( + events, + single_trigger_lepton_data_effs, + single_trigger_lepton_mc_effs, + cross_trigger_lepton_data_effs, + cross_trigger_lepton_mc_effs, + ak.prod(events[f"tau_trigger_eff_data_{channel.name}_dm{dm}_{direction}"], axis=1, mask_identity=False), # noqa: E501 + ak.prod(events[f"tau_trigger_eff_mc_{channel.name}_dm{dm}_{direction}"], axis=1, mask_identity=False), # noqa: E501 + channel, + single_triggered, + cross_triggered, + ) + events = set_ak_column_f32(events, f"{channel.name}_trigger_weight_tau_dm{dm}_{direction}", trigger_weight) # noqa: E501 + + return events + + +@producer( + uses={ + "channel_id", "matched_trigger_ids", + tau_trigger_effs_cclub, jet_trigger_efficiencies, + "Jet.{pt,eta,phi,mass}", + }, + produces={ + "tautau_trigger_weight", + "tautau_trigger_weight_jet_{up,down}", + "tautau_trigger_weight_tau_dm{0,1,10,11}_{up,down}", + }, +) +def tautau_trigger_weight( + self: Producer, + events: ak.Array, + **kwargs, +) -> ak.Array: + """ + Produces trigger weight for events that fall into the tautau category. Requires several external file and configs + in the analysis config. 
+ """ + channel = self.config_inst.channels.n.tautau + + # create all tau efficiencies + events = self[tau_trigger_effs_cclub](events, **kwargs) + + # find out which tautau triggers are passed + tt_trigger_passed = ak.zeros_like(events.channel_id, dtype=np.bool) + ttj_trigger_passed = ak.zeros_like(events.channel_id, dtype=np.bool) + # ttv_trigger_passed = ak.zeros_like(events.channel_id, dtype=np.bool) + for trigger in self.config_inst.x.triggers: + if trigger.has_tag("cross_tau_tau"): + tt_trigger_passed = tt_trigger_passed | np.any(events.matched_trigger_ids == trigger.id, axis=-1) + if trigger.has_tag("cross_tau_tau_jet"): + ttj_trigger_passed = ttj_trigger_passed | np.any(events.matched_trigger_ids == trigger.id, axis=-1) # noqa + # if trigger.has_tag("cross_tau_tau_vbf"): + # ttv_trigger_passed = ttv_trigger_passed | np.any(events.matched_trigger_ids == trigger.id, axis=-1) # noqa + + tt_triggered = ((events.channel_id == channel.id) & tt_trigger_passed) + ttj_triggered = ((events.channel_id == channel.id) & ttj_trigger_passed) + # ttv_triggered = ((events.channel_id == channel.id) & ttv_trigger_passed) # vbf treatment left out from here on + + sorted_jet_indices = ak.argsort(events.Jet.pt, axis=1, ascending=False) + leading_Jet_mask = (ak.zeros_like(events.Jet.pt, dtype=int) == ak.local_index(events.Jet.pt)[sorted_hhbjet_indices]) # noqa + jet_mask = (ttj_triggered & leading_Jet_mask) + # create jet trigger efficiencies + events = self[jet_trigger_efficiencies](events, jet_mask, **kwargs) + + # tau efficiencies + # make ditau efficiencies to event level quantity + tt_data_effs = ak.prod(events.tau_trigger_eff_data_tautau, axis=1, mask_identity=False) + tt_mc_effs = ak.prod(events.tau_trigger_eff_mc_tautau, axis=1, mask_identity=False) + ttj_tau_data_effs = ak.prod(events.tau_trigger_eff_data_tautaujet, axis=1, mask_identity=False) + ttj_tau_mc_effs = ak.prod(events.tau_trigger_eff_mc_tautaujet, axis=1, mask_identity=False) + + # jet efficiencies + # make jet efficiencies to event level quantity + # there should be only one such efficiency for the tautaujet trigger + ttj_jet_data_effs = ak.prod(events.jet_trigger_eff_data, axis=1, mask_identity=False) + ttj_jet_mc_effs = ak.prod(events.jet_trigger_eff_mc, axis=1, mask_identity=False) + + trigger_weight = create_trigger_weight( + events, + tt_data_effs, + tt_mc_effs, + ttj_tau_data_effs, + ttj_tau_mc_effs, + ttj_jet_data_effs, + ttj_jet_mc_effs, + channel=channel, + first_trigger_matched=tt_triggered, + second_trigger_matched=ttj_triggered, + ) + events = set_ak_column_f32(events, "tautau_trigger_weight", trigger_weight) + + for direction in ["up", "down"]: + # jet variations + + # tau efficiencies + # make ditau efficiencies to event level quantity + tt_data_effs = ak.prod(events.tau_trigger_eff_data_tautau, axis=1, mask_identity=False) + tt_mc_effs = ak.prod(events.tau_trigger_eff_mc_tautau, axis=1, mask_identity=False) + ttj_tau_data_effs = ak.prod(events.tau_trigger_eff_data_tautaujet, axis=1, mask_identity=False) + ttj_tau_mc_effs = ak.prod(events.tau_trigger_eff_mc_tautaujet, axis=1, mask_identity=False) + + # jet efficiencies + # make jet efficiencies to event level quantity + # there should be only one such efficiency for the tautaujet trigger + ttj_jet_data_effs = ak.prod(events[f"jet_trigger_eff_data_{direction}"], axis=1, mask_identity=False) + ttj_jet_mc_effs = ak.prod(events[f"jet_trigger_eff_mc_{direction}"], axis=1, mask_identity=False) + + trigger_weight = create_trigger_weight( + events, + tt_data_effs, + tt_mc_effs, 
+ ttj_tau_data_effs, + ttj_tau_mc_effs, + ttj_jet_data_effs, + ttj_jet_mc_effs, + channel=channel, + first_trigger_matched=tt_triggered, + second_trigger_matched=ttj_triggered, + ) + events = set_ak_column_f32(events, f"tautau_trigger_weight_jet_{direction}", trigger_weight) + + # tau variations + + # jet efficiencies + # make jet efficiencies to event level quantity + # there should be only one such efficiency for the tautaujet trigger + ttj_jet_data_effs = ak.prod(events.jet_trigger_eff_data, axis=1, mask_identity=False) + ttj_jet_mc_effs = ak.prod(events.jet_trigger_eff_mc, axis=1, mask_identity=False) + + for dm in [0, 1, 10, 11]: + trigger_weight = create_trigger_weight( + events, + ak.prod(events[f"tau_trigger_eff_data_tautau_dm{dm}_{direction}"], axis=1, mask_identity=False), + ak.prod(events[f"tau_trigger_eff_mc_tautau_dm{dm}_{direction}"], axis=1, mask_identity=False), + ak.prod(events[f"tau_trigger_eff_data_tautaujet_dm{dm}_{direction}"], axis=1, mask_identity=False), + ak.prod(events[f"tau_trigger_eff_mc_tautaujet_dm{dm}_{direction}"], axis=1, mask_identity=False), + ttj_jet_data_effs, + ttj_jet_mc_effs, + channel=channel, + first_trigger_matched=tt_triggered, + second_trigger_matched=ttj_triggered, + ) + events = set_ak_column_f32(events, f"tautau_trigger_weight_tau_dm{dm}_{direction}", trigger_weight) + + return events + + +@producer( + uses={ + "channel_id", "matched_trigger_ids", + emu_e_trigger_weight, emu_mu_trigger_weight, + }, + produces={ + "emu_trigger_weight", + "emu_trigger_weight_{e,mu}_{up,down}", + }, +) +def emu_trigger_weight( + self: Producer, + events: ak.Array, + **kwargs, +) -> ak.Array: + """ + Producer for emu trigger scale factors. + """ + # find out which triggers are passed + mu_trigger_passed = ak.zeros_like(events.channel_id, dtype=np.bool) + e_trigger_passed = ak.zeros_like(events.channel_id, dtype=np.bool) + for trigger in self.config_inst.x.triggers: + if trigger.has_tag("single_mu"): + mu_trigger_passed = mu_trigger_passed | np.any(events.matched_trigger_ids == trigger.id, axis=-1) + if trigger.has_tag("single_e"): + e_trigger_passed = e_trigger_passed | np.any(events.matched_trigger_ids == trigger.id, axis=-1) + + # create e/mu object-level masks for events in the emu channel, selecting only the leading lepton for + # which the trigger efficiencies were initially calculated (we used the same lepton for matching in the selection) + muon_object_mask = ( + (events.channel_id == self.config_inst.channels.n.emu.id) & + mu_trigger_passed & + (ak.local_index(events.Muon) == 0) + ) + electron_object_mask = ( + (events.channel_id == self.config_inst.channels.n.emu.id) & + e_trigger_passed & + (ak.local_index(events.Electron) == 0) + ) + + # calculate the scale factors for the triggered objects, if the object was not triggered, the SF is 1 + # therefore we can just multiply the SFs for the objects in the event + events = self[emu_e_trigger_weight](events, electron_mask=electron_object_mask, **kwargs) + events = self[emu_mu_trigger_weight](events, muon_mask=muon_object_mask, **kwargs) + + # nominal case + trigger_weight = events.emu_e_trigger_weight * events.emu_mu_trigger_weight + events = set_ak_column_f32(events, "emu_trigger_weight", trigger_weight) + + # e and mu variations + for direction in ["up", "down"]: + # e + trigger_weight = events[f"emu_e_trigger_weight_{direction}"] * events.emu_mu_trigger_weight + events = set_ak_column_f32(events, f"emu_trigger_weight_e_{direction}", trigger_weight) + # mu + trigger_weight = events.emu_e_trigger_weight * 
events[f"emu_mu_trigger_weight_{direction}"] + events = set_ak_column_f32(events, f"emu_trigger_weight_mu_{direction}", trigger_weight) + + return events + + +@producer( + uses={ + "channel_id", + ee_trigger_weight, + mumu_trigger_weight, + }, + produces={ + "ee_trigger_weight", + "mumu_trigger_weight", + "mumu_trigger_weight_mu_{up,down}", + "ee_trigger_weight_e_{up,down}", + }, +) +def ee_mumu_trigger_weight( + self: Producer, + events: ak.Array, + **kwargs, +) -> ak.Array: + """ + Producer for ee and mumu trigger scale factors. + """ + + ee_mask = (events.channel_id == self.config_inst.channels.n.ee.id) & (ak.local_index(events.Electron) == 0) + mumu_mask = (events.channel_id == self.config_inst.channels.n.mumu.id) & (ak.local_index(events.Muon) == 0) + events = self[ee_trigger_weight](events, electron_mask=ee_mask, **kwargs) + events = self[mumu_trigger_weight](events, muon_mask=mumu_mask, **kwargs) + + # create the columns + events = self[ee_trigger_weight](events, **kwargs) + events = self[mumu_trigger_weight](events, **kwargs) + + # rename ee and mumu variations for consistency + for direction in ["up", "down"]: + events = set_ak_column_f32( + events, + f"mumu_trigger_weight_mu_{direction}", + events[f"mumu_trigger_weight_{direction}"], + ) + events = set_ak_column_f32( + events, + f"ee_trigger_weight_e_{direction}", + events[f"ee_trigger_weight_{direction}"], + ) + + return events + + +@producer( + uses={ + etau_mutau_trigger_weight, + tautau_trigger_weight, + ee_mumu_trigger_weight, + emu_trigger_weight, + }, + produces={ + "trigger_weight", + "trigger_weight_{e,mu,jet}_{up,down}", + "trigger_weight_tau_dm{0,1,10,11}_{up,down}", + }, +) +def trigger_weight( + self: Producer, + events: ak.Array, + **kwargs, +) -> ak.Array: + """ + Producer for trigger scale factors. + """ + # etau and mutau + events = self[etau_mutau_trigger_weight](events, **kwargs) + + # tautau + events = self[tautau_trigger_weight](events, **kwargs) + + # ee and mumu + events = self[ee_mumu_trigger_weight](events, **kwargs) + + # emu + events = self[emu_trigger_weight](events, **kwargs) + + # get channels + channels = { + channel_name: self.config_inst.channels.get(channel_name) + for channel_name in ["etau", "mutau", "tautau", "ee", "mumu", "emu"] + } + + # create the total trigger scale factor + # A multiplication is done here, as every the columns used contain the value 1.0 for events not in the channel + # and the channels are mutually exclusive + for channel_name, channel in channels.items(): + channel_mask = (events.channel_id == channel.id) + if not ak.all(channel_mask | (events[f"{channel_name}_trigger_weight"] == 1.0)): + raise ValueError(f"trigger weight for {channel_name} not all 1.0 for events not in the channel") + trigger_weight = ( + events.etau_trigger_weight * + events.mutau_trigger_weight * + events.tautau_trigger_weight * + events.ee_trigger_weight * + events.mumu_trigger_weight * + events.emu_trigger_weight + ) + events = set_ak_column_f32(events, "trigger_weight", trigger_weight) + + # create the variations + # Do to the choice of triggers, certain channel do not have variations for certain objects + # e.g. 
etau does not have a muon or jet dependent trigger, therefore the variations are not defined + # we check further down that the columns do not exist for only these specific cases + undefined_variations = { + ("etau", "mu"), ("etau", "jet"), + ("mutau", "e"), ("mutau", "jet"), + ("tautau", "e"), ("tautau", "mu"), + ("ee", "mu"), ("ee", "jet"), + ("ee", "tau_dm0"), ("ee", "tau_dm1"), ("ee", "tau_dm10"), ("ee", "tau_dm11"), + ("mumu", "e"), ("mumu", "jet"), + ("mumu", "tau_dm0"), ("mumu", "tau_dm1"), ("mumu", "tau_dm10"), ("mumu", "tau_dm11"), + ("emu", "jet"), + ("emu", "tau_dm0"), ("emu", "tau_dm1"), ("emu", "tau_dm10"), ("emu", "tau_dm11"), + } + + for direction in ["up", "down"]: + for variation in ["e", "mu", "tau_dm0", "tau_dm1", "tau_dm10", "tau_dm11", "jet"]: + trigger_weight = events.trigger_weight + weight_name = "trigger_weight" + varied_weight_name = f"{weight_name}_{variation}_{direction}" + for channel_name, channel in channels.items(): + # for all variations, the default is the nominal trigger weight + channel_weight_name = f"{channel_name}_{varied_weight_name}" + if channel_weight_name not in events.fields: + if (channel_name, variation) not in undefined_variations: + raise ValueError(f"trigger weight variation {channel_weight_name} not found in events") + channel_weight_name = f"{channel_name}_{weight_name}" + variation_array = events[channel_weight_name] + + # update the trigger weight with the value for the variation + channel_mask = (events.channel_id == channel.id) + trigger_weight = ak.where(channel_mask, variation_array, trigger_weight) + + events = set_ak_column_f32(events, varied_weight_name, trigger_weight) + return events diff --git a/multilepton/production/weights.py b/multilepton/production/weights.py new file mode 100644 index 00000000..a3e9ac85 --- /dev/null +++ b/multilepton/production/weights.py @@ -0,0 +1,278 @@ +# coding: utf-8 + +""" +Column production methods related to generic event weights. 
+""" + +from __future__ import annotations + +import functools + +import law + +from columnflow.production import Producer, producer +from columnflow.production.cms.pileup import pu_weight +from columnflow.production.cms.pdf import pdf_weights +from columnflow.production.cms.scale import murmuf_weights +from columnflow.production.cms.parton_shower import ps_weights +from columnflow.util import maybe_import, safe_div +from columnflow.columnar_util import set_ak_column + + +ak = maybe_import("awkward") +np = maybe_import("numpy") +hist = maybe_import("hist") + +# helper +set_ak_column_f32 = functools.partial(set_ak_column, value_type=np.float32) + + +@producer( + uses={pu_weight.PRODUCES, "process_id"}, + mc_only=True, +) +def normalized_pu_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array: + for weight_name in self.pu_weight_names: + # create a weight vector starting with ones + norm_weight_per_pid = np.ones(len(events), dtype=np.float32) + + # fill weights with a new mask per unique process id (mostly just one) + for pid in self.unique_process_ids: + pid_mask = events.process_id == pid + norm_weight_per_pid[pid_mask] = self.ratio_per_pid[weight_name][pid] + + # multiply with actual weight + norm_weight_per_pid = norm_weight_per_pid * events[weight_name] + + # store it + norm_weight_per_pid = ak.values_astype(norm_weight_per_pid, np.float32) + events = set_ak_column_f32(events, f"normalized_{weight_name}", norm_weight_per_pid) + + return events + + +@normalized_pu_weight.post_init +def normalized_pu_weight_post_init(self: Producer, task: law.Task, **kwargs) -> None: + # remember pu columns to read and produce + self.pu_weight_names = { + weight_name + for weight_name in map(str, self[pu_weight].produced_columns) + if ( + weight_name.startswith("pu_weight") and + (task.global_shift_inst.is_nominal or not weight_name.endswith(("_up", "_down"))) + ) + } + + +@normalized_pu_weight.requires +def normalized_pu_weight_requires(self: Producer, task: law.Task, reqs: dict, **kwargs) -> None: + from columnflow.tasks.selection import MergeSelectionStats + reqs["selection_stats"] = MergeSelectionStats.req_different_branching( + task, + branch=-1 if task.is_workflow() else 0, + ) + + +@normalized_pu_weight.setup +def normalized_pu_weight_setup(self: Producer, task: law.Task, inputs: dict, **kwargs) -> None: + # load the selection stats + hists = task.cached_value( + key="selection_hists", + func=lambda: inputs["selection_stats"]["hists"].load(formatter="pickle"), + ) + + # get the unique process ids in that dataset + self.unique_process_ids = list(hists["sum_mc_weight_pu_weight"].axes["process"]) + + # helper to get numerators and denominators + def get_sum(pid, weight_name="", /): + if weight_name: + weight_name = "_" + weight_name + key = f"sum_mc_weight{weight_name}" + return hists[key][{"process": hist.loc(pid)}].sum().value + + # extract the ratio per weight and pid + self.ratio_per_pid = { + weight_name: { + pid: safe_div(get_sum(pid), get_sum(pid, weight_name)) + for pid in self.unique_process_ids + } + for weight_name in (str(route) for route in self[pu_weight].produced_columns) + if weight_name.startswith("pu_weight") + } + + +@producer( + uses={pdf_weights.PRODUCES}, + mc_only=True, +) +def normalized_pdf_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array: + for weight_name in self.pdf_weight_names: + # create the normalized weight + avg = self.average_pdf_weights[weight_name] + normalized_weight = events[weight_name] / avg + + # store it + events = 
set_ak_column_f32(events, f"normalized_{weight_name}", normalized_weight) + + return events + + +@normalized_pdf_weight.post_init +def normalized_pdf_weight_post_init(self: Producer, task: law.Task, **kwargs) -> None: + # remember pdf columns to read and produce + self.pdf_weight_names = { + weight_name + for weight_name in map(str, self[pdf_weights].produced_columns) + if ( + weight_name.startswith("pdf_weight") and + (task.global_shift_inst.is_nominal or not weight_name.endswith(("_up", "_down"))) + ) + } + # adjust columns + self.uses.clear() + self.uses |= self.pdf_weight_names + self.produces |= {f"normalized_{weight_name}" for weight_name in self.pdf_weight_names} + + +@normalized_pdf_weight.requires +def normalized_pdf_weight_requires(self: Producer, task: law.Task, reqs: dict, **kwargs) -> None: + from columnflow.tasks.selection import MergeSelectionStats + reqs["selection_stats"] = MergeSelectionStats.req_different_branching( + task, + branch=-1 if task.is_workflow() else 0, + ) + + +@normalized_pdf_weight.setup +def normalized_pdf_weight_setup(self: Producer, task: law.Task, inputs: dict, **kwargs) -> None: + # load the selection stats + hists = task.cached_value( + key="selection_hists", + func=lambda: inputs["selection_stats"]["hists"].load(formatter="pickle"), + ) + + # save average weights + self.average_pdf_weights = { + weight_name: safe_div(hists[f"sum_{weight_name}"].sum().value, hists["num_events"].sum()) + for weight_name in self.pdf_weight_names + } + + +# variation of the pdf weights producer that does not store up and down shifted weights +# but that stores all available pdf weights for the full treatment based on histograms +all_pdf_weights = pdf_weights.derive("all_pdf_weights", cls_dict={"store_all_weights": True}) + + +@producer( + uses={murmuf_weights.PRODUCES}, + mc_only=True, +) +def normalized_murmuf_weight(self: Producer, events: ak.Array, **kwargs) -> ak.Array: + for weight_name in self.mu_weight_names: + # create the normalized weight + avg = self.average_mu_weights[weight_name] + normalized_weight = events[weight_name] / avg + + # store it + events = set_ak_column_f32(events, f"normalized_{weight_name}", normalized_weight) + + return events + + +@normalized_murmuf_weight.post_init +def normalized_murmuf_weight_post_init(self: Producer, task: law.Task, **kwargs) -> None: + # remember mur/muf columns to read and produce + self.mu_weight_names = { + weight_name + for weight_name in map(str, self[murmuf_weights].produced_columns) + if ( + weight_name.startswith("murmuf_weight") and + (task.global_shift_inst.is_nominal or not weight_name.endswith(("_up", "_down"))) + ) + } + # adjust columns + self.uses.clear() + self.uses |= self.mu_weight_names + self.produces |= {f"normalized_{weight_name}" for weight_name in self.mu_weight_names} + + +@normalized_murmuf_weight.requires +def normalized_murmuf_weight_requires(self: Producer, task: law.Task, reqs: dict, **kwargs) -> None: + from columnflow.tasks.selection import MergeSelectionStats + reqs["selection_stats"] = MergeSelectionStats.req_different_branching( + task, + branch=-1 if task.is_workflow() else 0, + ) + + +@normalized_murmuf_weight.setup +def normalized_murmuf_weight_setup(self: Producer, task: law.Task, inputs: dict, **kwargs) -> None: + # load the selection stats + hists = task.cached_value( + key="selection_hists", + func=lambda: inputs["selection_stats"]["hists"].load(formatter="pickle"), + ) + + # save average weights + self.average_mu_weights = { + weight_name: 
safe_div(hists[f"sum_{weight_name}"].sum().value, hists["num_events"].sum()) + for weight_name in self.mu_weight_names + } + + +@producer( + uses={ps_weights.PRODUCES}, + mc_only=True, +) +def normalized_ps_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Array: + for weight_name in self.ps_weight_names: + # create the normalized weight + avg = self.average_ps_weights[weight_name] + normalized_weight = events[weight_name] / avg + + # store it + events = set_ak_column_f32(events, f"normalized_{weight_name}", normalized_weight) + + return events + + +@normalized_ps_weights.post_init +def normalized_ps_weights_post_init(self: Producer, task: law.Task, **kwargs) -> None: + # remember ps weight columns to read and produce + self.ps_weight_names = { + weight_name + for weight_name in map(str, self[ps_weights].produced_columns) + if ( + "weight" in weight_name and + (task.global_shift_inst.is_nominal or not weight_name.endswith(("_up", "_down"))) + ) + } + # adjust columns + self.uses.clear() + self.uses |= self.ps_weight_names + self.produces |= {f"normalized_{weight_name}" for weight_name in self.ps_weight_names} + + +@normalized_ps_weights.requires +def normalized_ps_weights_requires(self: Producer, task: law.Task, reqs: dict, **kwargs) -> None: + from columnflow.tasks.selection import MergeSelectionStats + reqs["selection_stats"] = MergeSelectionStats.req_different_branching( + task, + branch=-1 if task.is_workflow() else 0, + ) + + +@normalized_ps_weights.setup +def normalized_ps_weights_setup(self: Producer, task: law.Task, inputs: dict, **kwargs) -> None: + # load the selection stats + hists = task.cached_value( + key="selection_hists", + func=lambda: inputs["selection_stats"]["hists"].load(formatter="pickle"), + ) + + # save average weights + self.average_ps_weights = { + weight_name: safe_div(hists[f"sum_{weight_name}"].sum().value, hists["num_events"].sum()) + for weight_name in self.ps_weight_names + } diff --git a/multilepton/reduction/__init__.py b/multilepton/reduction/__init__.py new file mode 100644 index 00000000..57d631c3 --- /dev/null +++ b/multilepton/reduction/__init__.py @@ -0,0 +1 @@ +# coding: utf-8 diff --git a/multilepton/reduction/default.py b/multilepton/reduction/default.py new file mode 100644 index 00000000..bdb7a787 --- /dev/null +++ b/multilepton/reduction/default.py @@ -0,0 +1,53 @@ +# coding: utf-8 + +""" +Custom event and object reducers. 
+""" + +from columnflow.reduction import Reducer, reducer +from columnflow.reduction.default import cf_default +from columnflow.production.cms.dy import gen_dilepton, recoil_corrected_met +from columnflow.production.cms.gen_particles import gen_higgs_lookup, gen_top_lookup, gen_dy_lookup +from columnflow.util import maybe_import + +from multilepton.util import IF_DATASET_HAS_HIGGS, IF_DATASET_HAS_TOP, IF_DATASET_IS_DY + +ak = maybe_import("awkward") + + +@reducer( + uses={ + cf_default, + IF_DATASET_HAS_HIGGS(gen_higgs_lookup), + IF_DATASET_HAS_TOP(gen_top_lookup), + IF_DATASET_IS_DY(gen_dy_lookup, gen_dilepton, recoil_corrected_met), + }, + produces={ + cf_default, + IF_DATASET_HAS_HIGGS(gen_higgs_lookup), + IF_DATASET_HAS_TOP(gen_top_lookup), + IF_DATASET_IS_DY(gen_dy_lookup, gen_dilepton, recoil_corrected_met), + }, + check_produced_columns=False, +) +def default(self: Reducer, events: ak.Array, selection: ak.Array, **kwargs) -> ak.Array: + # run cf's default reduction which handles event selection and collection creation + events = self[cf_default](events, selection, **kwargs) + # when there are no events left, return immediately + # (ReduceEvents would anyway not write this chunk to disk and skips it during merging) + if len(events) == 0: + return events + # add generator particles, depending on the dataset + if self.has_dep(gen_higgs_lookup): + events = self[gen_higgs_lookup](events, **kwargs) + if self.has_dep(gen_top_lookup): + events = self[gen_top_lookup](events, **kwargs) + if self.has_dep(gen_dy_lookup): + events = self[gen_dy_lookup](events, **kwargs) + # TODO: is gen_dilepton redundant to what gen_dy_lookup provides? + if self.has_dep(gen_dilepton): + events = self[gen_dilepton](events, **kwargs) + # add recoil corrected met + if self.has_dep(recoil_corrected_met): + events = self[recoil_corrected_met](events, **kwargs) + return events diff --git a/multilepton/selection/__init__.py b/multilepton/selection/__init__.py new file mode 100644 index 00000000..57d631c3 --- /dev/null +++ b/multilepton/selection/__init__.py @@ -0,0 +1 @@ +# coding: utf-8 diff --git a/hbt/selection/default.py b/multilepton/selection/default.py similarity index 63% rename from hbt/selection/default.py rename to multilepton/selection/default.py index c0f3c141..e7a13371 100644 --- a/hbt/selection/default.py +++ b/multilepton/selection/default.py @@ -11,52 +11,49 @@ from collections import defaultdict import law +import order as od +from columnflow.selection.cms.met_filters import met_filters as cf_met_filters from columnflow.selection import Selector, SelectionResult, selector -from columnflow.selection.stats import increment_stats from columnflow.selection.cms.json_filter import json_filter -from columnflow.selection.cms.met_filters import met_filters from columnflow.selection.cms.jets import jet_veto_map from columnflow.production.processes import process_ids from columnflow.production.cms.mc_weight import mc_weight from columnflow.production.cms.pileup import pu_weight from columnflow.production.cms.pdf import pdf_weights from columnflow.production.cms.scale import murmuf_weights -from columnflow.production.cms.top_pt_weight import gen_parton_top +from columnflow.production.cms.parton_shower import ps_weights from columnflow.production.util import attach_coffea_behavior -from columnflow.columnar_util import full_like -from columnflow.util import maybe_import +from columnflow.columnar_util import Route, set_ak_column, full_like +from columnflow.hist_util import create_hist_from_variables, fill_hist 
+from columnflow.util import maybe_import, DotDict from columnflow.types import Iterable -from hbt.selection.trigger import trigger_selection -from hbt.selection.lepton import lepton_selection -from hbt.selection.jet import jet_selection -import hbt.production.processes as process_producers -from hbt.production.btag import btag_weights_deepjet, btag_weights_pnet -from hbt.production.features import cutflow_features -from hbt.production.patches import patch_ecalBadCalibFilter -from hbt.util import IF_DATASET_HAS_LHE_WEIGHTS, IF_RUN_3 +import multilepton.production.processes as process_producers + +from multilepton.selection.trigger import trigger_selection +from multilepton.selection.lepton import lepton_selection +from multilepton.selection.jet import jet_selection +from multilepton.production.btag import btag_weights_deepjet, btag_weights_pnet +from multilepton.production.features import cutflow_features +from multilepton.production.patches import patch_ecalBadCalibFilter +from multilepton.util import IF_DATASET_HAS_LHE_WEIGHTS, IF_RUN_3, IF_RUN_3_NOT_NANO_V15 np = maybe_import("numpy") ak = maybe_import("awkward") - - +hist = maybe_import("hist") logger = law.logger.get_logger(__name__) # updated met_filters selector to define dataset dependent filters def get_met_filters(self: Selector) -> Iterable[str]: - if getattr(self, "dataset_inst", None) is None: - return {} - met_filters = set(self.config_inst.x.met_filters[self.dataset_inst.data_source]) if self.dataset_inst.has_tag("broken_ecalBadCalibFilter"): met_filters -= {"Flag.ecalBadCalibFilter"} - return list(met_filters) -hbt_met_filters = met_filters.derive("hbt_met_filters", cls_dict={"get_met_filters": get_met_filters}) +met_filters = cf_met_filters.derive("met_filters", cls_dict={"get_met_filters": get_met_filters}) # helper to identify bad events that should be considered missing altogether @@ -78,21 +75,20 @@ def get_bad_events(self: Selector, events: ak.Array) -> ak.Array: logger.warning( f"found {ak.sum(bad_lhe_mask)} events ({frac * 100:.1f}%) with bad LHEPdfWeights", ) - return bad_mask @selector( uses={ - json_filter, hbt_met_filters, IF_RUN_3(jet_veto_map), trigger_selection, lepton_selection, - jet_selection, mc_weight, pu_weight, btag_weights_deepjet, IF_RUN_3(btag_weights_pnet), - process_ids, cutflow_features, increment_stats, attach_coffea_behavior, - patch_ecalBadCalibFilter, IF_DATASET_HAS_LHE_WEIGHTS(pdf_weights, murmuf_weights), + json_filter, met_filters, IF_RUN_3_NOT_NANO_V15(jet_veto_map), trigger_selection, lepton_selection, jet_selection, + mc_weight, pu_weight, ps_weights, btag_weights_deepjet, IF_RUN_3(btag_weights_pnet), process_ids, + cutflow_features, attach_coffea_behavior, patch_ecalBadCalibFilter, + IF_DATASET_HAS_LHE_WEIGHTS(pdf_weights, murmuf_weights), }, produces={ - trigger_selection, lepton_selection, jet_selection, mc_weight, pu_weight, - btag_weights_deepjet, IF_RUN_3(btag_weights_pnet), process_ids, cutflow_features, - increment_stats, IF_DATASET_HAS_LHE_WEIGHTS(pdf_weights, murmuf_weights), + trigger_selection, lepton_selection, jet_selection, mc_weight, pu_weight, ps_weights, btag_weights_deepjet, + process_ids, cutflow_features, IF_RUN_3(btag_weights_pnet), + IF_DATASET_HAS_LHE_WEIGHTS(pdf_weights, murmuf_weights), }, exposed=True, ) @@ -100,29 +96,26 @@ def default( self: Selector, events: ak.Array, stats: defaultdict, + hists: DotDict[str, hist.Hist], **kwargs, ) -> tuple[ak.Array, SelectionResult]: # ensure coffea behavior events = self[attach_coffea_behavior](events, **kwargs) - # 
prepare the selection results that are updated at every step results = SelectionResult() - # before performing selection steps, drop events that should not be considered at all and # maintain a mask "no_sel" that refers to events that are kept bad_mask = get_bad_events(self, events) no_sel = ~bad_mask results += SelectionResult(steps={"bad": no_sel}) - # filter bad data events according to golden lumi mask if self.dataset_inst.is_data: events, json_filter_results = self[json_filter](events, **kwargs) results += json_filter_results else: results += SelectionResult(steps={"json": full_like(events.event, True, dtype=bool)}) - # met filter selection - events, met_filter_results = self[hbt_met_filters](events, **kwargs) + events, met_filter_results = self[met_filters](events, **kwargs) # patch for the broken "Flag_ecalBadCalibFilter" MET filter in prompt data (tag set in config) if self.dataset_inst.has_tag("broken_ecalBadCalibFilter"): # fold decision into met filter results @@ -132,7 +125,7 @@ def default( events.patchedEcalBadCalibFilter ) results += met_filter_results - + # jet veto map if self.has_dep(jet_veto_map): events, veto_result = self[jet_veto_map](events, **kwargs) @@ -153,7 +146,6 @@ def default( # mc-only functions if self.dataset_inst.is_mc: events = self[mc_weight](events, **kwargs) - # pdf weights if self.has_dep(pdf_weights): events = self[pdf_weights]( @@ -167,6 +159,9 @@ def default( if self.has_dep(murmuf_weights): events = self[murmuf_weights](events, **kwargs) + # parton shower weights + events = self[ps_weights](events, invalid_weights_action="ignore_one", **kwargs) + # pileup weights events = self[pu_weight](events, **kwargs) @@ -194,9 +189,11 @@ def default( else: events = self[process_ids](events, **kwargs) - # some cutflow features + # create jet collections for categorization + events["FatJet"] = events.FatJet[results.objects.FatJet.FatJet] + # store number of jets for stats and histograms + events = set_ak_column(events, "n_jets_stats", results.x.n_central_jets, value_type=np.int32) events = self[cutflow_features](events, results.objects, **kwargs) - # combined event selection after all steps event_sel = reduce(and_, results.steps.values()) results.event = event_sel @@ -211,29 +208,25 @@ def event_sel_nob(btag_weight_cls): return var_sel # increment stats - events, results = setup_and_increment_stats( + events, results = increment_stats( self, events=events, + task=kwargs["task"], results=results, stats=stats, + hists=hists, no_sel=no_sel, event_sel=event_sel, event_sel_variations={ "nob_deepjet": event_sel_nob(btag_weights_deepjet), "nob_pnet": event_sel_nob(btag_weights_pnet) if self.has_dep(btag_weights_pnet) else None, }, - njets=results.x.n_central_jets, - **kwargs, ) - return events, results @default.init -def default_init(self: Selector) -> None: - if getattr(self, "dataset_inst", None) is None: - return - +def default_init(self: Selector, **kwargs) -> None: # build and store derived process id producers for tag in ("dy", "w_lnu"): prod_name = f"process_ids_{tag}" @@ -262,22 +255,42 @@ def default_init(self: Selector) -> None: # save it as an attribute setattr(self, prod_name, prod) - if self.dataset_inst.has_tag("ttbar"): - self.uses.add(gen_parton_top) - self.produces.add(gen_parton_top) + +@default.setup +def default_setup(self: Selector, task: law.Task, **kwargs) -> None: + # pre-define variable objects for creating stats histograms + self.hist_vars = [ + od.Variable( + name="process", + expression="process_id", + aux={ + "axis_type": "intcat", + 
"axis_kwargs": {"growth": True}, + }, + ), + od.Variable( + name="n_jets", + expression="n_jets_stats", + binning=list(range(9)), + aux={ + "axis_type": "int", + "axis_kwargs": {"growth": True}, + }, + ), + ] empty = default.derive("empty", cls_dict={}) @empty.init -def empty_init(self: Selector) -> None: - super(empty, self).init_func() +def empty_init(self: Selector, **kwargs) -> None: + super(empty, self).init_func(**kwargs) # remove unused dependencies unused = { json_filter, - hbt_met_filters, + met_filters, cutflow_features, patch_ecalBadCalibFilter, jet_selection, @@ -297,6 +310,7 @@ def empty_call( self: Selector, events: ak.Array, stats: defaultdict, + hists: DotDict[str, hist.Hist], **kwargs, ) -> tuple[ak.Array, SelectionResult]: """ @@ -330,6 +344,9 @@ def empty_call( if self.has_dep(murmuf_weights): events = self[murmuf_weights](events, **kwargs) + # parton shower weights + events = self[ps_weights](events, invalid_weights_action="ignore_one", **kwargs) + # pileup weights events = self[pu_weight](events, **kwargs) @@ -363,53 +380,58 @@ def empty_call( events = set_ak_column(events, "tau2_isolated", np.zeros(len(events), dtype=bool)) events = set_ak_column(events, "cross_triggered", np.zeros(len(events), dtype=bool)) events = set_ak_column(events, "single_triggered", np.zeros(len(events), dtype=bool)) + events = set_ak_column(events, "tight_sel", np.zeros(len(events), dtype=bool)) + events = set_ak_column(events, "trig_match", np.zeros(len(events), dtype=bool)) + events = set_ak_column(events, "tight_sel_bdt", np.zeros(len(events), dtype=bool)) + events = set_ak_column(events, "trig_match_bdt", np.zeros(len(events), dtype=bool)) + + # store number of jets for stats and histograms + events = set_ak_column(events, "n_jets_stats", ak.num(events.Jet, axis=1), value_type=np.int32) # trivial selection mask capturing all events results.event = np.ones(len(events), dtype=bool) # increment stats - events, results = setup_and_increment_stats( + events, results = increment_stats( self, events=events, + task=kwargs["task"], results=results, stats=stats, + hists=hists, no_sel=no_sel, event_sel=results.event, event_sel_variations={ "nob_deepjet": results.event, "nob_pnet": results.event if self.has_dep(btag_weights_pnet) else None, }, - njets=ak.num(events.Jet, axis=1), - **kwargs, ) - return events, results -def setup_and_increment_stats( +def increment_stats( self: Selector, *, events: ak.Array, + task: law.Task, results: SelectionResult, stats: defaultdict, - no_sel: np.ndarray | ak.Array | type(Ellipsis), + hists: DotDict[str, hist.Hist], + no_sel: np.ndarray | ak.Array, event_sel: np.ndarray | ak.Array, event_sel_variations: dict[str, np.ndarray | ak.Array] | None = None, - njets: np.ndarray | ak.Array | None = None, - **kwargs, ) -> tuple[ak.Array, SelectionResult]: """ - Helper function that sets up the weight and group maps for the increment_stats task, invokes it - and returns the updated events and results objects. + Helper function that sets up the stats and histograms to bookkeep event counts and weights. :param self: The selector instance. :param events: The events array. + :param task: The law task. :param results: The current selection results. :param stats: The stats dictionary. + :param hists: Dictionary with histograms that can store stats counts. :param event_sel: The general event selection mask. :param event_sel_variations: Named variations of the event selection mask for additional stats. 
- :param event_sel_nob_pnet: The event selection mask without the bjet step for pnet. - :param njets: The number of central jets. :return: The updated events and results objects in a tuple. """ if event_sel_variations is None: @@ -417,43 +439,62 @@ def setup_and_increment_stats( event_sel_variations = {n: s for n, s in event_sel_variations.items() if s is not None} # when a shift was requested, skip all other systematic variations - skip_shifts = self.global_shift_inst != "nominal" - - # start creating a weight, group and group combination map - weight_map = { - "num_events": no_sel, - "num_events_selected": event_sel, - } + skip_shifts = task.global_shift_inst != "nominal" + + # start creating a "stats map" + # - keys: names of histograms to be created + # - values: (weight array, selection array) + # note that only a subset of entries end up in the stats dictionary, but all are used for histograms + stats_map: dict[str, np.ndarray | ak.Array | tuple[np.ndarray | ak.Array, np.ndarray | ak.Array]] = {} + keys_for_stats = [] + keys_for_hists = [] + + def add(key, sel, weight=None, for_stats=False, for_hists=True): + stats_map[key] = sel if weight is None else (weight, sel) + if for_stats and key not in keys_for_stats: + keys_for_stats.append(key) + if for_hists and key not in keys_for_hists: + keys_for_hists.append(key) + + # basic event counts + add("num_events", no_sel, for_stats=True) + add("num_events_selected", event_sel, for_stats=True) for var_name, var_sel in event_sel_variations.items(): - weight_map[f"num_events_selected_{var_name}"] = var_sel - group_map = {} - group_combinations = [] + add(f"num_events_selected_{var_name}", var_sel, for_stats=True) # add mc info if self.dataset_inst.is_mc: - weight_map["sum_mc_weight"] = (events.mc_weight, no_sel) - weight_map["sum_mc_weight_selected"] = (events.mc_weight, event_sel) + add("sum_mc_weight", no_sel, events.mc_weight, for_stats=True) + add("sum_mc_weight_selected", event_sel, events.mc_weight, for_stats=True) for var_name, var_sel in event_sel_variations.items(): - weight_map[f"sum_mc_weight_selected_{var_name}"] = (events.mc_weight, var_sel) + add(f"sum_mc_weight_selected_{var_name}", var_sel, events.mc_weight, for_stats=True) # pu weights with variations for route in sorted(self[pu_weight].produced_columns): - weight_map[f"sum_mc_weight_{route}"] = (events.mc_weight * route.apply(events), no_sel) + add(f"sum_mc_weight_{route}", no_sel, events.mc_weight * route.apply(events)) # pdf weights with variations if self.has_dep(pdf_weights): for v in (("",) if skip_shifts else ("", "_up", "_down")): - weight_map[f"sum_pdf_weight{v}"] = (events[f"pdf_weight{v}"], no_sel) - weight_map[f"sum_pdf_weight{v}_selected"] = (events[f"pdf_weight{v}"], event_sel) + add(f"sum_pdf_weight{v}", no_sel, events[f"pdf_weight{v}"]) + add(f"sum_pdf_weight{v}_selected", event_sel, events[f"pdf_weight{v}"]) # mur/muf weights with variations if self.has_dep(murmuf_weights): for v in (("",) if skip_shifts else ("", "_up", "_down")): - weight_map[f"sum_murmuf_weight{v}"] = (events[f"murmuf_weight{v}"], no_sel) - weight_map[f"sum_murmuf_weight{v}_selected"] = (events[f"murmuf_weight{v}"], event_sel) + add(f"sum_murmuf_weight{v}", no_sel, events[f"murmuf_weight{v}"]) + add(f"sum_murmuf_weight{v}_selected", event_sel, events[f"murmuf_weight{v}"]) + + # parton shower weights with variations + if self.has_dep(ps_weights): + for v in (("",) if skip_shifts else ("", "_up", "_down")): + add(f"sum_isr_weight{v}", no_sel, events[f"isr_weight{v}"]) + 
add(f"sum_isr_weight{v}_selected", event_sel, events[f"isr_weight{v}"]) + add(f"sum_fsr_weight{v}", no_sel, events[f"fsr_weight{v}"]) + add(f"sum_fsr_weight{v}_selected", event_sel, events[f"fsr_weight{v}"]) # btag weights - for prod in (btag_weights_deepjet, btag_weights_pnet): + for prod in [btag_weights_deepjet, btag_weights_pnet]: if not self.has_dep(prod): continue for route in sorted(self[prod].produced_columns): @@ -462,42 +503,40 @@ def setup_and_increment_stats( continue if skip_shifts and weight_name.endswith(("_up", "_down")): continue - weight_map[f"sum_{weight_name}"] = (events[weight_name], no_sel) - weight_map[f"sum_{weight_name}_selected"] = (events[weight_name], event_sel) + add(f"sum_{weight_name}", no_sel, events[weight_name]) + add(f"sum_{weight_name}_selected", event_sel, events[weight_name]) for var_name, var_sel in event_sel_variations.items(): - weight_map[f"sum_{weight_name}_selected_{var_name}"] = (events[weight_name], var_sel) - weight_map[f"sum_mc_weight_{weight_name}_selected_{var_name}"] = (events.mc_weight * events[weight_name], var_sel) # noqa: E501 - - # groups - group_map = { - **group_map, - # per process - "process": { - "values": events.process_id, - "mask_fn": (lambda v: events.process_id == v), - }, - } - # per jet multiplicity - if njets is not None: - group_map["njet"] = { - "values": njets, - "mask_fn": (lambda v: njets == v), + add(f"sum_{weight_name}_selected_{var_name}", var_sel, events[weight_name]) + add(f"sum_mc_weight_{weight_name}_selected_{var_name}", var_sel, events.mc_weight * events[weight_name]) # noqa: E501 + + # add num_events_per_process and sum_mc_weight_per_process directly to stats, needed for normalization weight + if "num_events_per_process" not in stats: + stats["num_events_per_process"] = defaultdict(float) + if "sum_mc_weight_per_process" not in stats: + stats["sum_mc_weight_per_process"] = defaultdict(float) + for proc_id in np.unique(events.process_id): + proc_weights = events.mc_weight[events.process_id == proc_id] + stats["num_events_per_process"][str(proc_id)] += float(len(proc_weights)) + stats["sum_mc_weight_per_process"][str(proc_id)] += float(ak.sum(proc_weights)) + + # fill stats and histograms + for key, val in stats_map.items(): + is_num = key.startswith("num_") + weight, sel = ((None,) + law.util.make_tuple(val))[-2:] + + if key in keys_for_hists: + # create the histogram when not existing + if key not in hists: + hists[key] = create_hist_from_variables(*self.hist_vars, storage="double" if is_num else "weight") + # fill it + fill_data = { + v.name: Route(v.expression).apply(events)[sel] + for v in self.hist_vars } + if not is_num: + fill_data["weight"] = weight[sel] + fill_hist(hists[key], fill_data, last_edge_inclusive=True) - # combinations - group_combinations.append(("process", "njet")) - - def skip_func(weight_name: str, group_names: list[str]) -> bool: - # TODO: add not needed combinations here - return False - - return self[increment_stats]( - events, - results, - stats, - weight_map=weight_map, - group_map=group_map, - group_combinations=group_combinations, - skip_func=skip_func, - **kwargs, - ) + if key in keys_for_stats: + stats[key] += float(ak.sum(sel if is_num else weight[sel])) + return events, results diff --git a/hbt/selection/jet.py b/multilepton/selection/jet.py similarity index 50% rename from hbt/selection/jet.py rename to multilepton/selection/jet.py index adc59fe1..2ee9c538 100644 --- a/hbt/selection/jet.py +++ b/multilepton/selection/jet.py @@ -4,19 +4,23 @@ Jet selection methods. 
""" +from __future__ import annotations + from operator import or_ from functools import reduce +import law + +from columnflow.util import maybe_import from columnflow.selection import Selector, SelectionResult, selector +from columnflow.production.cms.jet import jet_id, fatjet_id from columnflow.columnar_util import ( - EMPTY_FLOAT, set_ak_column, sorted_indices_from_mask, mask_from_indices, flat_np_view, - full_like, + EMPTY_FLOAT, set_ak_column, sorted_indices_from_mask, mask_from_indices, flat_np_view, full_like, ) -from columnflow.util import maybe_import, InsertableDict -from hbt.util import IF_RUN_2 -from hbt.production.hhbtag import hhbtag -from hbt.selection.lepton import trigger_object_matching +from multilepton.selection.lepton import trigger_object_matching +from multilepton.util import IF_RUN_2, IF_NOT_NANO_V15 + np = maybe_import("numpy") ak = maybe_import("awkward") @@ -24,18 +28,19 @@ @selector( uses={ - hhbtag, - "trigger_ids", "TrigObj.{pt,eta,phi}", - "Jet.{pt,eta,phi,mass,jetId}", IF_RUN_2("Jet.puId"), - "FatJet.{pt,eta,phi,mass,msoftdrop,jetId,subJetIdx1,subJetIdx2}", - "SubJet.{pt,eta,phi,mass,btagDeepB}", + jet_id, fatjet_id, + "fired_trigger_ids", "TrigObj.{pt,eta,phi}", + "Jet.{pt,eta,phi,mass}", IF_NOT_NANO_V15("Jet.jetId"), IF_RUN_2("Jet.puId"), + "FatJet.{pt,eta,phi,mass,msoftdrop,subJetIdx1,subJetIdx2}", IF_NOT_NANO_V15("FatJet.jetId"), + "SubJet.{pt,eta,phi,mass}", IF_NOT_NANO_V15("SubJet.btagDeepB"), }, produces={ - # new columns - "Jet.hhbtag", + # hhbtag, + "Jet.hhbtag", "matched_trigger_ids", }, - # shifts are declared dynamically below in jet_selection_init ) + + def jet_selection( self: Selector, events: ak.Array, @@ -45,7 +50,6 @@ def jet_selection( ) -> tuple[ak.Array, SelectionResult]: """ Jet selection based on ultra-legacy recommendations. 
- Resources: https://twiki.cern.ch/twiki/bin/view/CMS/JetID?rev=107#nanoAOD_Flags https://twiki.cern.ch/twiki/bin/view/CMS/JetID13TeVUL?rev=15#Recommendations_for_the_13_T_AN1 @@ -53,19 +57,25 @@ def jet_selection( https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookNanoAOD?rev=100#Jets """ is_2016 = self.config_inst.campaign.x.year == 2016 - ch_tautau = self.config_inst.get_channel("tautau") + ch_tautau = self.config_inst.get_channel("ctautau") # local jet index li = ak.local_index(events.Jet) + # recompute jet ids + events = self[jet_id](events, **kwargs) + events = self[fatjet_id](events, **kwargs) + # # default jet selection # - + # ak.all(events.Jet.metric_table(lepton_results.x.leading_taus) > 0.5, axis=2) # common ak4 jet mask for normal and vbf jets ak4_mask = ( (events.Jet.jetId == 6) & # tight plus lepton veto - ak.all(events.Jet.metric_table(lepton_results.x.leading_taus) > 0.5, axis=2) + ak.all(events.Jet.metric_table(events.Tau[lepton_results.x.taus]) > 0.5, axis=2) & + ak.all(events.Jet.metric_table(events.Muon[lepton_results.x.mus]) > 0.5, axis=2) & + ak.all(events.Jet.metric_table(events.Electron[lepton_results.x.eles]) > 0.5, axis=2) ) # puId for run 2 @@ -82,12 +92,19 @@ def jet_selection( (abs(events.Jet.eta) < 2.5) ) + # default jets (no cleaning) + default_mask_noclean = ( + (events.Jet.pt > 5) & + (abs(events.Jet.eta) < 2.5) + ) + # - # hhb-jet identification + # FIXME need to go hhb-jet identification # - - # get the hhbtag values per jet per event - hhbtag_scores = self[hhbtag](events, default_mask, lepton_results.x.lepton_pair, **kwargs) + # events = self[hhbtag](events, default_mask, lepton_results.x.lepton_pair, **kwargs) + # hhbtag_scores = events.hhbtag_score + # just set hhbtag to zero for now, later remove + hhbtag_scores = 0 * events.Jet.pt # create a mask where only the two highest scoring hhbjets are selected score_indices = ak.argsort(hhbtag_scores, axis=1, ascending=False) @@ -100,30 +117,105 @@ def jet_selection( # two strategies were studied a) and b) but strategy a) seems to not comply with how trigger # matching should be done and should therefore be ignored. 
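As an aside for readers less familiar with trigger matching: the jet-leg matching referred to in the comments above boils down to a delta-R test between an hhb-jet candidate and the trigger objects of the jet leg, with a 0.5 threshold. Below is a minimal, self-contained numpy sketch of that idea; the names and values are purely illustrative and are not taken from this repository (the actual code uses the `trigger_object_matching` helper on awkward arrays).

```python
import numpy as np

def delta_r(eta1, phi1, eta2, phi2):
    # delta-phi wrapped into [-pi, pi)
    dphi = (phi1 - phi2 + np.pi) % (2 * np.pi) - np.pi
    return np.sqrt((eta1 - eta2) ** 2 + dphi ** 2)

# toy pt-leading hhb-jet candidate and two trigger objects of the jet leg
jet_eta, jet_phi = 1.2, 0.3
trig_eta = np.array([1.25, -0.8])
trig_phi = np.array([0.28, 2.9])

# the jet counts as matched if any trigger object lies within delta-R < 0.5
matched = np.any(delta_r(jet_eta, jet_phi, trig_eta, trig_phi) < 0.5)
print(matched)  # True: the first trigger object is inside the cone
```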
- # create a mask to select tautau events that were only triggered by a tau-tau-jet cross trigger false_mask = full_like(events.event, False, dtype=bool) + # create mask for tautau events that fired and matched tautau trigger + tt_match_mask = ( + (events.channel_id == ch_tautau.id) & + ak.any(reduce( + or_, + [(events.matched_trigger_ids == tid) for tid in self.trigger_ids_tt], + false_mask, + ), axis=1) + ) + + # create a mask to select tautau events that were triggered by a tau-tau-jet cross trigger + # and passed the tautau matching in the lepton selection ttj_mask = ( (events.channel_id == ch_tautau.id) & - ak.any(reduce(or_, [(events.trigger_ids == tid) for tid in self.trigger_ids_ttjc], false_mask), axis=1) + ak.any(reduce( + or_, + [(lepton_results.x.lepton_part_trigger_ids == tid) for tid in self.trigger_ids_ttj], + false_mask, + ), axis=1) + ) + + # create mask for tautau events that matched taus in vbf trigger + ttv_mask = ( + (events.channel_id == ch_tautau.id) & + ak.any(reduce( + or_, + [(lepton_results.x.lepton_part_trigger_ids == tid) for tid in self.trigger_ids_ttv], + false_mask, + ), axis=1) ) + # we want to remove tautau events for which after trigger and tau tau matching, only ttj/v + # triggers are under consideration, but the jet leg cannot be matched, so create a mask that + # rejects these events + match_at_least_one_trigger = full_like(events.event, True, dtype=bool) + + # prepare to fill the list of matched trigger ids with the events passing tautaujet and vbf + matched_trigger_ids_list = [events.matched_trigger_ids] + # only perform this special treatment when applicable if ak.any(ttj_mask): + # store the leading hhbjet + sel_hhbjet_mask = hhbjet_mask[ttj_mask] + pt_sorting_indices = ak.argsort(events.Jet.pt[ttj_mask][sel_hhbjet_mask], axis=1, ascending=False) + + # define mask for matched hhbjets + # constrain to jets with a score and a minimum pt corresponding to the trigger jet leg + constraints_mask_matched_hhbjet = ( + (hhbjet_mask[ttj_mask] != EMPTY_FLOAT) & + (events.Jet.pt[ttj_mask] > 60.0) # ! 
Note: hardcoded value + ) # check which jets can be matched to any of the jet legs matching_mask = full_like(events.Jet.pt[ttj_mask], False, dtype=bool) for trigger, _, leg_masks in trigger_results.x.trigger_data: - if trigger.id in self.trigger_ids_ttjc: + if trigger.id in self.trigger_ids_ttj: trig_objs = events.TrigObj[leg_masks["jet"]] + trigger_matching_mask = trigger_object_matching(events.Jet[ttj_mask], trig_objs[ttj_mask]) + + # update overall matching mask to be used for the hhbjet selection matching_mask = ( matching_mask | - trigger_object_matching(events.Jet[ttj_mask], trig_objs[ttj_mask]) + trigger_matching_mask ) + + # update trigger matching mask with constraints on the jets + trigger_matching_mask = ( + trigger_matching_mask & + constraints_mask_matched_hhbjet + ) + + # add trigger_id to matched_trigger_ids if the pt-leading jet is matched + leading_matched = ak.fill_none( + ak.firsts(trigger_matching_mask[sel_hhbjet_mask][pt_sorting_indices], axis=1), + False, + ) + + # cast leading matched mask to event mask + leading_matched_all_events = full_like(events.event, False, dtype=bool) + flat_leading_matched_all_events = flat_np_view(leading_matched_all_events) + flat_leading_matched_all_events[flat_np_view(ttj_mask)] = flat_np_view(leading_matched) + + # store the matched trigger ids + ids = ak.where(leading_matched_all_events, np.float32(trigger.id), np.float32(np.nan)) + matched_trigger_ids_list.append(ak.singletons(ak.nan_to_none(ids))) + + # store the matched trigger ids + matched_trigger_ids = ak.concatenate(matched_trigger_ids_list, axis=1) + # replace the existing column matched_trigger_ids from the lepton selection with the updated one + events = set_ak_column(events, "matched_trigger_ids", matched_trigger_ids, value_type=np.int32) # constrain to jets with a score and a minimum pt corresponding to the trigger jet leg matching_mask = ( - matching_mask & - (hhbjet_mask[ttj_mask] != EMPTY_FLOAT) & - (events.Jet.pt[ttj_mask] > 60.0) # ! Note: hardcoded value + constraints_mask_matched_hhbjet + ) + + # create a mask to select tautau events that were only triggered by a tau-tau-jet cross trigger + only_ttj_mask = ( + ttj_mask & ~tt_match_mask & ~ttv_mask ) # @@ -156,19 +248,23 @@ def jet_selection( # b) # two hhb-tagged jets must be selected. The highest and second-highest scoring jets are selected. # - If the jet with the highest pt matches the trigger leg, the event is accepted. - # - Otherwise the event is rejected. + # - Otherwise the event is rejected if it was only triggered by the tautaujet trigger. 
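Strategy b) therefore reduces to simple per-event bookkeeping: an event is dropped only when the tau-tau-jet cross trigger was the sole matched trigger and its pt-leading hhb-jet fails the jet-leg matching. A toy sketch of that veto is shown below, with made-up boolean arrays; only the name `match_at_least_one_trigger` mirrors a variable used further down in this selector, everything else is illustrative.

```python
import numpy as np

# one entry per event (toy values)
only_ttj = np.array([True, True, False, False])             # only the tautaujet trigger fired and matched the taus
leading_jet_matched = np.array([True, False, False, True])  # the pt-leading hhb-jet matched the jet leg
keep = np.ones(4, dtype=bool)                               # plays the role of "match_at_least_one_trigger"

# reject events that relied on the tautaujet trigger alone but failed the jet-leg matching
keep = np.where(only_ttj & ~leading_jet_matched, False, keep)
print(keep)  # [ True False  True  True]
```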
# - # check if the pt-leading jet of the two hhbhets is matchedfold back into hhbjet_mask - sel_hhbjet_mask = ak.Array(hhbjet_mask[ttj_mask]) - pt_sorting_indices = ak.argsort(events.Jet.pt[ttj_mask][sel_hhbjet_mask], axis=1, ascending=False) + # check if the pt-leading jet of the two hhbjets is matched for any tautaujet trigger + # and fold back into hhbjet_mask leading_matched = ak.fill_none(ak.firsts(matching_mask[sel_hhbjet_mask][pt_sorting_indices], axis=1), False) - sel_hhbjet_mask = sel_hhbjet_mask & leading_matched - - # insert back into the full hhbjet_mask - flat_hhbjet_mask = flat_np_view(hhbjet_mask) - flat_jet_mask = ak.flatten(full_like(events.Jet.pt, False, dtype=bool) | ttj_mask) - flat_hhbjet_mask[flat_jet_mask] = ak.flatten(sel_hhbjet_mask) + # cast full leading matched mask to event mask + full_leading_matched_all_events = full_like(events.event, False, dtype=bool) + flat_full_leading_matched_all_events = flat_np_view(full_leading_matched_all_events) + flat_full_leading_matched_all_events[flat_np_view(ttj_mask)] = flat_np_view(leading_matched) + + # remove all events where the matching did not work if they were only triggered by the tautaujet trigger + match_at_least_one_trigger = ak.where( + only_ttj_mask & ~flat_full_leading_matched_all_events, + False, + match_at_least_one_trigger, + ) # validate that either none or two hhbjets were identified assert ak.all(((n_hhbjets := ak.sum(hhbjet_mask, axis=1)) == 0) | (n_hhbjets == 2)) @@ -176,17 +272,19 @@ def jet_selection( # # fat jets # - fatjet_mask = ( (events.FatJet.jetId == 6) & # tight plus lepton veto (events.FatJet.msoftdrop > 30.0) & (events.FatJet.pt > 250.0) & # ParticleNet not trained for lower values (abs(events.FatJet.eta) < 2.5) & - ak.all(events.FatJet.metric_table(lepton_results.x.leading_taus) > 0.8, axis=2) & + ak.all(events.FatJet.metric_table(events.Tau[lepton_results.x.taus]) > 0.8, axis=2) & + ak.all(events.FatJet.metric_table(events.Muon[lepton_results.x.mus]) > 0.8, axis=2) & + ak.all(events.FatJet.metric_table(events.Electron[lepton_results.x.eles]) > 0.8, axis=2) & (events.FatJet.subJetIdx1 >= 0) & (events.FatJet.subJetIdx2 >= 0) ) - + + # ak.all(events.FatJet.metric_table(lepton_results.x.leading_taus) > 0.8, axis=2) & # store fatjet and subjet indices fatjet_indices = ak.local_index(events.FatJet.pt)[fatjet_mask] subjet_indices = ak.concatenate( @@ -205,7 +303,6 @@ def jet_selection( # # vbf jets # - vbf_mask = ( ak4_mask & (events.Jet.pt > 20.0) & @@ -224,13 +321,67 @@ def jet_selection( (abs(vbf1.eta - vbf2.eta) > 3.0) ) + # redefine the trigger matched list after it was updated with tautaujet ids + matched_trigger_ids_list = [events.matched_trigger_ids] + # extra requirements for events for which only the tau tau vbf cross trigger fired - if not self.trigger_ids_ttvc: - cross_vbf_mask = full_like(1 * events.event, False, dtype=bool) + if not self.trigger_ids_ttv: + cross_vbf_mask = full_like(events.event, False, dtype=bool) else: - cross_vbf_masks = [events.trigger_ids == tid for tid in self.trigger_ids_ttvc] - # This combines "at least one cross trigger is fired" and "no other triggers are fired" + ttv_fired_all_matched = full_like(events.event, False, dtype=bool) + for trigger, _, leg_masks in trigger_results.x.trigger_data: + if trigger.id in self.trigger_ids_ttv: + ttv_fired_tt_matched = ( + (events.channel_id == ch_tautau.id) & + ak.any(lepton_results.x.lepton_part_trigger_ids == trigger.id, axis=1) + ) + # TODO: add vbf jets matching when SF procedure has been decided not available for + 
# now, so define the final mask just from the tt matching decision for now + _ttv_fired_all_matched = ttv_fired_tt_matched + ttv_fired_all_matched = ttv_fired_all_matched | _ttv_fired_all_matched + ids = ak.where(_ttv_fired_all_matched, np.float32(trigger.id), np.float32(np.nan)) + matched_trigger_ids_list.append(ak.singletons(ak.nan_to_none(ids))) + + # store the matched trigger ids + matched_trigger_ids = ak.concatenate(matched_trigger_ids_list, axis=1) + events = set_ak_column(events, "matched_trigger_ids", matched_trigger_ids, value_type=np.int32) + + # update the "ttv only" mask + cross_vbf_masks = [events.matched_trigger_ids == tid for tid in self.trigger_ids_ttv] cross_vbf_mask = ak.all(reduce(or_, cross_vbf_masks), axis=1) + + # remove all events that fired only vbf trigger but were not matched or + # that fired vbf and tautaujet triggers and matched the taus but not the jets + ttv_fired_v_not_matched = ( + # need to match either only vbf or vbf and tautaujet triggers + (events.channel_id == ch_tautau.id) & # need to be a tautau event + ~tt_match_mask & # need to not match the tautau trigger + ttv_mask & # need to match the taus in the vbf trigger + # need to not match the jet legs in the vbf trigger + ~ttv_fired_all_matched # need to not match the jet legs in the vbf trigger + ) + if ak.any(ttj_mask): + # case where vbf and tautaujet triggers were both fired + ttjv_fired_vj_not_matched = ( + ttv_fired_v_not_matched & + ttj_mask & + ~full_leading_matched_all_events + ) + match_at_least_one_trigger = ak.where( + ttjv_fired_vj_not_matched, + False, + match_at_least_one_trigger, + ) + # case where only vbf trigger was fired + ttv_fired_v_not_matched = ( + ttv_fired_v_not_matched & + ~ttj_mask + ) + + match_at_least_one_trigger = ak.where(ttv_fired_v_not_matched, False, match_at_least_one_trigger) + + # impose additional cuts on the vbf pair in case only a ttv trigger fired (and all objects + # matched), but no other trigger vbf_pair_mask = vbf_pair_mask & ( (~cross_vbf_mask) | ( (vbfjj.mass > 800) & @@ -258,9 +409,9 @@ def jet_selection( # # final selection and object construction # - # pt sorted indices to convert mask jet_indices = sorted_indices_from_mask(default_mask, events.Jet.pt, ascending=False) + jet_indices_no_clean = sorted_indices_from_mask(default_mask_noclean, events.Jet.pt, ascending=False) # get indices of the two hhbjets hhbjet_indices = sorted_indices_from_mask(hhbjet_mask, hhbtag_scores, ascending=False) @@ -276,15 +427,17 @@ def jet_selection( # perform a cut on ≥1 jet and all other cuts first, and then cut on ≥2, resulting in an # additional, _skippable_ step jet_sel = ( - (ak.sum(default_mask, axis=1) >= 1) + (ak.sum(default_mask, axis=1) >= 0) & #dont require jets at the moment, will be added on per channel basis + match_at_least_one_trigger # add additional cuts here in the future ) - jet_sel2 = jet_sel & (ak.sum(default_mask, axis=1) >= 2) + #jet_sel2 = jet_sel & (ak.sum(default_mask, axis=1) >= 2) # some final type conversions jet_indices = ak.values_astype(ak.fill_none(jet_indices, 0), np.int32) - hhbjet_indices = ak.values_astype(hhbjet_indices, np.int32) - non_hhbjet_indices = ak.values_astype(ak.fill_none(non_hhbjet_indices, 0), np.int32) + jet_indices_no_clean = ak.values_astype(ak.fill_none(jet_indices_no_clean, 0), np.int32) + #hhbjet_indices = ak.values_astype(hhbjet_indices, np.int32) + #non_hhbjet_indices = ak.values_astype(ak.fill_none(non_hhbjet_indices, 0), np.int32) fatjet_indices = ak.values_astype(fatjet_indices, np.int32) vbfjet_indices = 
ak.values_astype(ak.fill_none(vbfjet_indices, 0), np.int32)
@@ -295,7 +448,7 @@ def jet_selection(
     result = SelectionResult(
         steps={
             "jet": jet_sel,
-            "jet2": jet_sel2,
+            #"jet2": jet_sel2,
             # the btag weight normalization requires a selection with everything but the bjet
             # selection, so add this step here
             # note: there is currently no b-tag discriminant cut at this point, so skip it
@@ -305,8 +458,9 @@
         objects={
             "Jet": {
                 "Jet": jet_indices,
-                "HHBJet": hhbjet_indices,
-                "NonHHBJet": non_hhbjet_indices,
+                "NonCleanedJet": jet_indices_no_clean,
+                #"HHBJet": hhbjet_indices,
+                #"NonHHBJet": non_hhbjet_indices,
                 "VBFJet": vbfjet_indices,
             },
             "FatJet": {
@@ -324,12 +478,11 @@
             "n_central_jets": ak.num(jet_indices, axis=1),
         },
     )
-
     return events, result
 @jet_selection.init
-def jet_selection_init(self: Selector) -> None:
+def jet_selection_init(self: Selector, **kwargs) -> None:
     # register shifts
     self.shifts |= {
         shift_inst.name
@@ -339,13 +492,17 @@
 @jet_selection.setup
-def jet_selection_setup(self: Selector, reqs: dict, inputs: dict, reader_targets: InsertableDict) -> None:
+def jet_selection_setup(self: Selector, task: law.Task, **kwargs) -> None:
     # store ids of tau-tau cross triggers
-    self.trigger_ids_ttjc = [
+    self.trigger_ids_tt = [
+        trigger.id for trigger in self.config_inst.x.triggers
+        if trigger.has_tag("cross_tau_tau")
+    ]
+    self.trigger_ids_ttj = [
         trigger.id for trigger in self.config_inst.x.triggers
         if trigger.has_tag("cross_tau_tau_jet")
     ]
-    self.trigger_ids_ttvc = [
+    self.trigger_ids_ttv = [
         trigger.id for trigger in self.config_inst.x.triggers
         if trigger.has_tag("cross_tau_tau_vbf")
     ]
diff --git a/multilepton/selection/lepton.py b/multilepton/selection/lepton.py
new file mode 100644
index 00000000..4fd2e40a
--- /dev/null
+++ b/multilepton/selection/lepton.py
@@ -0,0 +1,2025 @@
+# coding: utf-8
+
+"""
+Lepton selection methods.
+"""
+
+from __future__ import annotations
+
+import law
+
+from operator import or_
+from functools import reduce
+
+from columnflow.selection import Selector, SelectionResult, selector
+from columnflow.columnar_util import (
+    set_ak_column, sorted_indices_from_mask, flat_np_view, full_like,
+)
+from columnflow.util import maybe_import
+
+from multilepton.util import IF_NANO_V9, IF_NANO_GE_V10, IF_NANO_V12, IF_NANO_V14, IF_NANO_V15
+from multilepton.config.util import Trigger
+
+np = maybe_import("numpy")
+ak = maybe_import("awkward")
+logger = law.logger.get_logger(__name__)
+
+
+def trigger_object_matching(
+    vectors1: ak.Array,
+    vectors2: ak.Array,
+    /,
+    *,
+    threshold: float = 0.5,
+    axis: int = 2,
+    event_mask: ak.Array | type(Ellipsis) | None = None,
+) -> ak.Array:
+    """
+    Helper to check per object in *vectors1* if there is at least one object in *vectors2* that
+    leads to a delta R metric below *threshold*. The final reduction is applied over *axis* of the
+    resulting metric table containing the full combinatorics. If an *event_mask* is given, the
+    matching is performed only for those events, but a full object mask with the same shape as
+    that of *vectors1* is returned, with all objects set to *False* where no matching was done.
+ """ + # handle event masks + used_event_mask = event_mask is not None and event_mask is not Ellipsis + event_mask = Ellipsis if event_mask is None else event_mask + # delta_r for all combinations + dr = vectors1[event_mask].metric_table(vectors2[event_mask]) + # check per element in vectors1 if there is at least one matching element in vectors2 + any_match = ak.any(dr < threshold, axis=axis) + # expand to original shape if an event mask was given + if used_event_mask: + full_any_match = full_like(vectors1.pt, False, dtype=bool) + flat_full_any_match = flat_np_view(full_any_match) + flat_full_any_match[flat_np_view(full_any_match | event_mask)] = flat_np_view(any_match) + any_match = full_any_match + return any_match + + +def update_channel_ids( + events: ak.Array, + previous_channel_ids: ak.Array, + correct_channel_id: int, + channel_mask: ak.Array, +) -> ak.Array: + """ + Check if the events in the is_mask can be inside the given channel + or have already been sorted in another channel before. + """ + events_not_in_channel = (previous_channel_ids != 0) & (previous_channel_ids != correct_channel_id) + channel_id_overwrite = events_not_in_channel & channel_mask + if ak.any(channel_id_overwrite): + raise ValueError( + "The channel_ids of some events are being set to two different values. " + "The first event of this chunk concerned has index", + ak.where(channel_id_overwrite)[0], + ) + return ak.where(channel_mask, correct_channel_id, previous_channel_ids) + + +@selector( + uses={ + "Electron.{pt,eta,phi,dxy,dz}", + "Electron.{pfRelIso03_all,seediEtaOriX,seediPhiOriY,sip3d,miniPFRelIso_all,sieie}", + "Electron.{hoe,eInvMinusPInv,convVeto,lostHits,jetPtRelv2,jetIdx}", + "Jet.btagDeepFlavB", + IF_NANO_V12("Electron.mvaTTH"), + IF_NANO_V14("Electron.promptMVA"), + IF_NANO_V15("Electron.promptMVA"), + IF_NANO_V9("Electron.mvaFall17V2{Iso_WP80,Iso_WP90}"), + IF_NANO_GE_V10("Electron.{mvaIso_WP80,mvaIso_WP90}"), + }, + exposed=False, +) +def electron_selection( + self: Selector, + events: ak.Array, + trigger: Trigger, + **kwargs, +) -> tuple[ak.Array | None, ak.Array]: + """ + Electron selection returning two sets of masks for default and veto electrons. 
+ See https://twiki.cern.ch/twiki/bin/view/CMS/EgammaNanoAOD?rev=4 + """ + # ch_key = kwargs.get("ch_key", None) + # is_2016 = self.config_inst.campaign.x.year == 2016 + is_2022_post = ( + self.config_inst.campaign.x.year == 2022 and + self.config_inst.campaign.has_tag("postEE") + ) + is_single = trigger.has_tag("single_e") or trigger.has_tag("single_mu") + is_cross = trigger.has_tag("cross_e_tau") + + # obtain mva flags, which might be located at different routes, depending on the nano version + if "mvaIso_WP80" in events.Electron.fields: + # >= nano v10 + # beware that the available Iso should be mvaFall17V2 for run2 files, not Winter22V1, + # check this in original root files if necessary + mva_iso_wp80 = events.Electron.mvaIso_WP80 + mva_iso_wp90 = events.Electron.mvaIso_WP90 + else: + # <= nano v9 + mva_iso_wp80 = events.Electron.mvaFall17V2Iso_WP80 + mva_iso_wp90 = events.Electron.mvaFall17V2Iso_WP90 + + if "promptMVA" in events.Electron.fields: + # >= nano v14 + promptMVA = events.Electron.promptMVA + else: + # nano 10.0) & + (abs(events.Electron.eta) < 2.5) & + (abs(events.Electron.dxy) < 0.05) & + (abs(events.Electron.dz) < 0.1) & + (events.Electron.sip3d < 8) & + (events.Electron.miniPFRelIso_all < 0.4) & + (events.Electron.sieie < 0.019) & + (events.Electron.hoe < 0.1) & + (events.Electron.eInvMinusPInv > -0.04) & + (events.Electron.convVeto == 1) & + (events.Electron.lostHits == 0) & + atleast_medium & + (promptMVA > 0.3) & + (btag_values < btagcut) + ) + loose_mask = ( + (events.Electron.pt > 7.0) & + (abs(events.Electron.eta) < 3.0) & + (abs(events.Electron.dxy) < 0.05) & + (abs(events.Electron.dz) < 0.1) & + (events.Electron.sip3d < 8) & + (events.Electron.miniPFRelIso_all < 0.4) & + (events.Electron.lostHits <= 1) & + atleast_medium + ) + idlepmvapassed = (atleast_medium & (promptMVA > 0.3)) + idlepmvafailed = ((mva_iso_wp90 == 1) & (promptMVA <= 0.3)) # loose doesnt exist anymore :( + btaglepmvapassed = ((btag_values < btagcut) & (promptMVA < 0.3)) + btaglepmvafailed = ((btag_values < btagcut_tight) & (promptMVA > 0.3)) + jetisolepmvapassed = (promptMVA > 0.3) + jetisolepmvafailed = ((promptMVA < 0.3) & (events.Electron.jetPtRelv2 < (1. 
/ 1.7))) + fakeable_mask = ( + (events.Electron.pt > 10.0) & + (abs(events.Electron.eta) < 2.5) & + (abs(events.Electron.dxy) < 0.05) & + (abs(events.Electron.dz) < 0.1) & + (events.Electron.sip3d < 8) & + (events.Electron.miniPFRelIso_all < 0.4) & + (events.Electron.sieie < 0.019) & + (events.Electron.hoe < 0.1) & + (events.Electron.eInvMinusPInv > -0.04) & + (events.Electron.convVeto == 1) & + (events.Electron.lostHits == 0) & + (idlepmvapassed | idlepmvafailed) & + (btaglepmvapassed | btaglepmvafailed) & + (jetisolepmvapassed | jetisolepmvafailed) + ) + if is_2022_post: + tight_mask = tight_mask & ~( + (events.Electron.eta > 1.556) & + (events.Electron.seediEtaOriX < 45) & + (events.Electron.seediPhiOriY > 72) + ) + fakeable_mask = fakeable_mask & ~( + (events.Electron.eta > 1.556) & + (events.Electron.seediEtaOriX < 45) & + (events.Electron.seediPhiOriY > 72) + ) + # if ch_key == "eormu": + # fakeable_mask = loose_mask + veto_mask = loose_mask & (abs(events.Electron.eta) < 2.5) + # analysis_mask = tight_mask & (events.Electron.pt > min_pt) + control_mask = fakeable_mask + return tight_mask, control_mask, veto_mask + + +@electron_selection.init +def electron_selection_init(self) -> None: + if self.config_inst.campaign.x.run == 3 and self.config_inst.campaign.x.year == 2022: + self.shifts |= { + shift_inst.name for shift_inst in self.config_inst.shifts + if shift_inst.has_tag(("ees", "eer")) + } + + +@selector( + uses={"{Electron,TrigObj}.{pt,eta,phi}"}, + exposed=False, +) +def electron_trigger_matching( + self: Selector, + events: ak.Array, + trigger: Trigger, + trigger_fired: ak.Array, + leg_masks: dict[str, ak.Array], + **kwargs, +) -> tuple[ak.Array]: + """ + Electron trigger matching. + """ + is_single = trigger.has_tag("single_e") + is_cross = trigger.has_tag("cross_e_tau") + + # catch config errors + assert is_single or is_cross + assert trigger.n_legs == len(leg_masks) == (1 if is_single else 2) + assert abs(trigger.legs["e"].pdg_id) == 11 + return trigger_object_matching( + events.Electron, + events.TrigObj[leg_masks["e"]], + event_mask=trigger_fired, + ) + + +@selector( + uses={ + "Muon.{pt,eta,phi,looseId,mediumId,tightId}", + "Muon.{pfRelIso04_all,dxy,dz,sip3d,miniPFRelIso_all,jetPtRelv2,jetIdx}", + "Jet.btagDeepFlavB", + IF_NANO_V12("Muon.mvaTTH"), + IF_NANO_V14("Muon.promptMVA"), + IF_NANO_V15("Muon.promptMVA"), + }, + exposed=False, +) +def muon_selection( + self: Selector, + events: ak.Array, + trigger: Trigger, + **kwargs, +) -> tuple[ak.Array | None, ak.Array]: + """ + Muon selection returning two sets of masks for default and veto muons. 
+ References: + - Isolation working point: https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideMuonIdRun2?rev=59 + - ID and ISO: https://twiki.cern.ch/twiki/bin/view/CMS/MuonUL2017?rev=15 + relaxed for multilepton, to be replaced with lepMVA later on + """ + # ch_key = kwargs.get("ch_key", None) + # is_2016 = self.config_inst.campaign.x.year == 2016 + # is_single = trigger.has_tag("single_mu") + is_single = trigger.has_tag("single_mu") or trigger.has_tag("single_e") + is_cross = trigger.has_tag("cross_mu_tau") + + # default muon mask + tight_mask = None + control_mask = None + if is_single or is_cross or True: # investigate why trigger dependence on providing masks at all + # if is_2016: + # min_pt = 23.0 if is_single else 20.0 + # else: + # min_pt = 26.0 if is_single else 22.0 + if "promptMVA" in events.Muon.fields: + # >= nano v14 + promptMVA = events.Muon.promptMVA + else: + # nano < v14 + promptMVA = events.Muon.mvaTTH + # ... (atleast_loose, atleast_medium, btag_values, btagcut and btagcut_tight are defined here) + tight_mask = ( + (events.Muon.pt > 10) & + (abs(events.Muon.eta) < 2.4) & + (abs(events.Muon.dxy) < 0.05) & + (abs(events.Muon.dz) < 0.1) & + (events.Muon.sip3d < 8) & + (events.Muon.miniPFRelIso_all < 0.4) & + atleast_medium & + (btag_values < btagcut) & + (promptMVA > 0.5) + ) + loose_mask = ( + (events.Muon.pt > 5) & + (abs(events.Muon.eta) < 3.0) & + (abs(events.Muon.dxy) < 0.05) & + (abs(events.Muon.dz) < 0.1) & + (events.Muon.sip3d < 8) & + (events.Muon.miniPFRelIso_all < 0.4) & + atleast_loose + ) + btaglepmvapassed = ((btag_values < btagcut) & (promptMVA < 0.5)) + btaglepmvafailed = ((btag_values < btagcut_tight) & (promptMVA > 0.5)) + fakeable_mask = ( + (events.Muon.pt > 10) & + (abs(events.Muon.eta) < 2.4) & + (abs(events.Muon.dxy) < 0.05) & + (abs(events.Muon.dz) < 0.1) & + (events.Muon.sip3d < 8) & + (events.Muon.miniPFRelIso_all < 0.4) & + atleast_loose & + (btaglepmvapassed | btaglepmvafailed) & + ((promptMVA > 0.3) | ((promptMVA <= 0.5) & (events.Muon.jetPtRelv2 < (1. / 1.8)))) + ) + # if ch_key == "eormu": + # fakeable_mask = loose_mask + veto_mask = loose_mask & (abs(events.Muon.eta) < 2.4) + # analysis_mask = tight_mask & (events.Muon.pt > min_pt) + control_mask = fakeable_mask + return tight_mask, control_mask, veto_mask + + +@selector( + uses={"{Muon,TrigObj}.{pt,eta,phi}"}, + exposed=False, +) +def muon_trigger_matching( + self: Selector, + events: ak.Array, + trigger: Trigger, + trigger_fired: ak.Array, + leg_masks: dict[str, ak.Array], + **kwargs, +) -> tuple[ak.Array]: + """ + Muon trigger matching. + """ + is_single = trigger.has_tag("single_mu") + is_cross = trigger.has_tag("cross_mu_tau") + + # catch config errors + assert is_single or is_cross + assert trigger.n_legs == len(leg_masks) == (1 if is_single else 2) + assert abs(trigger.legs["mu"].pdg_id) == 13 + return trigger_object_matching( + events.Muon, + events.TrigObj[leg_masks["mu"]], + event_mask=trigger_fired, + ) + + +@selector( + uses={ + "Tau.{pt,eta,phi,dz,decayMode}", + "{Electron,Muon,TrigObj}.{pt,eta,phi}", + }, + # shifts are declared dynamically below in tau_selection_init + exposed=False, +) +def tau_selection( + self: Selector, + events: ak.Array, + trigger: Trigger, + electron_mask: ak.Array | None, + muon_mask: ak.Array | None, + **kwargs, +) -> tuple[ak.Array, ak.Array]: + """ + Tau selection returning a base mask for taus that are at least VVVLoose isolated (vs jet), + a trigger-specific mask, an isolation mask used later to separate nominal and iso-inverted taus + for QCD estimation, and a tagger-independent mask. 
+ """ + # return empty mask if no tagged taus exists in the chunk + if ak.all(ak.num(events.Tau) == 0): + logger.info("no taus found in event chunk") + false_mask = full_like(events.Tau.pt, False, dtype=bool) + return false_mask, false_mask + + is_single_e = trigger.has_tag("single_e") + is_single_mu = trigger.has_tag("single_mu") + is_cross_e = trigger.has_tag("cross_e_tau") + is_cross_mu = trigger.has_tag("cross_mu_tau") + is_cross_tau = trigger.has_tag("cross_tau_tau") + is_cross_tau_vbf = trigger.has_tag("cross_tau_tau_vbf") + is_cross_tau_jet = trigger.has_tag("cross_tau_tau_jet") + is_2016 = self.config_inst.campaign.x.year == 2016 + is_run3 = self.config_inst.campaign.x.run == 3 + get_tau_tagger = lambda tag: f"id{self.config_inst.x.tau_tagger}VS{tag}" + wp_config = self.config_inst.x.tau_id_working_points + + # determine minimum pt and maximum eta + max_eta = 2.5 + base_pt = 20.0 + if is_single_e or is_single_mu: + min_pt = 20.0 + elif is_cross_e: + # only existing after 2016 + min_pt = 0.0 if is_2016 else 35.0 + elif is_cross_mu: + min_pt = 25.0 if is_2016 else 32.0 + elif is_cross_tau: + min_pt = 40.0 + elif is_cross_tau_vbf: + # only existing after 2016 + min_pt = 0.0 if is_2016 else 25.0 + elif is_cross_tau_jet: + min_pt = None if not is_run3 else 35.0 + + # no_id mask for tagge rindependent tests + noid_mask = ( + (abs(events.Tau.eta) < max_eta) & + (events.Tau.pt > base_pt) & + (abs(events.Tau.dz) < 0.2) + ) + + # base tau mask for default and qcd sideband tau + base_mask = noid_mask & ( + reduce(or_, [events.Tau.decayMode == mode for mode in (0, 1, 10, 11)]) & + (events.Tau[get_tau_tagger("jet")] >= wp_config.tau_vs_jet.vvvloose) + # vs e and mu cuts are channel dependent and thus applied in the overall lepton selection + ) + + # remove taus with too close spatial separation to previously selected leptons + if electron_mask is not None: + base_mask = base_mask & ak.all(events.Tau.metric_table(events.Electron[electron_mask]) > 0.5, axis=2) + if muon_mask is not None: + base_mask = base_mask & ak.all(events.Tau.metric_table(events.Muon[muon_mask]) > 0.5, axis=2) + + # trigger dependent cuts + trigger_specific_mask = base_mask & (events.Tau.pt > min_pt) + # compute the isolation mask separately as it is used to defined (qcd) categories later on + iso_mask = events.Tau[get_tau_tagger("jet")] >= wp_config.tau_vs_jet.medium + return base_mask, trigger_specific_mask, iso_mask, noid_mask + + +@tau_selection.init +def tau_selection_init(self: Selector) -> None: + # register tec shifts + self.shifts |= { + shift_inst.name + for shift_inst in self.config_inst.shifts + if shift_inst.has_tag("tec") + } + # Add columns for the right tau tagger + self.uses |= { + f"Tau.id{self.config_inst.x.tau_tagger}VS{tag}" + for tag in ("e", "mu", "jet") + } + + +@selector( + uses={"{Tau,TrigObj}.{pt,eta,phi}"}, + # shifts are declared dynamically below in tau_selection_init + exposed=False, +) +def tau_trigger_matching( + self: Selector, + events: ak.Array, + trigger: Trigger, + trigger_fired: ak.Array, + leg_masks: dict[str, ak.Array], + **kwargs, +) -> tuple[ak.Array]: + """ + Tau trigger matching. 
+ """ + if ak.all(ak.num(events.Tau) == 0): + logger.info("no taus found in event chunk") + return full_like(events.Tau.pt, False, dtype=bool) + + is_cross_e = trigger.has_tag("cross_e_tau") + is_cross_mu = trigger.has_tag("cross_mu_tau") + is_cross_tau = trigger.has_tag("cross_tau_tau") + is_cross_tau_vbf = trigger.has_tag("cross_tau_tau_vbf") + is_cross_tau_jet = trigger.has_tag("cross_tau_tau_jet") + is_any_cross_tau = is_cross_tau or is_cross_tau_vbf or is_cross_tau_jet + assert is_cross_e or is_cross_mu or is_any_cross_tau + + # start per-tau mask with trigger object matching per leg + if is_cross_e or is_cross_mu: + # catch config errors + assert trigger.n_legs == len(leg_masks) == 2 + assert abs(trigger.legs["tau"].pdg_id) == 15 + # match leg 1 + return trigger_object_matching( + events.Tau, + events.TrigObj[leg_masks["tau"]], + event_mask=trigger_fired, + ) + + # is_any_cross_tau + # catch config errors + assert trigger.n_legs == len(leg_masks) >= 2 + assert abs(trigger.legs["tau1"].pdg_id) == 15 + assert abs(trigger.legs["tau2"].pdg_id) == 15 + + # match both legs + matches_leg0 = trigger_object_matching( + events.Tau, + events.TrigObj[leg_masks["tau1"]], + event_mask=trigger_fired, + ) + matches_leg1 = trigger_object_matching( + events.Tau, + events.TrigObj[leg_masks["tau2"]], + event_mask=trigger_fired, + ) + + # taus need to be matched to at least one leg, but as a side condition + # each leg has to have at least one match to a tau + matches = ( + (matches_leg0 | matches_leg1) & + ak.any(matches_leg0, axis=1) & + ak.any(matches_leg1, axis=1) + ) + return matches + + +@selector( + uses={ + electron_selection, electron_trigger_matching, muon_selection, muon_trigger_matching, + tau_selection, tau_trigger_matching, + "event", "{Electron,Muon,Tau}.{charge,mass}", + }, + produces={ + electron_selection, electron_trigger_matching, muon_selection, muon_trigger_matching, + tau_selection, tau_trigger_matching, + # new columns + "channel_id", "leptons_os", "tau2_isolated", "single_triggered", "cross_triggered", + "matched_trigger_ids", "tight_sel", "trig_match", "tight_sel_bdt", "trig_match_bdt", "ok_bdt_eormu", + "ok_bdt_eormu_bveto", + }, +) +def lepton_selection( + self: Selector, + events: ak.Array, + trigger_results: SelectionResult, + **kwargs, +) -> tuple[ak.Array, SelectionResult]: + """ + Combined lepton selection. 
+ """ + wp_config = self.config_inst.x.tau_id_working_points + get_tau_tagger = lambda tag: f"id{self.config_inst.x.tau_tagger}VS{tag}" + + # get channels from the config + print(self.config_inst) + # ch_etau = self.config_inst.get_channel("etau") + # ch_mutau = self.config_inst.get_channel("mutau") + # ch_tautau = self.config_inst.get_channel("tautau") + # ch_ee = self.config_inst.get_channel("ee") + # ch_mumu = self.config_inst.get_channel("mumu") + # ch_emu = self.config_inst.get_channel("emu") + # new 3l channels + ch_3e = self.config_inst.get_channel("c3e") + ch_2emu = self.config_inst.get_channel("c2emu") + ch_e2mu = self.config_inst.get_channel("ce2mu") + ch_3mu = self.config_inst.get_channel("c3mu") + # new 4l channels + ch_4e = self.config_inst.get_channel("c4e") + ch_3emu = self.config_inst.get_channel("c3emu") + ch_2e2mu = self.config_inst.get_channel("c2e2mu") + ch_e3mu = self.config_inst.get_channel("ce3mu") + ch_4mu = self.config_inst.get_channel("c4mu") + # new 3l1tau channels + ch_3etau = self.config_inst.get_channel("c3etau") + ch_2emutau = self.config_inst.get_channel("c2emutau") + ch_e2mutau = self.config_inst.get_channel("ce2mutau") + ch_3mutau = self.config_inst.get_channel("c3mutau") + # new 2l2tau channels + ch_2e2tau = self.config_inst.get_channel("c2e2tau") + ch_2mu2tau = self.config_inst.get_channel("c2mu2tau") + ch_emu2tau = self.config_inst.get_channel("cemu2tau") + # new 1l3tau + ch_e3tau = self.config_inst.get_channel("ce3tau") + ch_mu3tau = self.config_inst.get_channel("cmu3tau") + # new 4tau channel + ch_4tau = self.config_inst.get_channel("c4tau") + # new 2lss channels + ch_2e0or1tau = self.config_inst.get_channel("c2e0or1tau") + ch_emu0or1tau = self.config_inst.get_channel("cemu0or1tau") + ch_2mu0or1tau = self.config_inst.get_channel("c2mu0or1tau") + + CHANNELS = { + "3e": {"id": ch_3e.id}, + "4e": {"id": ch_4e.id}, + "3mu": {"id": ch_3mu.id}, + "4mu": {"id": ch_4mu.id}, + "2emu": {"id": ch_2emu.id}, + "e2mu": {"id": ch_e2mu.id}, + "3emu": {"id": ch_3emu.id}, + "e3mu": {"id": ch_e3mu.id}, + "2e2mu": {"id": ch_2e2mu.id}, + "3etau": {"id": ch_3etau.id}, + "2emutau": {"id": ch_2emutau.id}, + "e2mutau": {"id": ch_e2mutau.id}, + "3mutau": {"id": ch_3mutau.id}, + "2e2tau": {"id": ch_2e2tau.id}, + "2mu2tau": {"id": ch_2mu2tau.id}, + "emu2tau": {"id": ch_emu2tau.id}, + "e3tau": {"id": ch_e3tau.id}, + "mu3tau": {"id": ch_mu3tau.id}, + "4tau": {"id": ch_4tau.id}, + "2e0or1tau": {"id": ch_2e0or1tau.id}, + "emu0or1tau": {"id": ch_emu0or1tau.id}, + "2mu0or1tau": {"id": ch_2mu0or1tau.id}, + "eormu": {"id": "eormu"}, + } + + # prepare vectors for output vectors + false_mask = (abs(events.event) < 0) + channel_id = np.uint32(1) * false_mask + ok_bdt_eormu = false_mask + ok_bdt_eormu_bveto = false_mask + tau2_isolated = false_mask + leptons_os = false_mask + single_triggered = false_mask + cross_triggered = false_mask + tight_sel = false_mask + trig_match = false_mask + tight_sel_bdt = false_mask + trig_match_bdt = false_mask + sel_electron_mask = full_like(events.Electron.pt, False, dtype=bool) + sel_looseelectron_mask = full_like(events.Electron.pt, False, dtype=bool) + sel_tightelectron_mask = full_like(events.Electron.pt, False, dtype=bool) + sel_muon_mask = full_like(events.Muon.pt, False, dtype=bool) + sel_loosemuon_mask = full_like(events.Muon.pt, False, dtype=bool) + sel_tightmuon_mask = full_like(events.Muon.pt, False, dtype=bool) + sel_tau_mask = full_like(events.Tau.pt, False, dtype=bool) + sel_isotau_mask = full_like(events.Tau.pt, False, dtype=bool) + 
sel_noid_tau_mask = full_like(events.Tau.pt, False, dtype=bool) + leading_taus = events.Tau[:, :0] + matched_trigger_ids = [] + lepton_part_trigger_ids = [] + + # indices for sorting taus first by isolation, then by pt + # for this, combine iso and pt values, e.g. iso 255 and pt 32.3 -> 2550032.3 + f = 10**(np.ceil(np.log10(ak.max(events.Tau.pt))) + 2) + tau_sorting_key = events.Tau[f"raw{self.config_inst.x.tau_tagger}VSjet"] * f + events.Tau.pt + # tau_sorting_indices = ak.argsort(tau_sorting_key, axis=-1, ascending=False) + + # perform each lepton selection step separately per trigger, avoid caching + # sel_kwargs = {**kwargs, "call_force": True} + + # INSERTING THE TWO LOOPS HERE + # ──────────────────────────────────────────────────────────────── + # 1 FIRST LOOP – build and cache masks once per fired trigger + # ──────────────────────────────────────────────────────────────── + + _trig_cache = {} + _tid_tags = {} + e_trig_any = full_like(events.event, False, dtype=bool) # we OR all fired flags for single_e here + mu_trig_any = full_like(events.event, False, dtype=bool) # we OR all fired flags for single_mu here + tau_trig_any = full_like(events.event, False, dtype=bool) + e_match_any = full_like(events.Electron.pt, False, dtype=bool) + mu_match_any = full_like(events.Muon.pt, False, dtype=bool) + + for trigger, fired, leg_masks in trigger_results.x.trigger_data: + + if not ak.any(fired): + continue + + e_mask, e_ctrl, e_veto = self[electron_selection](events, trigger, **kwargs) + mu_mask, mu_ctrl, mu_veto = self[muon_selection](events, trigger, **kwargs) + e_mask_bdt, e_ctrl_bdt, e_veto_bdt = self[electron_selection](events, trigger, ch_key="eormu", **kwargs) + mu_mask_bdt, mu_ctrl_bdt, mu_veto_bdt = self[muon_selection](events, trigger, ch_key="eormu", **kwargs) + tau_mask, tau_trigger_specific_mask, tau_iso_mask, noid_tau_mask = self[tau_selection](events, trigger, e_mask, mu_mask, **kwargs) + + # early study of tagger-independent taus + sel_noid_tau_mask = noid_tau_mask + if trigger.has_tag({"single_e"}): + e_match = self[electron_trigger_matching](events, trigger, fired, leg_masks, **kwargs) + e_trig_any = e_trig_any | fired # “any single_e fired in this event?” + e_match_any = e_match_any | e_match # OR electron matching across all single_e tids + else: + # same jagged shape as events.Electron.pt; all False means "no e matched this trigger" + e_match = full_like(events.Electron.pt, False, dtype=bool) + + # muon matching: only for triggers with a muon leg + if trigger.has_tag({"single_mu"}): + mu_match = self[muon_trigger_matching](events, trigger, fired, leg_masks, **kwargs) + mu_trig_any = mu_trig_any | fired # “any single_mu fired in this event?” + mu_match_any = mu_match_any | mu_match + else: + mu_match = full_like(events.Muon.pt, False, dtype=bool) + + if (trigger.has_tag({"cross_tau_tau"}) or trigger.has_tag({"cross_tau_tau_vbf"}) or + trigger.has_tag({"cross_tau_tau_jet"}) or trigger.has_tag({"cross_e_tau"}) or + trigger.has_tag({"cross_mu_tau"})): + tau_match = self[tau_trigger_matching](events, trigger, fired, leg_masks, **kwargs) + tau_trig_any = tau_trig_any | fired + else: + tau_match = full_like(events.Tau.pt, False, dtype=bool) + + tid = trigger.id # caching information particular to each trigger id + _trig_cache.update({ + (tid, "e"): e_mask, (tid, "e_ctrl"): e_ctrl, (tid, "e_veto"): e_veto, + (tid, "mu"): mu_mask, (tid, "mu_ctrl"): mu_ctrl, (tid, "mu_veto"): mu_veto, + (tid, "e_match"): e_match, (tid, "mu_match"): mu_match, + (tid, "tau_mask"): tau_mask, + (tid, 
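+ # cache entries are keyed by (trigger id, mask name) so the second loop below can reuse the
+ # per-trigger selections and matches without calling the selectors again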
"tau_match"): tau_match, + (tid, "tau_iso_mask"): tau_iso_mask, + (tid, "e_ctrl_bdt"): e_ctrl_bdt, (tid, "e_mask_bdt"): e_mask_bdt, (tid, "e_veto_bdt"): e_veto_bdt, + (tid, "mu_ctrl_bdt"): mu_ctrl_bdt, (tid, "mu_mask_bdt"): mu_mask_bdt, (tid, "mu_veto_bdt"): mu_veto_bdt, + }) + + _tid_tags[tid] = set(trigger.tags) + + # Now it is useful to define orthogonal masks: events trigger only on single electrons or single muons + e_only = e_trig_any & ~mu_trig_any # only single_e fired + mu_only = mu_trig_any & ~e_trig_any # only single_mu fired + both_families = e_trig_any & mu_trig_any # both fired + + # Addapted logic for channels with all flavours + e_only_emutau = e_trig_any & ~mu_trig_any & ~tau_trig_any + mu_only_emutau = mu_trig_any & ~e_trig_any & ~tau_trig_any + cross_e_tau_only = tau_trig_any & ~e_trig_any & ~mu_trig_any + single_e_tids = [tid for tid, tags in _tid_tags.items() if "single_e" in tags] + single_mu_tids = [tid for tid, tags in _tid_tags.items() if "single_mu" in tags] + cross_tau_tau_any = [tid for tid, tags in _tid_tags.items() if {"cross_tau_tau", "cross_tau_tau_jet", "cross_tau_tau_vbf"} & tags] + cross_e_tau = [tid for tid, tags in _tid_tags.items() if "cross_e_tau" in tags] + cross_mu_tau = [tid for tid, tags in _tid_tags.items() if "cross_mu_tau" in tags] + + _trig_cache.update({ + # set of events that have triggered at least one single_e trigger + ("fam", "e_trig_any"): e_trig_any, + # set of events that have triggered at least one single_mu trigger + ("fam", "mu_trig_any"): mu_trig_any, + ("fam", "tau_trig_any"): tau_trig_any, + # set of events that have triggered at least one single_e trigger and no one single_mu trigger + ("fam", "e_only"): e_only, + # set of events that have triggered at least one single_mu trigger and no one single_e trigger + ("fam", "mu_only"): mu_only, + # set of events that have triggered at least one singe_e and single_mu trigger + ("fam", "both_families"): both_families, + ("fam", "e_only_emutau"): e_only_emutau, + ("fam", "mu_only_emutau"): mu_only_emutau, + ("fam", "cross_e_tau_only"): cross_e_tau_only, + # Electrons that have matched a single_e trigger object + ("fam", "e_match_any"): e_match_any, + ("fam", "mu_match_any"): mu_match_any, + }) + + # ──────────────────────────────────────────────────────────────── + # 2 SECOND LOOP – evaluate every physics channel once + # ──────────────────────────────────────────────────────────────── + for ch_key, spec in CHANNELS.items(): + + if ch_key not in {"eormu", "3e", "3mu", "2emu", "e2mu", "4e", "4mu", "3emu", "2e2mu", "e3mu", + "3etau", "2e2tau", "e3tau", "2mu2tau", "mu3tau", "3mutau", "2emutau", + "e2mutau", "emu2tau", "4tau", "2e0or1tau", "emu0or1tau", "2mu0or1tau"}: + continue + + if ch_key in {"3e", "4e", "2e0or1tau"}: + if self.dataset_inst.is_mc or self.dataset_inst.has_tag("ee"): + trig_ids = single_e_tids + else: + continue + + elif ch_key in {"3mu", "4mu", "2mu0or1tau"}: + if self.dataset_inst.is_mc or self.dataset_inst.has_tag("mumu"): + trig_ids = single_mu_tids + else: + continue + + elif ch_key in {"2emu", "e2mu", "2e2mu", "3emu", "e3mu", "emu0or1tau", "eormu"}: + if self.dataset_inst.has_tag("emu_from_e"): + trig_ids = single_e_tids + elif self.dataset_inst.has_tag("emu_from_mu"): + trig_ids = single_mu_tids + elif self.dataset_inst.is_mc: + trig_ids = single_e_tids + single_mu_tids + else: + continue + + elif ch_key in {"3etau"}: + if self.dataset_inst.is_mc or self.dataset_inst.has_tag("etau"): + trig_ids = single_e_tids + cross_e_tau + else: + continue + + elif ch_key in 
{"2e2tau", "e3tau"}: + if self.dataset_inst.is_mc or self.dataset_inst.has_tag("etau"): + trig_ids = single_e_tids + cross_e_tau + elif self.dataset_inst.has_tag("tautau"): + trig_ids = cross_tau_tau_any + else: + continue + + elif ch_key in {"3mutau"}: + if self.dataset_inst.is_mc or self.dataset_inst.has_tag("mutau"): + trig_ids = single_mu_tids + cross_mu_tau + else: + continue + + elif ch_key in {"2mu2tau", "mu3tau"}: + if self.dataset_inst.is_mc: + trig_ids = single_mu_tids + cross_mu_tau + cross_tau_tau_any + elif self.dataset_inst.has_tag("mutau"): + trig_ids = single_mu_tids + cross_mu_tau + elif self.dataset_inst.has_tag("tautau"): + trig_ids = cross_tau_tau_any + else: + continue + + elif ch_key in {"2emutau", "e2mutau"}: + if self.dataset_inst.is_mc: + trig_ids = single_e_tids + single_mu_tids + cross_e_tau + cross_mu_tau + elif self.dataset_inst.has_tag("etau"): + trig_ids = single_e_tids + cross_e_tau + elif self.dataset_inst.has_tag("mutau"): + trig_ids = single_mu_tids + cross_mu_tau + else: + continue + + elif ch_key in {"emu2tau"}: + if self.dataset_inst.is_mc: + trig_ids = single_e_tids + single_mu_tids + cross_e_tau + cross_mu_tau + cross_tau_tau_any + elif self.dataset_inst.has_tag("etau"): + trig_ids = single_e_tids + cross_e_tau + elif self.dataset_inst.has_tag("mutau"): + trig_ids = single_mu_tids + cross_mu_tau + elif self.dataset_inst.has_tag("tautau"): + trig_ids = cross_tau_tau_any + else: + continue + + elif ch_key in {"4tau"}: + if self.dataset_inst.is_mc or self.dataset_inst.has_tag("tautau"): + trig_ids = cross_tau_tau_any + else: + continue + + else: + continue + + good_evt = ak.zeros_like(events.event, dtype=bool) + + for tid in trig_ids: + e_mask = _trig_cache[(tid, "e")] + e_ctrl = _trig_cache[(tid, "e_ctrl")] + mu_mask = _trig_cache[(tid, "mu")] + mu_ctrl = _trig_cache[(tid, "mu_ctrl")] + e_veto = _trig_cache[(tid, "e_veto")] + mu_veto = _trig_cache[(tid, "mu_veto")] + e_match = _trig_cache[(tid, "e_match")] + mu_match = _trig_cache[(tid, "mu_match")] + tau_mask = _trig_cache[(tid, "tau_mask")] + tau_iso_mask = _trig_cache[(tid, "tau_iso_mask")] + e_ctrl_bdt = _trig_cache[(tid, "e_ctrl_bdt")] + e_veto_bdt = _trig_cache[(tid, "e_veto_bdt")] + e_mask_bdt = _trig_cache[(tid, "e_mask_bdt")] + mu_ctrl_bdt = _trig_cache[(tid, "mu_ctrl_bdt")] + mu_veto_bdt = _trig_cache[(tid, "mu_veto_bdt")] + mu_mask_bdt = _trig_cache[(tid, "mu_mask_bdt")] + + # channel dependent deeptau cuts vs e and mu, taumask has vs jet vvloose + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vvvloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.vloose) + ) + + ok = ak.ones_like(events.event, dtype=bool) + + if ch_key == "eormu": + + e_base = ( + (ak.sum(e_veto_bdt, axis=1) >= 1) & + (ak.sum(ch_tau_mask, axis=1) >= 0) + ) + mu_base = ( + (ak.sum(mu_veto_bdt, axis=1) >= 1) & + (ak.sum(ch_tau_mask, axis=1) >= 0) + ) + + base_ok = e_base | mu_base + ok_bdt_eormu = ok_bdt_eormu | base_ok + ok_bdt_eormu_bveto = ok_bdt_eormu + + sel_electron_mask = sel_electron_mask | (e_base & e_ctrl_bdt) + sel_looseelectron_mask = sel_looseelectron_mask | (e_base & e_veto_bdt) + sel_tightelectron_mask = sel_tightelectron_mask | (e_base & e_mask_bdt) + sel_muon_mask = sel_muon_mask | (mu_base & mu_ctrl_bdt) + sel_loosemuon_mask = sel_loosemuon_mask | (mu_base & mu_veto_bdt) + sel_tightmuon_mask = sel_tightmuon_mask | (mu_base & mu_mask_bdt) + + # leptons_os = ak.where(ok_bdt_eormu, False, leptons_os) + tight_ok = (e_base & (ak.sum(e_mask_bdt, axis=1) >= 
1)) | (mu_base & (ak.sum(mu_mask_bdt, axis=1) >= 1)) # noqa E501 + tight_sel_bdt = tight_sel_bdt | tight_ok + + if tid in single_e_tids: + trig_match_ok = base_ok & (ak.sum(e_match & e_ctrl_bdt, axis=1) >= 1) + elif tid in single_mu_tids: + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl_bdt, axis=1) >= 1) + + trig_match_bdt = trig_match_bdt | trig_match_ok + single_triggered = ak.where(trig_match_ok, True, single_triggered) + + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + continue + + elif ch_key == "3e": + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 3) & + (ak.sum(e_veto, axis=1) == 3) & + (ak.sum(mu_veto, axis=1) == 0) & + (ak.sum(ch_tau_mask, axis=1) == 0) + ) + + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + + e_charge = events.Electron.charge[e_ctrl] + chargeok = (np.abs(ak.sum(e_charge, axis=1)) == 1) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & (ak.sum(e_mask, axis=1) == 3) + tight_sel = tight_sel | tight_ok + + trig_match_ok = base_ok & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "3mu": + base_ok = ( + (ak.sum(mu_ctrl, axis=1) == 3) & + (ak.sum(mu_veto, axis=1) == 3) & + (ak.sum(e_veto, axis=1) == 0) & + (ak.sum(ch_tau_mask, axis=1) == 0) + ) + + ok = ak.where(base_ok, ok, False) + + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + + mu_charge = events.Muon.charge[mu_ctrl] + chargeok = (np.abs(ak.sum(mu_charge, axis=1)) == 1) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & (ak.sum(mu_mask, axis=1) == 3) + tight_sel = tight_sel | tight_ok + + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "2emu": + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 2) & + (ak.sum(e_veto, axis=1) == 2) & + (ak.sum(mu_ctrl, axis=1) == 1) & + (ak.sum(mu_veto, axis=1) == 1) & + (ak.sum(ch_tau_mask, axis=1) == 0) + ) + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + + e_charge = events.Electron.charge[e_ctrl] + mu_charge = events.Muon.charge[mu_ctrl] + chargeok = (np.abs((ak.sum(e_charge, axis=1) + ak.sum(mu_charge, axis=1))) == 1) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum(e_mask, axis=1) == 2) & (ak.sum(mu_mask, axis=1) == 1)) + tight_sel = tight_sel | tight_ok + + if tid in single_e_tids: + # emu_from_e — accept ONLY events with e_only (anti-overlap) + trig_match_ok = 
base_ok & e_only & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + + elif tid in single_mu_tids: + # emu_from_mu — allow both_families; the matching/logic below handles e-side + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + # for events with both triggers firing: + if_e_fired = base_ok & e_trig_any & (ak.sum(e_match_any & e_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(e_trig_any, trig_match_ok & if_e_fired, trig_match_ok) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "e2mu": + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 1) & + (ak.sum(e_veto, axis=1) == 1) & + (ak.sum(mu_ctrl, axis=1) == 2) & + (ak.sum(mu_veto, axis=1) == 2) & + (ak.sum(ch_tau_mask, axis=1) == 0) + ) + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + + e_charge = events.Electron.charge[e_ctrl] + mu_charge = events.Muon.charge[mu_ctrl] + chargeok = (np.abs((ak.sum(e_charge, axis=1) + ak.sum(mu_charge, axis=1))) == 1) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum(e_mask, axis=1) == 1) & (ak.sum(mu_mask, axis=1) == 2)) + tight_sel = tight_sel | tight_ok + + if tid in single_e_tids: + # emu_from_e — accept ONLY events with e_only (anti-overlap) + trig_match_ok = base_ok & e_only & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + + elif tid in single_mu_tids: + # emu_from_mu — allow both_families; the matching/logic below handles e-side + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + # for events with both triggers firing: + if_e_fired = base_ok & e_trig_any & (ak.sum(e_match_any & e_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(e_trig_any, trig_match_ok & if_e_fired, trig_match_ok) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "4e": + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 4) & + (ak.sum(e_veto, axis=1) == 4) & + (ak.sum(mu_veto, axis=1) == 0) & + (ak.sum(ch_tau_mask, axis=1) == 0) + ) + + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + + e_charge = events.Electron.charge[e_ctrl] + chargeok = (np.abs(ak.sum(e_charge, axis=1)) == 0) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & (ak.sum(e_mask, axis=1) == 4) + tight_sel = tight_sel | tight_ok + + trig_match_ok = base_ok & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "4mu": + base_ok = ( + (ak.sum(mu_ctrl, axis=1) == 4) & + (ak.sum(mu_veto, axis=1) == 4) & + (ak.sum(e_veto, axis=1) == 
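+ # as in the other channels, requiring equal numbers of control (fakeable) and veto (loose)
+ # leptons vetoes any additional loose lepton beyond the selected ones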
0) & + (ak.sum(ch_tau_mask, axis=1) == 0) + ) + + ok = ak.where(base_ok, ok, False) + + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + + mu_charge = events.Muon.charge[mu_ctrl] + chargeok = (np.abs(ak.sum(mu_charge, axis=1)) == 0) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & (ak.sum(mu_mask, axis=1) == 4) + tight_sel = tight_sel | tight_ok + + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "3emu": + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 3) & + (ak.sum(e_veto, axis=1) == 3) & + (ak.sum(mu_ctrl, axis=1) == 1) & + (ak.sum(mu_veto, axis=1) == 1) & + (ak.sum(ch_tau_mask, axis=1) == 0) + ) + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + + e_charge = events.Electron.charge[e_ctrl] + mu_charge = events.Muon.charge[mu_ctrl] + chargeok = (np.abs((ak.sum(e_charge, axis=1) + ak.sum(mu_charge, axis=1))) == 0) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum(e_mask, axis=1) == 3) & (ak.sum(mu_mask, axis=1) == 1)) + tight_sel = tight_sel | tight_ok + + if tid in single_e_tids: + # emu_from_e — accept ONLY events with e_only (anti-overlap) + trig_match_ok = base_ok & e_only & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + + elif tid in single_mu_tids: + # emu_from_mu — allow both_families; the matching/logic below handles e-side + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + # for events with both triggers firing: + if_e_fired = base_ok & e_trig_any & (ak.sum(e_match_any & e_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(e_trig_any, trig_match_ok & if_e_fired, trig_match_ok) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "2e2mu": + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 2) & + (ak.sum(e_veto, axis=1) == 2) & + (ak.sum(mu_ctrl, axis=1) == 2) & + (ak.sum(mu_veto, axis=1) == 2) & + (ak.sum(ch_tau_mask, axis=1) == 0) + ) + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + + e_charge = events.Electron.charge[e_ctrl] + mu_charge = events.Muon.charge[mu_ctrl] + chargeok = (np.abs((ak.sum(e_charge, axis=1) + ak.sum(mu_charge, axis=1))) == 0) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum(e_mask, axis=1) == 2) & (ak.sum(mu_mask, axis=1) == 2)) + tight_sel = tight_sel | tight_ok + + if tid in single_e_tids: + # 
emu_from_e — accept ONLY events with e_only (anti-overlap) + trig_match_ok = base_ok & e_only & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + + elif tid in single_mu_tids: + # emu_from_mu — allow both_families; the matching/logic below handles e-side + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + # for events with both triggers firing: + if_e_fired = base_ok & e_trig_any & (ak.sum(e_match_any & e_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(e_trig_any, trig_match_ok & if_e_fired, trig_match_ok) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "e3mu": + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 1) & + (ak.sum(e_veto, axis=1) == 1) & + (ak.sum(mu_ctrl, axis=1) == 3) & + (ak.sum(mu_veto, axis=1) == 3) & + (ak.sum(ch_tau_mask, axis=1) == 0) + ) + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + + e_charge = events.Electron.charge[e_ctrl] + mu_charge = events.Muon.charge[mu_ctrl] + chargeok = (np.abs((ak.sum(e_charge, axis=1) + ak.sum(mu_charge, axis=1))) == 0) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum(e_mask, axis=1) == 1) & (ak.sum(mu_mask, axis=1) == 3)) + tight_sel = tight_sel | tight_ok + + if tid in single_e_tids: + # emu_from_e — accept ONLY events with e_only (anti-overlap) + trig_match_ok = base_ok & e_only & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + + elif tid in single_mu_tids: + # emu_from_mu — allow both_families; the matching/logic below handles e-side + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + # for events with both triggers firing: + if_e_fired = base_ok & e_trig_any & (ak.sum(e_match_any & e_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(e_trig_any, trig_match_ok & if_e_fired, trig_match_ok) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "3etau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 3) & + (ak.sum(e_veto, axis=1) == 3) & + (ak.sum(mu_veto, axis=1) == 0) & + (ak.sum(ch_tau_mask, axis=1) == 1) + ) + + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + e_charge = events.Electron.charge[e_ctrl] + tau_charge = events.Tau.charge[ch_tau_mask] + chargeok = ((np.abs((ak.sum(tau_charge, axis=1)) + + (ak.sum(e_charge, axis=1))) == 0) & (np.abs(ak.sum(e_charge, axis=1)) == 1)) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & 
((ak.sum((ch_tau_mask & tau_iso_mask), axis=1) == 1) & (ak.sum(e_mask, axis=1) == 3)) + tight_sel = tight_sel | tight_ok + + if tid in single_e_tids: + trig_match_ok = base_ok & e_only_emutau & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + elif tid in cross_e_tau: + trig_match_ok = base_ok & ( + (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) & + (ak.sum(e_match & e_ctrl, axis=1) >= 1) + ) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "2e2tau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 2) & + (ak.sum(e_veto, axis=1) == 2) & + (ak.sum(mu_veto, axis=1) == 0) & + (ak.sum(ch_tau_mask, axis=1) == 2) + ) + + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + e_charge = events.Electron.charge[e_ctrl] + tau_charge = events.Tau.charge[ch_tau_mask] + chargeok = ((np.abs((ak.sum(tau_charge, axis=1)) + + (ak.sum(e_charge, axis=1))) == 0) & (np.abs(ak.sum(e_charge, axis=1)) == 0)) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum((ch_tau_mask & tau_iso_mask), axis=1) == 2) & (ak.sum(e_mask, axis=1) == 2)) + tight_sel = tight_sel | tight_ok + + if tid in single_e_tids: + trig_match_ok = base_ok & e_only_emutau & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + elif tid in cross_e_tau: + trig_match_ok = base_ok & ( + (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) & + (ak.sum(e_match & e_ctrl, axis=1) >= 1) + ) + elif tid in cross_tau_tau_any: + trig_match_ok = base_ok & (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "e3tau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 1) & + (ak.sum(e_veto, axis=1) == 1) & + (ak.sum(mu_veto, axis=1) == 0) & + (ak.sum(ch_tau_mask, axis=1) == 3) + ) + + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + e_charge = events.Electron.charge[e_ctrl] + tau_charge = events.Tau.charge[ch_tau_mask] + chargeok = (np.abs((ak.sum(tau_charge, axis=1)) + + (ak.sum(e_charge, axis=1))) == 0) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum((ch_tau_mask & tau_iso_mask), axis=1) == 3) & (ak.sum(e_mask, axis=1) == 1)) + tight_sel = tight_sel | tight_ok + + if tid in single_e_tids: + trig_match_ok = base_ok & e_only_emutau & 
(ak.sum(e_match & e_ctrl, axis=1) >= 1) + elif tid in cross_e_tau: + trig_match_ok = base_ok & ( + (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) & + (ak.sum(e_match & e_ctrl, axis=1) >= 1) + ) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "3mutau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + + base_ok = ( + (ak.sum(mu_ctrl, axis=1) == 3) & + (ak.sum(mu_veto, axis=1) == 3) & + (ak.sum(e_veto, axis=1) == 0) & + (ak.sum(ch_tau_mask, axis=1) == 1) + ) + + ok = ak.where(base_ok, ok, False) + + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + mu_charge = events.Muon.charge[mu_ctrl] + tau_charge = events.Tau.charge[ch_tau_mask] + chargeok = ((np.abs((ak.sum(tau_charge, axis=1)) + + (ak.sum(mu_charge, axis=1))) == 0) & (np.abs((ak.sum(mu_charge, axis=1))) == 1)) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum((ch_tau_mask & tau_iso_mask), axis=1) == 1) & + (ak.sum(mu_mask, axis=1) == 3)) + tight_sel = tight_sel | tight_ok + + if tid in single_mu_tids: + trig_match_ok = base_ok & mu_only_emutau & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + elif tid in cross_mu_tau: + trig_match_ok = base_ok & ( + (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) & + (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + ) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "2mu2tau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + + base_ok = ( + (ak.sum(mu_ctrl, axis=1) == 2) & + (ak.sum(mu_veto, axis=1) == 2) & + (ak.sum(e_veto, axis=1) == 0) & + (ak.sum(ch_tau_mask, axis=1) == 2) + ) + + ok = ak.where(base_ok, ok, False) + + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + mu_charge = events.Muon.charge[mu_ctrl] + tau_charge = events.Tau.charge[ch_tau_mask] + chargeok = ((np.abs((ak.sum(tau_charge, axis=1)) + + (ak.sum(mu_charge, axis=1))) == 0) & (np.abs(ak.sum(mu_charge, axis=1)) == 0)) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum((ch_tau_mask & tau_iso_mask), axis=1) == 2) & (ak.sum(mu_mask, axis=1) == 2)) + tight_sel = tight_sel | tight_ok + + if tid in single_mu_tids: + trig_match_ok = base_ok & mu_only_emutau & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + elif tid in cross_mu_tau: + trig_match_ok = base_ok & ( + (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) & + (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + ) + elif tid in cross_tau_tau_any: + trig_match_ok = base_ok & (ak.sum(tau_match & ch_tau_mask, 
axis=1) >= 1) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "mu3tau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + + base_ok = ( + (ak.sum(mu_ctrl, axis=1) == 1) & + (ak.sum(mu_veto, axis=1) == 1) & + (ak.sum(e_veto, axis=1) == 0) & + (ak.sum(ch_tau_mask, axis=1) == 3) + ) + + ok = ak.where(base_ok, ok, False) + + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + mu_charge = events.Muon.charge[mu_ctrl] + tau_charge = events.Tau.charge[ch_tau_mask] + chargeok = (np.abs((ak.sum(tau_charge, axis=1)) + + (ak.sum(mu_charge, axis=1))) == 0) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum((ch_tau_mask & tau_iso_mask), axis=1) == 3) & (ak.sum(mu_mask, axis=1) == 1)) + tight_sel = tight_sel | tight_ok + + if tid in single_mu_tids: + trig_match_ok = base_ok & mu_only_emutau & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + elif tid in cross_mu_tau: + trig_match_ok = base_ok & ( + (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) & + (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + ) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "2emutau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 2) & + (ak.sum(e_veto, axis=1) == 2) & + (ak.sum(mu_ctrl, axis=1) == 1) & + (ak.sum(mu_veto, axis=1) == 1) & + (ak.sum(ch_tau_mask, axis=1) == 1) + ) + + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + e_charge = events.Electron.charge[e_ctrl] + mu_charge = events.Muon.charge[mu_ctrl] + tau_charge = events.Tau.charge[ch_tau_mask] + chargeok = ((np.abs((ak.sum(tau_charge, axis=1)) + + (ak.sum(e_charge, axis=1)) + (ak.sum(mu_charge, axis=1))) == 0) & + (np.abs((ak.sum(e_charge, axis=1)) + (ak.sum(mu_charge, axis=1))) == 1)) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum((ch_tau_mask & tau_iso_mask), axis=1) == 1) & + (ak.sum(e_mask, axis=1) == 2) & (ak.sum(mu_mask, axis=1) == 1)) + tight_sel = tight_sel | tight_ok + + if tid in single_e_tids: + trig_match_ok = base_ok & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + if_mu_fired = base_ok & mu_trig_any & (ak.sum(mu_match_any & mu_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(mu_trig_any, 
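+ # when a single_mu trigger also fired in this event, additionally require a muon matched to it
+ # (anti-overlap between the e and mu streams, in the spirit of the e_only/mu_only requirements above)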
trig_match_ok & if_mu_fired, trig_match_ok) + elif tid in single_mu_tids: + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + if_e_fired = base_ok & e_trig_any & (ak.sum(e_match_any & e_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(e_trig_any, trig_match_ok & if_e_fired, trig_match_ok) + elif tid in cross_e_tau: + trig_match_ok = base_ok & ( + (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) & + (ak.sum(e_match & e_ctrl, axis=1) >= 1) + ) + if_mu_fired = base_ok & mu_trig_any & (ak.sum(mu_match_any & mu_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(mu_trig_any, trig_match_ok & if_mu_fired, trig_match_ok) + elif tid in cross_mu_tau: + trig_match_ok = base_ok & ( + (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) & + (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + ) + if_e_fired = base_ok & e_trig_any & (ak.sum(e_match_any & e_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(e_trig_any, trig_match_ok & if_e_fired, trig_match_ok) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "e2mutau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 1) & + (ak.sum(e_veto, axis=1) == 1) & + (ak.sum(mu_ctrl, axis=1) == 2) & + (ak.sum(mu_veto, axis=1) == 2) & + (ak.sum(ch_tau_mask, axis=1) == 1) + ) + + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + e_charge = events.Electron.charge[e_ctrl] + mu_charge = events.Muon.charge[mu_ctrl] + tau_charge = events.Tau.charge[ch_tau_mask] + chargeok = ((np.abs((ak.sum(tau_charge, axis=1)) + + (ak.sum(e_charge, axis=1)) + (ak.sum(mu_charge, axis=1))) == 0) & + (np.abs((ak.sum(e_charge, axis=1)) + (ak.sum(mu_charge, axis=1))) == 1)) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum((ch_tau_mask & tau_iso_mask), axis=1) == 1) & + (ak.sum(e_mask, axis=1) == 1) & (ak.sum(mu_mask, axis=1) == 2)) + tight_sel = tight_sel | tight_ok + + if tid in single_e_tids: + trig_match_ok = base_ok & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + if_mu_fired = base_ok & mu_trig_any & (ak.sum(mu_match_any & mu_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(mu_trig_any, trig_match_ok & if_mu_fired, trig_match_ok) + elif tid in single_mu_tids: + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + if_e_fired = base_ok & e_trig_any & (ak.sum(e_match_any & e_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(e_trig_any, trig_match_ok & if_e_fired, trig_match_ok) + elif tid in cross_e_tau: + trig_match_ok = base_ok & ( + (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) & + (ak.sum(e_match & e_ctrl, axis=1) >= 1) + ) + if_mu_fired = base_ok & mu_trig_any & (ak.sum(mu_match_any & mu_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(mu_trig_any, trig_match_ok & if_mu_fired, trig_match_ok) + elif tid in 
cross_mu_tau: + trig_match_ok = base_ok & ( + (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) & + (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + ) + if_e_fired = base_ok & e_trig_any & (ak.sum(e_match_any & e_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(e_trig_any, trig_match_ok & if_e_fired, trig_match_ok) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "emu2tau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 1) & + (ak.sum(e_veto, axis=1) == 1) & + (ak.sum(mu_ctrl, axis=1) == 1) & + (ak.sum(mu_veto, axis=1) == 1) & + (ak.sum(ch_tau_mask, axis=1) == 2) + ) + + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + e_charge = events.Electron.charge[e_ctrl] + mu_charge = events.Muon.charge[mu_ctrl] + tau_charge = events.Tau.charge[ch_tau_mask] + chargeok = ((np.abs((ak.sum(tau_charge, axis=1)) + + (ak.sum(e_charge, axis=1)) + (ak.sum(mu_charge, axis=1))) == 0) & + (np.abs((ak.sum(e_charge, axis=1)) + (ak.sum(mu_charge, axis=1))) == 0)) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum((ch_tau_mask & tau_iso_mask), axis=1) == 2) & + (ak.sum(e_mask, axis=1) == 1) & (ak.sum(mu_mask, axis=1) == 1)) + tight_sel = tight_sel | tight_ok + + if tid in single_e_tids: + trig_match_ok = base_ok & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + if_mu_fired = base_ok & mu_trig_any & (ak.sum(mu_match_any & mu_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(mu_trig_any, trig_match_ok & if_mu_fired, trig_match_ok) + elif tid in single_mu_tids: + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + if_e_fired = base_ok & e_trig_any & (ak.sum(e_match_any & e_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(e_trig_any, trig_match_ok & if_e_fired, trig_match_ok) + elif tid in cross_e_tau: + trig_match_ok = base_ok & ( + (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) & + (ak.sum(e_match & e_ctrl, axis=1) >= 1) + ) + if_mu_fired = base_ok & mu_trig_any & (ak.sum(mu_match_any & mu_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(mu_trig_any, trig_match_ok & if_mu_fired, trig_match_ok) + elif tid in cross_mu_tau: + trig_match_ok = base_ok & ( + (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) & + (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + ) + if_e_fired = base_ok & e_trig_any & (ak.sum(e_match_any & e_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(e_trig_any, trig_match_ok & if_e_fired, trig_match_ok) + elif tid in cross_tau_tau_any: + trig_match_ok = base_ok & (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + 
matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "4tau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vvloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.vloose) + ) + + base_ok = ( + (ak.sum(mu_veto, axis=1) == 0) & + (ak.sum(e_veto, axis=1) == 0) & + (ak.sum(ch_tau_mask, axis=1) == 4) + ) + + ok = ak.where(base_ok, ok, False) + + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + tau_charge = events.Tau.charge[ch_tau_mask] + chargeok = (np.abs((ak.sum(tau_charge, axis=1))) == 0) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & (ak.sum((ch_tau_mask & tau_iso_mask), axis=1) == 4) + tight_sel = tight_sel | tight_ok + + if tid in cross_tau_tau_any: + trig_match_ok = base_ok & (ak.sum(tau_match & ch_tau_mask, axis=1) >= 1) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "2e0or1tau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 2) & + (ak.sum(e_veto, axis=1) == 2) & + (ak.sum(mu_veto, axis=1) == 0) & + (ak.sum(ch_tau_mask, axis=1) <= 1) + ) + + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + e_charge = events.Electron.charge[e_ctrl] + chargeok = (np.abs(ak.sum(e_charge, axis=1)) == 0) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & (ak.sum(e_mask, axis=1) == 2) + tight_sel = tight_sel | tight_ok + + trig_match_ok = base_ok & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "2mu0or1tau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + + base_ok = ( + (ak.sum(mu_ctrl, axis=1) == 2) & + (ak.sum(mu_veto, axis=1) == 2) & + (ak.sum(e_veto, axis=1) == 0) & + (ak.sum(ch_tau_mask, axis=1) <= 1) + ) + + ok = ak.where(base_ok, ok, False) + + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + mu_charge = events.Muon.charge[mu_ctrl] + chargeok = (np.abs(ak.sum(mu_charge, axis=1)) == 0) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & (ak.sum(mu_mask, axis=1) == 2) + tight_sel = tight_sel | tight_ok + + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + 
ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + elif ch_key == "emu0or1tau": + + ch_tau_mask = ( + tau_mask & + (events.Tau[get_tau_tagger("e")] >= wp_config.tau_vs_e.vloose) & + (events.Tau[get_tau_tagger("mu")] >= wp_config.tau_vs_mu.tight) + ) + + base_ok = ( + (ak.sum(e_ctrl, axis=1) == 1) & + (ak.sum(e_veto, axis=1) == 1) & + (ak.sum(mu_ctrl, axis=1) == 1) & + (ak.sum(mu_veto, axis=1) == 1) & + (ak.sum(ch_tau_mask, axis=1) <= 1) + ) + + ok = ak.where(base_ok, ok, False) + + sel_electron_mask = sel_electron_mask | (ok & e_ctrl) + sel_looseelectron_mask = sel_looseelectron_mask | (ok & e_veto) + sel_tightelectron_mask = sel_tightelectron_mask | (ok & e_mask) + sel_muon_mask = sel_muon_mask | (ok & mu_ctrl) + sel_loosemuon_mask = sel_loosemuon_mask | (ok & mu_veto) + sel_tightmuon_mask = sel_tightmuon_mask | (ok & mu_mask) + sel_tau_mask = sel_tau_mask | (ok & ch_tau_mask) + sel_isotau_mask = sel_isotau_mask | (ok & (ch_tau_mask & tau_iso_mask)) + + e_charge = events.Electron.charge[e_ctrl] + mu_charge = events.Muon.charge[mu_ctrl] + chargeok = (np.abs((ak.sum(e_charge, axis=1) + ak.sum(mu_charge, axis=1))) == 0) + leptons_os = ak.where(ok, chargeok, leptons_os) + + tight_ok = ok & ((ak.sum(e_mask, axis=1) == 1) & (ak.sum(mu_mask, axis=1) == 1)) + tight_sel = tight_sel | tight_ok + + if tid in single_e_tids: + # emu_from_e — accept ONLY events with e_only (anti-overlap) + trig_match_ok = base_ok & e_only & (ak.sum(e_match & e_ctrl, axis=1) >= 1) + + elif tid in single_mu_tids: + # emu_from_mu — allow both_families; the matching/logic below handles e-side + trig_match_ok = base_ok & (ak.sum(mu_match & mu_ctrl, axis=1) >= 1) + # for events with both triggers firing: + if_e_fired = base_ok & e_trig_any & (ak.sum(e_match_any & e_ctrl, axis=1) >= 1) + trig_match_ok = ak.where(e_trig_any, trig_match_ok & if_e_fired, trig_match_ok) + + trig_match = trig_match | trig_match_ok + + single_triggered = ak.where(trig_match_ok, True, single_triggered) + ids = ak.where(trig_match_ok, np.float32(tid), np.float32(np.nan)) + matched_trigger_ids.append(ak.singletons(ak.nan_to_none(ids))) + + # accumulate over triggers + good_evt = ak.where(ok, True, good_evt) + + if ch_key != "eormu": + channel_id = update_channel_ids(events, channel_id, spec["id"], good_evt) + + # some final type conversions + channel_id = ak.values_astype(channel_id, np.uint32) + leptons_os = ak.fill_none(leptons_os, False) + tight_sel = ak.fill_none(tight_sel, False) + tight_sel_bdt = ak.fill_none(tight_sel_bdt, False) + trig_match = ak.fill_none(trig_match, False) + trig_match_bdt = ak.fill_none(trig_match_bdt, False) + ok_bdt_eormu = ak.fill_none(ok_bdt_eormu, False) + ok_bdt_eormu_bveto = ak.fill_none(ok_bdt_eormu_bveto, False) + + # concatenate matched trigger ids + empty_ids = ak.singletons(full_like(events.event, 0, dtype=np.int32), axis=0)[:, :0] + merge_ids = lambda ids: ak.values_astype(ak.concatenate(ids, axis=1), np.int32) if ids else empty_ids + matched_trigger_ids = merge_ids(matched_trigger_ids) + lepton_part_trigger_ids = merge_ids(lepton_part_trigger_ids) + + # save new columns + events = set_ak_column(events, "channel_id", channel_id) + events = set_ak_column(events, "leptons_os", leptons_os) + events = set_ak_column(events, "tau2_isolated", tau2_isolated) + events = set_ak_column(events, "single_triggered", single_triggered) + events = set_ak_column(events, "cross_triggered", cross_triggered) + events = set_ak_column(events, 
"matched_trigger_ids", matched_trigger_ids) + + # new columns for lepton bdt + events = set_ak_column(events, "ok_bdt_eormu", ok_bdt_eormu) + events = set_ak_column(events, "ok_bdt_eormu_bveto", ok_bdt_eormu_bveto) + events = set_ak_column(events, "tight_sel_bdt", tight_sel_bdt) + events = set_ak_column(events, "trig_match_bdt", trig_match_bdt) + + # new selections for the physical channels + events = set_ak_column(events, "tight_sel", tight_sel) + events = set_ak_column(events, "trig_match", trig_match) + + # convert lepton masks to sorted indices (pt for e/mu, iso for tau) + sel_electron_indices = sorted_indices_from_mask(sel_electron_mask, events.Electron.pt, ascending=False) + sel_muon_indices = sorted_indices_from_mask(sel_muon_mask, events.Muon.pt, ascending=False) + sel_tau_indices = sorted_indices_from_mask(sel_tau_mask, tau_sorting_key, ascending=False) + sel_noid_tau_indicies = sorted_indices_from_mask(sel_noid_tau_mask, events.Tau.pt, ascending=False) + + sel_looseelectron_indices = sorted_indices_from_mask(sel_looseelectron_mask, events.Electron.pt, ascending=False) + sel_loosemuon_indices = sorted_indices_from_mask(sel_loosemuon_mask, events.Muon.pt, ascending=False) + + sel_tightelectron_indices = sorted_indices_from_mask(sel_tightelectron_mask, events.Electron.pt, ascending=False) + sel_tightmuon_indices = sorted_indices_from_mask(sel_tightmuon_mask, events.Muon.pt, ascending=False) + sel_isotau_indices = sorted_indices_from_mask(sel_isotau_mask, tau_sorting_key, ascending=False) + + return events, SelectionResult( + steps={ + "lepton": (channel_id != 0) | ok_bdt_eormu | ok_bdt_eormu_bveto, + }, + objects={ + "Electron": { + "Electron": sel_electron_indices, + "ElectronLoose": sel_looseelectron_indices, + "ElectronTight": sel_tightelectron_indices, + }, + "Muon": { + "Muon": sel_muon_indices, + "MuonLoose": sel_loosemuon_indices, + "MuonTight": sel_tightmuon_indices, + }, + "Tau": { + "Tau": sel_tau_indices, + "TauIso": sel_isotau_indices, + "TauNoID": sel_noid_tau_indicies, + }, + }, + aux={ + # save the selected lepton pair for the duration of the selection + # multiplication of a coffea particle with 1 yields the lorentz vector + "lepton_pair": ak.concatenate( + [ + events.Electron[sel_electron_indices] * 1, + events.Muon[sel_muon_indices] * 1, + events.Tau[sel_tau_indices] * 1, + ], + axis=1, + )[:, :2], + + # save the matched trigger ids of the trigger with jet legs for the duration of the selection + # these will be updated in the jet selection and then stored in the matched_trigger_ids column + "lepton_part_trigger_ids": lepton_part_trigger_ids, + # save the leading taus for the duration of the selection + # exactly 1 for etau/mutau and exactly 2 for tautau + "leading_taus": leading_taus, + "eles": sel_electron_indices, + "mus": sel_muon_indices, + "taus": sel_tau_indices, + # new collections + "eles_loose": sel_looseelectron_indices, + "mus_loose": sel_loosemuon_indices, + "eles_tight": sel_tightelectron_indices, + "mus_tight": sel_tightmuon_indices, + "taus_iso": sel_isotau_indices, + }, + ) + + +@lepton_selection.init +def lepton_selection_init(self: Selector, **kwargs) -> None: + # add column to load the raw tau tagger score + self.uses.add(f"Tau.raw{self.config_inst.x.tau_tagger}VSjet") diff --git a/hbt/selection/trigger.py b/multilepton/selection/trigger.py similarity index 93% rename from hbt/selection/trigger.py rename to multilepton/selection/trigger.py index 48ffb667..0486b29e 100644 --- a/hbt/selection/trigger.py +++ b/multilepton/selection/trigger.py @@ 
-18,7 +18,7 @@ "TrigObj.{id,pt,eta,phi,filterBits}", }, produces={ - "trigger_ids", + "fired_trigger_ids", }, exposed=True, ) @@ -84,7 +84,7 @@ def trigger_selection( # store the fired trigger ids trigger_ids = ak.concatenate(trigger_ids, axis=1) - events = set_ak_column(events, "trigger_ids", trigger_ids, value_type=np.int32) + events = set_ak_column(events, "fired_trigger_ids", trigger_ids, value_type=np.int32) return events, SelectionResult( steps={ @@ -97,10 +97,7 @@ def trigger_selection( @trigger_selection.init -def trigger_selection_init(self: Selector) -> None: - if getattr(self, "dataset_inst", None) is None: - return - +def trigger_selection_init(self: Selector, **kwargs) -> None: # full used columns self.uses |= { opt(trigger.name) diff --git a/multilepton/tasks/__init__.py b/multilepton/tasks/__init__.py new file mode 100644 index 00000000..d8675132 --- /dev/null +++ b/multilepton/tasks/__init__.py @@ -0,0 +1,9 @@ +# coding: utf-8 +# flake8: noqa + +# provisioning imports +import multilepton.tasks.base +import multilepton.tasks.stats +import multilepton.tasks.studies +#import multilepton.tasks.fastLFNsfetch + diff --git a/multilepton/tasks/base.py b/multilepton/tasks/base.py new file mode 100644 index 00000000..e17cf60b --- /dev/null +++ b/multilepton/tasks/base.py @@ -0,0 +1,19 @@ +# coding: utf-8 + +""" +Custom base tasks for HH -> Multileptons. +""" + +import luigi + +from columnflow.tasks.framework.base import BaseTask + +class MultileptonTask(BaseTask): + + task_namespace = "multilepton" + + # add a parameter that can be set on the command line + limit_dataset_files = luigi.IntParameter( + default=-1, # -1 means "no limit" + description="Limit number of dataset files to process. -1 means no limit." + ) diff --git a/multilepton/tasks/fastLFNsfetch.py b/multilepton/tasks/fastLFNsfetch.py new file mode 100644 index 00000000..39454ed7 --- /dev/null +++ b/multilepton/tasks/fastLFNsfetch.py @@ -0,0 +1,33 @@ +import luigi + +from columnflow.tasks.plotting import PlotVariables1D as CFPlotVariables1D + +from multilepton.tasks.base import MultileptonTask + + +class PlotVariables1D(MultileptonTask, CFPlotVariables1D): + """ + Wrapper around ColumnFlow's PlotVariables1D to include the MultileptonTask parameters. + """ + task_namespace = "cf" + + def run(self): + self.logger.info(f"Running PlotVariables1D with limit_dataset_files = {self.limit_dataset_files}") + # Pass it to the config factory + for config_name, factory in self.analysis.configs.items(): + factory_kwargs = {"limit_dataset_files": self.limit_dataset_files} + config_obj = factory(**factory_kwargs) + + # Now pass the command-line parameter to the config factories + for module, attr, name, cid in datasets: + add_lazy_config( + campaign_module=module, + campaign_attr=attr, + config_name=name, + config_id=cid, + add_limited=False, + limit_dataset_files=self.limit_dataset_files + ) + + ## Continue with normal CF run + super().run() diff --git a/hbt/tasks/parameters.py b/multilepton/tasks/parameters.py similarity index 100% rename from hbt/tasks/parameters.py rename to multilepton/tasks/parameters.py diff --git a/hbt/tasks/stats.py b/multilepton/tasks/stats.py similarity index 93% rename from hbt/tasks/stats.py rename to multilepton/tasks/stats.py index fde4dc12..1ab3d7e1 100644 --- a/hbt/tasks/stats.py +++ b/multilepton/tasks/stats.py @@ -4,30 +4,25 @@ Tasks to print various statistics. 
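# ---------------------------------------------------------------------------------------------
# Editor's note (not part of the patch): the PlotVariables1D wrapper in
# multilepton/tasks/fastLFNsfetch.py above is still WIP; it references names ("datasets",
# "add_lazy_config") that are not defined in that module, which is why its import is commented
# out in multilepton/tasks/__init__.py. The underlying idea, declaring a command-line parameter
# on a base task and forwarding it before deferring to the parent's run(), can be sketched with
# plain luigi instead of the columnflow base classes; all class names here are made up.
import luigi


class LimitFilesMixin(luigi.Task):
    limit_dataset_files = luigi.IntParameter(
        default=-1,  # -1 means "no limit"
        description="Limit number of dataset files to process. -1 means no limit.",
    )


class BasePlot(luigi.Task):
    def run(self):
        print("producing plots")


class PlotWithLimit(LimitFilesMixin, BasePlot):
    def run(self):
        # forward the parameter to whatever configuration needs it, then defer to the parent
        print(f"limit_dataset_files = {self.limit_dataset_files}")
        super().run()
# ---------------------------------------------------------------------------------------------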
""" +import tabulate import functools - import law from columnflow.tasks.framework.base import ConfigTask -from columnflow.util import dev_sandbox -from hbt.tasks.base import HBTTask -from hbt.tasks.parameters import table_format_param +from multilepton.tasks.base import MultileptonTask +from multilepton.tasks.parameters import table_format_param -class ListDatasetStats(HBTTask, ConfigTask, law.tasks.RunOnceTask): +class ListDatasetStats(MultileptonTask, ConfigTask, law.tasks.RunOnceTask): + single_config = True table_format = table_format_param - # no version required version = None - sandbox = dev_sandbox(law.config.get("analysis", "default_columnar_sandbox")) - def run(self): - import tabulate tabulate.PRESERVE_WHITESPACE = True - # color helpers green = functools.partial(law.util.colored, color="green") green_bright = functools.partial(law.util.colored, color="green", style="bright") @@ -49,7 +44,6 @@ def get_color(dataset_inst): # headers headers = ["Dataset", "Files", "Events"] - # content rows = [] sum_files_s_nonres, sum_events_s_nonres = 0, 0 @@ -57,6 +51,7 @@ def get_color(dataset_inst): sum_files_b_nom, sum_events_b_nom = 0, 0 sum_files_b_syst, sum_events_b_syst = 0, 0 sum_files_data, sum_events_data = 0, 0 + for dataset_inst in self.config_inst.datasets: col = get_color(dataset_inst) # nominal info @@ -74,14 +69,17 @@ def get_color(dataset_inst): else: sum_files_b_nom += dataset_inst.n_files sum_events_b_nom += dataset_inst.n_events + # potential shifts for shift_name, info in dataset_inst.info.items(): if shift_name == "nominal" or shift_name not in self.config_inst.shifts: continue + rows.append([yellow_bright(f" → {shift_name}"), info.n_files, info.n_events]) # increment sums sum_files_b_syst += info.n_files sum_events_b_syst += info.n_events + # overall sum_files_all = ( sum_files_s_nonres + sum_files_s_res + sum_files_b_nom + sum_files_b_syst + @@ -91,7 +89,6 @@ def get_color(dataset_inst): sum_events_s_nonres + sum_events_s_res + sum_events_b_nom + sum_events_b_syst + sum_events_data ) - # sums rows.append([bright("total signal (non-res.)"), sum_files_s_nonres, sum_events_s_nonres]) rows.append([bright("total signal (res.)"), sum_files_s_res, sum_events_s_res]) @@ -102,7 +99,7 @@ def get_color(dataset_inst): rows.append([bright("total data"), sum_files_data, sum_events_data]) if sum_files_all or sum_events_all: rows.append([bright("total"), sum_files_all, sum_events_all]) - + # print the table table = tabulate.tabulate(rows, headers=headers, tablefmt=self.table_format, intfmt="_") self.publish_message(table) diff --git a/hbt/tasks/studies/__init__.py b/multilepton/tasks/studies/__init__.py similarity index 56% rename from hbt/tasks/studies/__init__.py rename to multilepton/tasks/studies/__init__.py index 475e8a5d..087fc041 100644 --- a/hbt/tasks/studies/__init__.py +++ b/multilepton/tasks/studies/__init__.py @@ -2,4 +2,4 @@ # flake8: noqa # provisioning imports -import hbt.tasks.studies.triggers +import multilepton.tasks.studies.triggers diff --git a/hbt/tasks/studies/triggers.py b/multilepton/tasks/studies/triggers.py similarity index 87% rename from hbt/tasks/studies/triggers.py rename to multilepton/tasks/studies/triggers.py index ac0eff6f..b91e181c 100644 --- a/hbt/tasks/studies/triggers.py +++ b/multilepton/tasks/studies/triggers.py @@ -14,22 +14,19 @@ from columnflow.tasks.external import GetDatasetLFNs from columnflow.util import ensure_proxy, dev_sandbox -from hbt.tasks.base import HBTTask -from hbt.tasks.parameters import table_format_param, escape_markdown_param 
+from multilepton.tasks.base import MultileptonTask +from multilepton.tasks.parameters import table_format_param, escape_markdown_param logger = law.logger.get_logger(__name__) -class HBTTriggerTask(HBTTask): +class MultileptonTriggerTask(MultileptonTask): """ Base task for trigger related studies. """ - sandbox = dev_sandbox(law.config.get("analysis", "default_columnar_sandbox")) - version = None - lfn_indices = law.CSVParameter( cls=luigi.IntParameter, default=(0,), @@ -39,20 +36,16 @@ class HBTTriggerTask(HBTTask): ) -class PrintTriggersInFile(HBTTriggerTask, DatasetTask, law.tasks.RunOnceTask): +class PrintTriggersInFile(MultileptonTriggerTask, DatasetTask, law.tasks.RunOnceTask): """ Prints a list of all HLT paths contained in the first file of a dataset. - Example: - - > law run hbt.PrintTriggersInFile --dataset hh_ggf_bbtautau_madgraph + > law run multilepton.PrintTriggersInFile --dataset hh_ggf_bbtautau_madgraph """ - # upstream requirements reqs = Requirements( GetDatasetLFNs=GetDatasetLFNs, ) - def requires(self): return self.reqs.GetDatasetLFNs.req(self) @@ -65,7 +58,6 @@ def run(self): logger.warning("multiple LFN indices are not supported in this task, using the first one") lfn_index = self.lfn_indices[0] logger.info(f"Printing HLT paths for LFN index {lfn_index}") - # prepare input input_file = list(self.requires().iter_nano_files(self, lfn_indices=[lfn_index]))[0][1] @@ -78,28 +70,22 @@ def run(self): key[4:] for key in nano_file["Events"].keys() if key.startswith("HLT_") ] - # print them print("") print("\n".join(hlt_paths)) print("") -class PrintExistingConfigTriggers(HBTTriggerTask, DatasetsProcessesMixin, law.tasks.RunOnceTask): +class PrintExistingConfigTriggers(MultileptonTriggerTask, DatasetsProcessesMixin, law.tasks.RunOnceTask): """ Prints a table showing datasets (one per column) and contained HLT paths (one per row). - Example: - - > law run hbt.PrintExistingConfigTriggers --datasets "hh_ggf_bbtautau_madgraph,data_mu_{b,c,d,e,f}" + > law run multilepton.PrintExistingConfigTriggers --datasets "hh_ggf_bbtautau_madgraph,data_mu_{b,c,d,e,f}" """ - table_format = table_format_param escape_markdown = escape_markdown_param - processes = None allow_empty_processes = True - # upstream requirements reqs = Requirements( GetDatasetLFNs=GetDatasetLFNs, @@ -118,11 +104,9 @@ def run(self): from tabulate import tabulate fmt = law.util.escape_markdown if self.escape_markdown else (lambda s: s) - # table data header = ["HLT path"] rows = [[fmt(trigger.hlt_field)] for trigger in self.config_inst.x.triggers] - lfn_indices = list(set(self.lfn_indices)) logger.info(f"Printing HLT paths for LFN indices: {lfn_indices}") @@ -145,7 +129,6 @@ def run(self): header.append(fmt(f"{dataset}{postfix}")) for trigger, row in zip(self.config_inst.x.triggers, rows): row.append(int(trigger.name in hlt_paths)) - print("") print(tabulate(rows, headers=header, tablefmt=self.table_format)) print("") diff --git a/multilepton/util.py b/multilepton/util.py new file mode 100644 index 00000000..4033646e --- /dev/null +++ b/multilepton/util.py @@ -0,0 +1,251 @@ +# coding: utf-8 + +""" +Helpful utils. 
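# ---------------------------------------------------------------------------------------------
# Editor's sketch (not part of the patch): the core of what PrintTriggersInFile above does,
# i.e. open one NanoAOD file and list the branch names of the "Events" tree that start with
# "HLT_", stripping the prefix. The file path is a placeholder; in the task the file is
# resolved through GetDatasetLFNs and a valid grid proxy.
import uproot

with uproot.open("nano_example.root") as nano_file:  # hypothetical local NanoAOD file
    hlt_paths = [
        key[len("HLT_"):]
        for key in nano_file["Events"].keys()
        if key.startswith("HLT_")
    ]

print("\n".join(sorted(hlt_paths)))
# ---------------------------------------------------------------------------------------------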
+""" + +from __future__ import annotations + +__all__ = [] + +import functools + +from columnflow.types import Any +from columnflow.columnar_util import ArrayFunction, deferred_column +from columnflow.util import maybe_import + +np = maybe_import("numpy") +ak = maybe_import("awkward") + + +@deferred_column +def IF_DATA(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if func.dataset_inst.is_data else None + + +@deferred_column +def IF_MC(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if func.dataset_inst.is_mc else None + + +@deferred_column +def IF_NANO_V9(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if func.config_inst.campaign.x.version == 9 else None + + +@deferred_column +def IF_NANO_V11(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if func.config_inst.campaign.x.version == 11 else None + + +@deferred_column +def IF_NANO_V12(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if func.config_inst.campaign.x.version == 12 else None + + +@deferred_column +def IF_NANO_V14(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if func.config_inst.campaign.x.version == 14 else None + + +@deferred_column +def IF_NANO_V15(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if func.config_inst.campaign.x.version == 15 else None + + +@deferred_column +def IF_NOT_NANO_V15(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if func.config_inst.campaign.x.version != 15 else None + + +@deferred_column +def IF_NANO_GE_V10(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if func.config_inst.campaign.x.version >= 10 else None + + +@deferred_column +def IF_NANO_GE_V14(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if func.config_inst.campaign.x.version >= 14 else None + + +@deferred_column +def IF_RUN_2(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if func.config_inst.campaign.x.run == 2 else None + + +@deferred_column +def IF_RUN_3(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if func.config_inst.campaign.x.run == 3 else None + + +@deferred_column +def IF_RUN_3_NOT_NANO_V15(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if (func.config_inst.campaign.x.run == 3 and func.config_inst.campaign.x.version != 15) else None + + +@deferred_column +def IF_RUN_3_2022(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if (func.config_inst.campaign.x.run == 3 and func.config_inst.campaign.x.year == 2022) else None + + +@deferred_column +def IF_RUN_3_2023(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if (func.config_inst.campaign.x.run == 3 and func.config_inst.campaign.x.year == 2023) else None + + +@deferred_column +def IF_RUN_3_2024(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + return self.get() if (func.config_inst.campaign.x.run == 3 and func.config_inst.campaign.x.year == 2024) else None + + +@deferred_column +def IF_RUN_3_22_23(self: ArrayFunction.DeferredColumn, func: 
ArrayFunction) -> Any | set[Any]: + return self.get() if (func.config_inst.campaign.x.run == 3 and func.config_inst.campaign.x.year in {2022, 2023}) else None # noqa: E501 + + +def IF_DATASET_HAS_TAG(*args, negate: bool = False, **kwargs) -> ArrayFunction.DeferredColumn: + @deferred_column + def deferred( + self: ArrayFunction.DeferredColumn, + func: ArrayFunction, + ) -> Any | set[Any]: + return self.get() if func.dataset_inst.has_tag(*args, **kwargs) is not negate else None + + return deferred + + +IF_DATASET_NOT_HAS_TAG = functools.partial(IF_DATASET_HAS_TAG, negate=True) + +IF_DATASET_HAS_LHE_WEIGHTS = IF_DATASET_NOT_HAS_TAG("no_lhe_weights") +IF_DATASET_HAS_TOP = IF_DATASET_HAS_TAG("has_top") +IF_DATASET_HAS_HIGGS = IF_DATASET_HAS_TAG("has_higgs") +IF_DATASET_IS_TT = IF_DATASET_HAS_TAG("ttbar") +IF_DATASET_IS_DY = IF_DATASET_HAS_TAG("dy") +IF_DATASET_IS_DY_MADGRAPH = IF_DATASET_HAS_TAG("dy_madgraph") +IF_DATASET_IS_DY_AMCATNLO = IF_DATASET_HAS_TAG("dy_amcatnlo") +IF_DATASET_IS_DY_POWHEG = IF_DATASET_HAS_TAG("dy_powheg") +IF_DATASET_IS_W_LNU = IF_DATASET_HAS_TAG("w_lnu") + + +@deferred_column +def MET_COLUMN(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + met_name = func.config_inst.x("met_name", None) + if not met_name: + raise Exception("'met_name' has not been configured") + return f"{met_name}.{self.get()}" + + +@deferred_column +def RAW_MET_COLUMN(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: + raw_met_name = func.config_inst.x("raw_met_name", None) + if not raw_met_name: + raise Exception("'raw_met_name' has not been configured") + return f"{raw_met_name}.{self.get()}" + + +def hash_events(arr: np.ndarray) -> np.ndarray: + """ + Helper function to create a hash value from the event, run and luminosityBlock columns. + The values are padded to specific lengths and concatenated to a single integer. + """ + import awkward as ak + + def assert_value(arr: np.ndarray, field: str, max_value: int) -> None: + """ + Helper function to check if a column does not exceed a maximum value. + """ + digits = len(str(arr[field].to_numpy().max())) + assert digits <= max_value, f"{field} digit count is {digits} and exceed max value {max_value}" + + max_digits_run = 6 + max_digits_luminosityBlock = 6 + max_digits_event = 8 + assert_value(arr, "run", max_digits_run) + assert_value(arr, "luminosityBlock", max_digits_luminosityBlock) + assert_value(arr, "event", max_digits_event) + + max_digits_hash = max_digits_event + max_digits_luminosityBlock + max_digits_run + assert max_digits_hash <= 20, "sum of digits exceeds uint64" + + # upcast to uint64 to avoid overflow + return ( + ak.values_astype(arr.run, np.uint64) * 10**(max_digits_luminosityBlock + max_digits_event) + + ak.values_astype(arr.luminosityBlock, np.uint64) * 10**max_digits_event + + ak.values_astype(arr.event, np.uint64) + ) + + +def with_type(type_name: str, data: dict[str, ak.Array], behavior: dict | None = None) -> ak.Array: + """ + Attaches a named behavior *type_name* to the structured *data* and returns an array with that behavior. The source + behavior is extracted from the *behavior* mapping, which is extracted from the first data column if not provided. + + :param type_name: The name of the type to attach. + :param data: The structured data to attach the behavior to. + :param behavior: The behavior to attach, defaults to the first data column's behavior. + :return: Array with the specified behavior. 
+ """ + # extract the behavior from the first data column + if behavior is None: + behavior = next(iter(data.values())).behavior + return ak.Array(data, with_name=type_name, behavior=behavior) + + +def create_lvector_exyz(e: ak.Array, px: ak.Array, py: ak.Array, pz: ak.Array, behavior: dict | None = None) -> ak.Array: + """ + Creates a Lorentz vector with the given energy and momentum components. + + :param e: Energy component. + :param px: x-component of momentum. + :param py: y-component of momentum. + :param pz: z-component of momentum. + :return: Lorentz vector as an awkward array. + """ + data = { + "e": e, + "px": px, + "py": py, + "pz": pz, + } + return with_type("PtEtaPhiMLorentzVector", data, behavior=behavior) + + +def create_lvector_xyz(px: ak.Array, py: ak.Array, pz: ak.Array, behavior: dict | None = None) -> ak.Array: + """ + Creates a Lorentz vector with the given momentum components and zero mass. + + :param px: x-component of momentum. + :param py: y-component of momentum. + :param pz: z-component of momentum. + :return: Lorentz vector as an awkward array. + """ + p = (px**2 + py**2 + pz**2)**0.5 + return create_lvector_exyz(p, px, py, pz, behavior=behavior) + + +_uppercase_wps = { + "vvvvloose": "VVVVLoose", + "vvvloose": "VVVLoose", + "vvloose": "VVLoose", + "vloose": "VLoose", + "loose": "Loose", + "medium": "Medium", + "tight": "Tight", + "vtight": "VTight", + "vvtight": "VVTight", + "vvvtight": "VVVTight", + "vvvvtight": "VVVVTight", +} + + +def uppercase_wp(wp: str) -> str: + """ + Converts a working point string to uppercase format. + + :param wp: Working point string. + :return: Uppercase working point string. + """ + wp = wp.lower() + if wp not in _uppercase_wps: + raise ValueError(f"unknown working point for uppercase conversion: {wp}") + return _uppercase_wps[wp] diff --git a/run_multilepton.sh b/run_multilepton.sh new file mode 100755 index 00000000..e86e0fc4 --- /dev/null +++ b/run_multilepton.sh @@ -0,0 +1,39 @@ +law run cf.PlotVariables1D \ + --version testnanov15_2024__ver4 \ + --producers default \ + --variables nmu \ + --categories ceormu \ + --datasets data_mu_e \ + --view-cmd imgcat \ + --configs 24_v15_central \ + --parallel-jobs 200 \ + $1 + + #--version prod1 \ + #--datasets all_backgrounds \ + #--workflow slurm \ + + # FIXME to test out the functionality of these + #--limit-dataset-files 1 \ + #--log-file slurm + # --workers 6 + # --pilot + +# options: + +# --configs: +# 22preEE_v14_private, 22postEE_v14_private, 23preBPix_v14_private, 23postBPix_v14_private +# 22preEE_v12_central, 22postEE_v12_central, 23preBPix_v12_central, 23postBPix_v12_central, 24_v15_central + +# --processes: +# all_data, all_signals, all_backgrounds, +# resonant, nonresonant, nonresonant_ggf, nonresonant_vbf +# ggf_4v, ggf_4t, ggf_2t2v, vbf_4v, vbf_4t, vbf_2t2v +# 4v, 4t, 2t2v + +# --datasets: +# all_data, all_backgrounds, all_signals +# ttbar, single_top, dy, wjets, qcd, zz, single_higgs, vvv, others + + + diff --git a/sandboxes/clib.txt b/sandboxes/clib.txt deleted file mode 100644 index 15684f6b..00000000 --- a/sandboxes/clib.txt +++ /dev/null @@ -1,4 +0,0 @@ -# version 1 - -# "correction summary" does not work with correctionlib >= 2.6, so keep an old version -correctionlib==2.5.0 diff --git a/sandboxes/columnar_tf.txt b/sandboxes/columnar_tf.txt deleted file mode 100644 index 7c7aab36..00000000 --- a/sandboxes/columnar_tf.txt +++ /dev/null @@ -1,5 +0,0 @@ -# version 9 - --r ../modules/columnflow/sandboxes/columnar.txt - -tensorflow~=2.16.1 diff --git 
a/sandboxes/columnar_torch.txt b/sandboxes/columnar_torch.txt deleted file mode 100644 index e8838457..00000000 --- a/sandboxes/columnar_torch.txt +++ /dev/null @@ -1,7 +0,0 @@ -# version 1 - --r ../modules/columnflow/sandboxes/columnar.txt - -torch~=2.5.1 -lightning~=2.4.0 -torchmetrics~=1.6.0 diff --git a/sandboxes/dev.txt b/sandboxes/dev.txt new file mode 100644 index 00000000..f61b7f63 --- /dev/null +++ b/sandboxes/dev.txt @@ -0,0 +1,6 @@ +# version 1 + +tensorflow~=2.16.1 +torch~=2.8.0 +torchmetrics~=1.8.2 +torchdata~=0.11.0 diff --git a/sandboxes/multilepton.txt b/sandboxes/multilepton.txt new file mode 100644 index 00000000..ef764b1c --- /dev/null +++ b/sandboxes/multilepton.txt @@ -0,0 +1,11 @@ +# version 1 + +-r ../modules/columnflow/sandboxes/columnar.txt + +# upgarde/downgrade to these minimum versions + +# upgrade/downgrade to these exact versions +pyarrow==18.1.0 +pandas==2.3.2 +optree==0.16.0 +boost-histogram==1.5.2 diff --git a/sandboxes/venv_clib.sh b/sandboxes/venv_clib.sh deleted file mode 100644 index 30acf423..00000000 --- a/sandboxes/venv_clib.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -# Script that sets up a virtual env in $CF_VENV_PATH. -# For more info on functionality and parameters, see the generic setup script _setup_venv.sh. - -action() { - local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )" - local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )" - local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )" - - # set variables and source the generic venv setup - export CF_SANDBOX_FILE="${CF_SANDBOX_FILE:-${this_file}}" - export CF_VENV_NAME="$( basename "${this_file%.sh}" )" - export CF_VENV_REQUIREMENTS="${this_dir}/clib.txt" - - source "${CF_BASE}/sandboxes/_setup_venv.sh" "$@" -} -action "$@" diff --git a/sandboxes/venv_columnar_torch.sh b/sandboxes/venv_columnar_torch.sh deleted file mode 100644 index 589742c0..00000000 --- a/sandboxes/venv_columnar_torch.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -# Script that sets up a virtual env in $CF_VENV_PATH. -# For more info on functionality and parameters, see the generic setup script _setup_venv.sh. - -action() { - local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )" - local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )" - local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )" - - # set variables and source the generic venv setup - export CF_SANDBOX_FILE="${CF_SANDBOX_FILE:-${this_file}}" - export CF_VENV_NAME="$( basename "${this_file%.sh}" )" - export CF_VENV_REQUIREMENTS="${this_dir}/columnar_torch.txt" - - source "${CF_BASE}/sandboxes/_setup_venv.sh" "$@" -} -action "$@" diff --git a/sandboxes/venv_columnar_torch_dev.sh b/sandboxes/venv_columnar_torch_dev.sh deleted file mode 100644 index 27c35e68..00000000 --- a/sandboxes/venv_columnar_torch_dev.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -# Script that sets up a virtual env in $CF_VENV_PATH. -# For more info on functionality and parameters, see the generic setup script _setup_venv.sh. 
- -action() { - local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )" - local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )" - local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )" - - # set variables and source the generic venv setup - export CF_SANDBOX_FILE="${CF_SANDBOX_FILE:-${this_file}}" - export CF_VENV_NAME="$( basename "${this_file%.sh}" )" - export CF_VENV_REQUIREMENTS="${this_dir}/columnar_torch.txt,${CF_BASE}/sandboxes/dev.txt" - - source "${CF_BASE}/sandboxes/_setup_venv.sh" "$@" -} -action "$@" diff --git a/sandboxes/venv_columnar_tf.sh b/sandboxes/venv_multilepton.sh old mode 100644 new mode 100755 similarity index 91% rename from sandboxes/venv_columnar_tf.sh rename to sandboxes/venv_multilepton.sh index 13045ae6..1692733e --- a/sandboxes/venv_columnar_tf.sh +++ b/sandboxes/venv_multilepton.sh @@ -11,7 +11,7 @@ action() { # set variables and source the generic venv setup export CF_SANDBOX_FILE="${CF_SANDBOX_FILE:-${this_file}}" export CF_VENV_NAME="$( basename "${this_file%.sh}" )" - export CF_VENV_REQUIREMENTS="${this_dir}/columnar_tf.txt" + export CF_VENV_REQUIREMENTS="${this_dir}/multilepton.txt" source "${CF_BASE}/sandboxes/_setup_venv.sh" "$@" diff --git a/sandboxes/venv_columnar_tf_dev.sh b/sandboxes/venv_multilepton_dev.sh old mode 100644 new mode 100755 similarity index 91% rename from sandboxes/venv_columnar_tf_dev.sh rename to sandboxes/venv_multilepton_dev.sh index 1d4d723b..5d3969a4 --- a/sandboxes/venv_columnar_tf_dev.sh +++ b/sandboxes/venv_multilepton_dev.sh @@ -11,7 +11,7 @@ action() { # set variables and source the generic venv setup export CF_SANDBOX_FILE="${CF_SANDBOX_FILE:-${this_file}}" export CF_VENV_NAME="$( basename "${this_file%.sh}" )" - export CF_VENV_REQUIREMENTS="${this_dir}/columnar_tf.txt,${CF_BASE}/sandboxes/dev.txt" + export CF_VENV_REQUIREMENTS="${this_dir}/multilepton.txt,${CF_BASE}/sandboxes/dev.txt" source "${CF_BASE}/sandboxes/_setup_venv.sh" "$@" diff --git a/setup.sh b/setup.sh old mode 100644 new mode 100755 index 0e2d3e14..c501d85d --- a/setup.sh +++ b/setup.sh @@ -1,62 +1,79 @@ #!/usr/bin/env bash -setup_hbt() { +setup_multilepton() { # Runs the project setup, leading to a collection of environment variables starting with either # - "CF_", for controlling behavior implemented by columnflow, or - # - "HBT_", for features provided by the analysis repository itself. + # - "MULTILEPTON_", for features provided by the analysis repository itself. # Check the setup.sh in columnflow for documentation of the "CF_" variables. The purpose of all - # "HBT_" variables is documented below. + # "MULTILEPTON_" variables is documented below. # # The setup also handles the installation of the software stack via virtual environments, and # optionally an interactive setup where the user can configure certain variables. # - # # Arguments: - # 1. The name of the setup. "default" (which is itself the default when no name is set) - # triggers a setup with good defaults, avoiding all queries to the user and the writing of - # a custom setup file. See "interactive_setup()" for more info. - # - # - # Optinally preconfigured environment variables: - # None yet. - # + # 1. A "name" of setup. + # 2. "minimal" or "full" setup, affect which venv from the sandbox will be sourced # # Variables defined by the setup and potentially required throughout the analysis: - # HBT_BASE + # MULTILEPTON_BASE # The absolute analysis base directory. Used to infer file locations relative to it. 
-    #   HBT_SETUP
+    #   MULTILEPTON_SETUP
     #       A flag that is set to 1 after the setup was successful.
-
+
+
+    if [ $# -lt 1 ] || [ "$1" == "-h" ] || [ "$1" == "--help" ]; then
+        echo ""
+        echo "Usage: source setup.sh <name> [sandbox_type]"
+        echo ""
+        echo "Arguments:"
+        echo "  <name>          Name of the setup (random name of your choice)"
+        echo "  [sandbox_type]  Optional: choose between 'minimal' (default) or 'full'"
+        echo ""
+        cf_color green "Examples:"
+        cf_color green "  source setup.sh dev       # uses minimal environment"
+        cf_color green "  source setup.sh dev full  # uses extended environment"
+        echo ""
+        cf_color cyan "'minimal' → uses MINIMAL environment from (sandboxes/venv_multilepton.sh)"
+        cf_color cyan "'full'    → uses FULL environment from (sandboxes/venv_multilepton_dev.sh)"
+        echo ""
+        return 1
+    fi
+
+
     #
     # load cf setup helpers
     #
-
     local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
     local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
     local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"
     local cf_base="${this_dir}/modules/columnflow"
     CF_SKIP_SETUP="true" source "${cf_base}/setup.sh" "" || return "$?"
-
-    #
-    # prevent repeated setups
-    #
-
-    cf_export_bool HBT_SETUP
-    if ${HBT_SETUP} && ! ${CF_ON_SLURM}; then
-        >&2 echo "the HH -> bbtautau analysis was already succesfully setup"
-        >&2 echo "re-running the setup requires a new shell"
-        return "1"
-    fi
-
+
     #
     # prepare local variables
     #
+    # enforce that a setup name is given
+    if [ $# -lt 1 ]; then
+        echo "A setup name is required! Usage: source setup.sh <name> [sandbox_type]"
+        return 1
+    fi
     local orig="${PWD}"
-    local setup_name="${1:-default}"
+    local setup_name="$1"
+    local which_sandbox="${2:-minimal}"   # default to "minimal" if nothing passed
     local setup_is_default="false"
     [ "${setup_name}" = "default" ] && setup_is_default="true"
+    #
+    # prevent repeated setups
+    #
+    cf_export_bool MULTILEPTON_SETUP
+    if ${MULTILEPTON_SETUP} && ! ${CF_ON_SLURM}; then
+        >&2 echo "The HH → Multilepton analysis was already successfully set up"
+        >&2 echo "re-running the setup requires a new shell"
+        return "1"
+    fi
+
     # zsh options
     if ${shell_is_zsh}; then
         emulate -L bash
@@ -65,30 +82,52 @@ setup_hbt() {
     #
     # global variables
-    # (HBT = hh2bbtautau, CF = columnflow)
+    # (MULTILEPTON = hhmultilepton, CF = columnflow)
     #
-
+
     # start exporting variables
-    export HBT_BASE="${this_dir}"
+    export MULTILEPTON_BASE="${this_dir}"
     export CF_BASE="${cf_base}"
-    export CF_REPO_BASE="${HBT_BASE}"
-    export CF_REPO_BASE_ALIAS="HBT_BASE"
+    export CF_REPO_BASE="${MULTILEPTON_BASE}"
+    export CF_REPO_BASE_ALIAS="MULTILEPTON_BASE"
     export CF_SETUP_NAME="${setup_name}"
     export CF_SCHEDULER_HOST="${CF_SCHEDULER_HOST:-naf-cms14.desy.de}"
     export CF_SCHEDULER_PORT="${CF_SCHEDULER_PORT:-8088}"
+    # Choose between minimal and extended sandboxes
+    if [[ "${which_sandbox}" == "minimal" || "${1}" == *"minimal"* ]]; then
+        export CF_INTERACTIVE_VENV_FILE="${CF_INTERACTIVE_VENV_FILE:-${MULTILEPTON_BASE}/sandboxes/venv_multilepton.sh}"
+        cf_color green "→ Using MINIMAL venv from (sandboxes/venv_multilepton.sh)"
+    else
+        export CF_INTERACTIVE_VENV_FILE="${CF_INTERACTIVE_VENV_FILE:-${MULTILEPTON_BASE}/sandboxes/venv_multilepton_dev.sh}"
+        cf_color green "→ Using EXTENDED venv from (sandboxes/venv_multilepton_dev.sh)"
+    fi
+    [ ! -z "${CF_INTERACTIVE_VENV_FILE}" ] && export CF_INSPECT_SANDBOX="$( basename "${CF_INTERACTIVE_VENV_FILE%.*}" )"
+    # default job flavor settings (starting with manivald / lxplus cluster defaults)
+    # used by law.cfg and, in turn, modules/columnflow/tasks/framework/remote.py
+    local cf_htcondor_flavor_default="cern_el9"
+    local cf_slurm_flavor_default="manivald"
+    local cf_slurm_partition_default="main"
+    local hname="$( hostname 2> /dev/null )"
+    if [ "$?" = "0" ]; then
+        # lxplus
+        if [[ "${hname}" == lx*.cern.ch ]]; then
+            cf_htcondor_flavor_default="cern"
+        fi
+    fi
+    export CF_HTCONDOR_FLAVOR="${CF_HTCONDOR_FLAVOR:-${cf_htcondor_flavor_default}}"
+    export CF_SLURM_FLAVOR="${CF_SLURM_FLAVOR:-${cf_slurm_flavor_default}}"
+    export CF_SLURM_PARTITION="${CF_SLURM_PARTITION:-${cf_slurm_partition_default}}"
     # interactive setup
     if ! ${CF_REMOTE_ENV}; then
         cf_setup_interactive_body() {
             # the flavor will be cms
             export CF_FLAVOR="cms"
-
             # query common variables
             cf_setup_interactive_common_variables
-
             # specific variables would go here
         }
-        cf_setup_interactive "${CF_SETUP_NAME}" "${HBT_BASE}/.setups/${CF_SETUP_NAME}.sh" || return "$?"
+        cf_setup_interactive "${CF_SETUP_NAME}" "${MULTILEPTON_BASE}/.setups/${CF_SETUP_NAME}.sh" || return "$?"
     fi
     # continue the fixed setup
@@ -99,53 +138,36 @@ setup_hbt() {
     #
     # common variables
     #
-
     cf_setup_common_variables || return "$?"
     #
     # minimal local software setup
     #
-
     cf_setup_software_stack "${CF_SETUP_NAME}" || return "$?"
     # ammend paths that are not covered by the central cf setup
-    export PATH="${HBT_BASE}/bin:${PATH}"
-    export PYTHONPATH="${HBT_BASE}:${HBT_BASE}/modules/cmsdb:${PYTHONPATH}"
+    export PATH="${MULTILEPTON_BASE}/bin:${PATH}"
+    export PYTHONPATH="${MULTILEPTON_BASE}:${MULTILEPTON_BASE}/modules/cmsdb:${PYTHONPATH}"
     # initialze submodules
-    if ! ${CF_REMOTE_ENV} && [ -e "${HBT_BASE}/.git" ]; then
+    if ! ${CF_REMOTE_ENV} && [ -e "${MULTILEPTON_BASE}/.git" ]; then
         local m
-        for m in $( ls -1q "${HBT_BASE}/modules" ); do
-            cf_init_submodule "${HBT_BASE}" "modules/${m}"
+        for m in $( ls -1q "${MULTILEPTON_BASE}/modules" ); do
+            cf_init_submodule "${MULTILEPTON_BASE}" "modules/${m}"
         done
     fi
     #
-    # git hooks
+    # additional common cf setup steps
    #
-
-    if ${CF_LOCAL_ENV}; then
-        cf_setup_git_hooks || return "$?"
-    fi
-
-    #
-    # law setup
-    #
-
-    export LAW_HOME="${LAW_HOME:-${HBT_BASE}/.law}"
-    export LAW_CONFIG_FILE="${LAW_CONFIG_FILE:-${HBT_BASE}/law.cfg}"
-
-    # run the indexing when not remote
-    if ! ${CF_REMOTE_ENV} && which law &> /dev/null; then
-        # source law's bash completion scipt
-        source "$( law completion )" ""
-
-        # add completion to the claw command
-        complete -o bashdefault -o default -F _law_complete claw
-
-        # silently index
-        law index -q
-    fi
+    if ! (micromamba env export | grep -q correctionlib); then
+        echo "correctionlib missing, installing..."
+        micromamba install \
+            correctionlib==2.7.0 \
+            || return "$?"
+        micromamba clean --yes --all
+    fi
+    cf_setup_post_install || return "$?"
# update the law config file to switch from mirrored to bare wlcg targets # as local mounts are typically not available remotely @@ -153,25 +175,37 @@ setup_hbt() { sed -i -r 's/(.+\: ?)wlcg_mirrored, local_.+, ?(wlcg_[^\s]+)/\1wlcg, \2/g' "${LAW_CONFIG_FILE}" fi + # # finalize - export HBT_SETUP="true" + # + export MULTILEPTON_SETUP="true" + PS1="\[\033[1;35m\][multilepton_venv]\[\033[0m\] \u@\h:\W\$ " +} + +multilepton_show_banner() { + cat << EOF + $(cf_color blue_bright ' ╦ ╦ ╦ ╦')$(cf_color red_bright ' ')$(cf_color blue_bright '') + $(cf_color blue_bright ' ╠═╣ ╠═╣')$(cf_color red_bright ' (H→WW/ZZ/𝜏𝜏)')$(cf_color blue_bright ' → Multi-Leptons') + $(cf_color blue_bright ' ╩ ╩ ╩ ╩')$(cf_color red_bright ' ')$(cf_color blue_bright '') +EOF } main() { # Invokes the main action of this script, catches possible error codes and prints a message. - # run the actual setup - if setup_hbt "$@"; then - cf_color green "HH -> bbtautau analysis successfully setup" + if setup_multilepton "$@"; then + multilepton_show_banner + cf_color green "HH -> Multilepton analysis successfully setup" return "0" else local code="$?" - cf_color red "setup failed with code ${code}" + cf_color red "HH -> Multilepton analysis setup failed with code ${code}" return "${code}" fi + } # entry point -if [ "${HBT_SKIP_SETUP}" != "true" ]; then +if [ "${MULTILEPTON_SKIP_SETUP}" != "true" ]; then main "$@" fi diff --git a/tests/__init__.py b/tests/__init__.py index e929cff7..388b2605 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -13,7 +13,7 @@ import sys base = os.path.normpath(os.path.join(os.path.abspath(__file__), "../..")) sys.path.append(base) -import hbt # noqa +import multilepton # noqa # import all tests # ... diff --git a/tests/run_linting b/tests/run_linting index e8c45a72..6552a438 100755 --- a/tests/run_linting +++ b/tests/run_linting @@ -6,11 +6,11 @@ action() { local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )" local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )" local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )" - local hbt_dir="$( dirname "${this_dir}" )" + local multilepton_dir="$( dirname "${this_dir}" )" ( - cd "${hbt_dir}" && \ - flake8 hbt tests + cd "${multilepton_dir}" && \ + flake8 multilepton tests ) } action "$@"
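# ---------------------------------------------------------------------------------------------
# Editor's sketch (not part of the patch): tests/run_linting above simply calls
# "flake8 multilepton tests" from the repository root. The equivalent check can also be run
# from Python via flake8's documented legacy API; the paths assume the same checkout layout
# and that the .flake8 config is picked up from the working directory.
from flake8.api import legacy as flake8_api

style_guide = flake8_api.get_style_guide()
report = style_guide.check_files(["multilepton", "tests"])
print(f"{report.total_errors} style issue(s) found")
# ---------------------------------------------------------------------------------------------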