Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion columnflow/calibration/cms/jets.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ def get_jer_config_default(self: Calibrator) -> DotDict:
# whether gen jet matching should be performed relative to the nominal jet pt, or the jec varied values
gen_jet_matching_nominal=False,
# regions where stochastic smearing is applied
stochastic_smearing_mask=lambda self, jets: ak.ones_like(jets.pt, dtype=np.bool),
stochastic_smearing_mask=lambda self, jets: ak.ones_like(jets.pt, dtype=np.bool_),
)
def jer(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:
"""
Expand Down
6 changes: 6 additions & 0 deletions columnflow/calibration/cmsGhent/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# coding: utf-8
# flake8: noqa

import columnflow.calibration.cmsGhent.lepton_mva
import columnflow.calibration.cmsGhent.hdamp
import columnflow.calibration.cmsGhent.ttbar_minnlo
121 changes: 121 additions & 0 deletions columnflow/calibration/cmsGhent/hdamp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""
Code to add lepton MVA to NanoAOD
"""

import law

from columnflow.calibration import Calibrator, calibrator
from columnflow.util import maybe_import, four_vec
from columnflow.columnar_util import set_ak_column
from columnflow.columnar_util_Ghent import TetraVec
from columnflow.tasks.external import BundleExternalFiles

np = maybe_import("numpy")
ak = maybe_import("awkward")
coffea = maybe_import("coffea")
maybe_import("coffea.nanoevents.methods.nanoaod")


@calibrator(
uses=four_vec("GenPart", "pdgId"),
produces={"hdamp_{up,down}"},
sandbox="bash::$CF_BASE/sandboxes/venv_onnxruntime.sh",
maxM=243.9517,
default_hdamp=1.379,
)
def hdamp_reweighting_producer(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:
"""
Produces the hdamp reweighting scores.
Based on https://twiki.cern.ch/twiki/pub/CMS/MLReweighting/ImplementationCMSSW.pdf
Requires an external file in the config under ``hdamp``:

.. code-block:: python

cfg.x.external_files = DotDict.wrap({
"hdamp": {
"up": f"YOURDIRECTORY/mymodel12_hdamp_up_13TeV.onnx",
"down": f"YOURDIRECTORY/mymodel12_hdamp_down_13TeV.onnx",
},
})

The onnx files can be found on this twiki:
https://twiki.cern.ch/twiki/bin/view/CMS/MLReweighting

Requires adding the environment venv_onnx which includes onnx to the analysis or config. E.g.

analysis_inst.x.bash_sandboxes = [
"$CF_BASE/sandboxes/cf.sh",
"$CF_BASE/sandboxes/venv_onnxruntime.sh",
]

"""
input = []
sum_top = None
for pdgId in [6, -6]:
# get the initial top quarks
top = events.GenPart[events.GenPart.pdgId == pdgId][:, 0]
top = TetraVec(top)

# sum top quarks
sum_top = top if sum_top is None else (sum_top + top)

#
top_inp = np.array([
np.log10(top.pt),
top.rapidity,
top.phi,
top.mass / self.maxM,
] + [
np.full(len(top), cst)
for cst in [
{6: 0.1, -6: 0.2}[pdgId],
self.default_hdamp,
]
])
input.append(top_inp)

# if pt of sum larger then 1000, no reweighting
mask = sum_top.pt < 1000

# (2, 6, N) > (N, 2, 6)
input = np.rollaxis(np.array(input), -1)[mask]
for variation, model in self.models.items():
label_name = model.get_outputs()[0].name
input_name = model.get_inputs()[0].name
pred = model.run([label_name], {input_name: input.astype(np.float32)})[0]
out = np.ones(len(events))
out[mask] = pred[:, 0] / pred[:, 1]
events = set_ak_column(events, f"hdamp_{variation}", out)

return events


@hdamp_reweighting_producer.requires
def hdamp_reweighting_producer_requires(
self: Calibrator,
task: law.Task,
reqs: dict,
**kwargs,
) -> None:
if "external_files" in reqs:
return
reqs["external_files"] = BundleExternalFiles.req(task)


@hdamp_reweighting_producer.setup
def hdamp_reweighting_producer_setup(
self: Calibrator,
task: law.Task,
reqs: dict,
inputs: dict,
reader_targets: law.util.InsertableDict,
) -> None:
bundle = reqs["external_files"]

# create the xgboost predictor
import onnxruntime

self.models = {}
for variation in ["up", "down"]:
file = bundle.files.hdamp[variation].path
self.models[variation] = onnxruntime.InferenceSession(file)
2 changes: 1 addition & 1 deletion columnflow/calibration/cmsGhent/lepton_mva.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def lepton_mva_producer(self: Calibrator, events: ak.Array, **kwargs) -> ak.Arra


@lepton_mva_producer.requires
def lepton_mva_producer_requires(self: Calibrator, task: law.Task, reqs: dict) -> None:
def lepton_mva_producer_requires(self: Calibrator, task: law.Task, reqs: dict, **kwargs) -> None:
if "external_files" in reqs:
return
reqs["external_files"] = BundleExternalFiles.req(task)
Expand Down
133 changes: 133 additions & 0 deletions columnflow/calibration/cmsGhent/ttbar_minnlo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""
Code to add lepton MVA to NanoAOD
"""

import law

from columnflow.calibration import Calibrator, calibrator
from columnflow.util import maybe_import, four_vec
from columnflow.columnar_util import set_ak_column
from columnflow.columnar_util_Ghent import TetraVec
from columnflow.tasks.external import BundleExternalFiles

np = maybe_import("numpy")
ak = maybe_import("awkward")


def norm(X, mean, std, scale):
if scale == "log":
X = np.log(np.clip(X, a_min=1e-6, a_max=None))
# recenter and renormalize
return (X - mean) / np.where(std > 1e-2, std, 1)


@calibrator(
uses=four_vec("GenPart", {"pdgId", "statusFlags"}),
produces={"ttbar_minnlo_weight"},
sandbox="bash::$CF_BASE/sandboxes/venv_onnxruntime.sh",
input_norm={
# keys: 0 = ttbar, (-)6 = (anti)top
# values: mean, std, scale
"pt": {
0: (3.6520673599656903, 1.0123402362573612, "log"),
6: (4.595855742518925, 0.7101176940989488, "log"),
-6: (4.5986175957604045, 0.7103218938891299, "log"),
},
"rapidity": {
0: (0.0001718810581680775, 1.0362455506718102, "linear"),
6: (0.00022746366634849002, 1.213207643109532, "linear"),
-6: (0.00011712322394057398, 1.2076422016031159, "linear"),
},
"phi": {
0: (2.8943571877384285e-05, 1.8139038706413384, "linear"),
6: (-0.00028213870737636996, 1.8136544140703632, "linear"),
-6: (0.0003628069129526392, 1.8139415747773364, "linear"),
},
"mass": {
0: (6.21729978047307, 0.2771419580231537, "log"),
6: (171.93706459943778, 6.9652037622153, "linear"),
-6: (171.93691192651536, 6.9500586980501575, "linear"),
},
},
)
def ttbar_minnlo_reweighting_producer(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array:
"""
Produces the HVQ to MiNNLO reweighting values.
Based on https://twiki.cern.ch/twiki/pub/CMS/MLReweighting/CMSSW_script_afterShower.py.txt
Requires an external file in the config under ``ttbar_minnlo``:

.. code-block:: python

cfg.x.external_files = DotDict.wrap({
"ttbar_minnlo": f"YOURDIRECTORY/mymodel12_13TeV_MiNNLO_afterShower.onnx", },
})

Requires adding the environment venv_onnx which includes onnx to the analysis or config. E.g.

analysis_inst.x.bash_sandboxes = [
"$CF_BASE/sandboxes/cf.sh",
"$CF_BASE/sandboxes/venv_onnxruntime.sh",
]

"""
input = []
sum_top = None
for pdgId in [6, -6, 0]:
# get the initial top quarks
if not pdgId:
top = sum_top
else:
top = events.GenPart[events.GenPart.pdgId == pdgId]
top = top[top.hasFlags("isLastCopy")][:, 0]
top = TetraVec(top)
# sum top quarks
sum_top = top if sum_top is None else (sum_top + top)

# inputs
top_inp = [
norm(getattr(top, inp), *norms[pdgId])
for inp, norms in self.input_norm.items()
] + [np.full(len(top), pdgId / 10)]
input.append(np.array(top_inp))

# top, antitop, ttbar > ttbar, top, antitop
input = np.roll(input, 1, axis=0)

# (3, 6, N) > (N, 3, 6)
input = np.rollaxis(input, -1)

label_name = self.model.get_outputs()[0].name
input_name = self.model.get_inputs()[0].name
pred = self.model.run([label_name], {input_name: input.astype(np.float32)})[0]
events = set_ak_column(events, "ttbar_minnlo_weight", pred[:, 1] / pred[:, 0])

return events


@ttbar_minnlo_reweighting_producer.requires
def ttbar_minnlo_reweighting_producer_requires(
self: Calibrator,
task: law.Task,
reqs: dict,
**kwargs,
) -> None:
if "external_files" in reqs:
return
reqs["external_files"] = BundleExternalFiles.req(task)


@ttbar_minnlo_reweighting_producer.setup
def ttbar_minnlo_reweighting_producer_setup(
self: Calibrator,
task: law.Task,
reqs: dict,
inputs: dict,
reader_targets: law.util.InsertableDict,
) -> None:
bundle = reqs["external_files"]

# create the xgboost predictor
import onnxruntime

file = bundle.files.ttbar_minnlo.path
self.model = onnxruntime.InferenceSession(file)
2 changes: 1 addition & 1 deletion columnflow/production/cms/pileup.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def pu_weight(
"NumTrueInteractions": events.Pileup.nTrueInt,
}

any_outlier_mask = ak.zeros_like(events.Pileup.nTrueInt, dtype=np.bool)
any_outlier_mask = ak.zeros_like(events.Pileup.nTrueInt, dtype=np.bool_)
for column_name, syst in (
("pu_weight", "nominal"),
("pu_weight_minbias_xs_up", "up"),
Expand Down
2 changes: 1 addition & 1 deletion law.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ default_config: run2_pp_2018
default_dataset: st_tchannel_t

production_modules: columnflow.production.{categories,processes,normalization,veto}
calibration_modules: columnflow.calibration
calibration_modules: columnflow.calibration,columnflow.calibration.cmsGhent
selection_modules: columnflow.selection.empty
reduction_modules: columnflow.reduction.default
categorization_modules: columnflow.categorization
Expand Down
8 changes: 8 additions & 0 deletions sandboxes/onnxruntime.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# version 11

# use packages from columnar sandbox as baseline
-r columnar.txt

# add numpy and onnxruntime with exact version requirement
onnxruntime==1.15.1
numpy==1.26.4
18 changes: 18 additions & 0 deletions sandboxes/venv_onnxruntime.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

# Script that sets up a virtual env in $CF_VENV_BASE.
# For more info on functionality and parameters, see the generic setup script _setup_venv.sh.

action() {
local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"

# set variables and source the generic venv setup
export CF_SANDBOX_FILE="${CF_SANDBOX_FILE:-${this_file}}"
export CF_VENV_NAME="$( basename "${this_file%.sh}" )"
export CF_VENV_REQUIREMENTS="${this_dir}/onnxruntime.txt"

source "${this_dir}/_setup_venv.sh" "$@"
}
action "$@"
18 changes: 18 additions & 0 deletions sandboxes/venv_onnxruntime_dev.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

# Script that sets up a virtual env in $CF_VENV_BASE.
# For more info on functionality and parameters, see the generic setup script _setup_venv.sh.

action() {
local shell_is_zsh="$( [ -z "${ZSH_VERSION}" ] && echo "false" || echo "true" )"
local this_file="$( ${shell_is_zsh} && echo "${(%):-%x}" || echo "${BASH_SOURCE[0]}" )"
local this_dir="$( cd "$( dirname "${this_file}" )" && pwd )"

# set variables and source the generic venv setup
export CF_SANDBOX_FILE="${CF_SANDBOX_FILE:-${this_file}}"
export CF_VENV_NAME="$( basename "${this_file%.sh}" )"
export CF_VENV_REQUIREMENTS="${this_dir}/onnxruntime.txt,${this_dir}/dev.txt"

source "${this_dir}/_setup_venv.sh" "$@"
}
action "$@"