GhentAnalysis · JulesVandenbroeck · Nov 13, 2025 · Oct 9, 2025 · Oct 9, 2025 · Oct 10, 2025
diff --git a/analysis_templates/cms_minimal/law.cfg b/analysis_templates/cms_minimal/law.cfg
@@ -27,7 +27,7 @@ default_analysis: __cf_module_name__.config.analysis___cf_short_name_lc__.analys
 default_config: run2_2017_nano_v9
 default_dataset: st_tchannel_t_4f_powheg
 
-calibration_modules: columnflow.calibration.cms.{jets,met,tau}, __cf_module_name__.calibration.example
+calibration_modules: columnflow.calibration.cms.{jets,met,tau,egamma,muon}, __cf_module_name__.calibration.example
 selection_modules: columnflow.selection.empty, columnflow.selection.cms.{json_filter,met_filters}, __cf_module_name__.selection.example
 reduction_modules: columnflow.reduction.default, __cf_module_name__.reduction.example
 production_modules: columnflow.production.{categories,matching,normalization,processes}, columnflow.production.cms.{btag,electron,jet,matching,mc_weight,muon,pdf,pileup,scale,parton_shower,seeds,gen_particles}, __cf_module_name__.production.example
@@ -65,6 +65,7 @@ htcondor_flavor: $CF_HTCONDOR_FLAVOR
 htcondor_share_software: False
 htcondor_memory: -1
 htcondor_disk: -1
+htcondor_runtime: 3h
 slurm_flavor: $CF_SLURM_FLAVOR
 slurm_partition: $CF_SLURM_PARTITION
 

diff --git a/bin/cf_inspect.py b/bin/cf_inspect.py
@@ -59,10 +59,13 @@ def _load_nano_root(fname: str, treepath: str | None = None, **kwargs) -> ak.Arr
     except:
         return uproot.open(fname)
 
-
-def _load_h5(fname: str, **kwargs):
-    import h5py
-    return h5py.File(fname, "r")
+    return coffea.nanoevents.NanoEventsFactory.from_root(
+        source,
+        treepath=treepath,
+        mode="eager",
+        runtime_cache=None,
+        persistent_cache=None,
+    ).events()
 
 
 def load(fname: str, **kwargs) -> Any:
@@ -78,8 +81,6 @@ def load(fname: str, **kwargs) -> Any:
         return _load_nano_root(fname, **kwargs)
     if ext == ".json":
         return _load_json(fname, **kwargs)
-    if ext in [".h5", ".hdf5"]:
-        return _load_h5(fname, **kwargs)
     raise NotImplementedError(f"no loader implemented for extension '{ext}'")
 
 

diff --git a/columnflow/calibration/__init__.py b/columnflow/calibration/__init__.py
@@ -8,44 +8,82 @@
 
 import inspect
 
-from columnflow.types import Callable
+import law
+
 from columnflow.util import DerivableMeta
 from columnflow.columnar_util import TaskArrayFunction
+from columnflow.types import Callable, Sequence, Any
+
+
+class TaskArrayFunctionWithCalibratorRequirements(TaskArrayFunction):
+
+    require_calibrators: Sequence[str] | set[str] | None = None
+
+    def _req_calibrator(self, task: law.Task, calibrator: str) -> Any:
+        # hook to customize how required calibrators are requested
+        from columnflow.tasks.calibration import CalibrateEvents
+        return CalibrateEvents.req_other_calibrator(task, calibrator=calibrator)
 
+    def requires_func(self, task: law.Task, reqs: dict, **kwargs) -> None:
+        # no requirements for workflows in pilot mode
+        if callable(getattr(task, "is_workflow", None)) and task.is_workflow() and getattr(task, "pilot", False):
+            return
 
-class Calibrator(TaskArrayFunction):
+        # add required calibrators when set
+        if (calibs := self.require_calibrators):
+            reqs["required_calibrators"] = {calib: self._req_calibrator(task, calib) for calib in calibs}
+
+    def setup_func(
+        self,
+        task: law.Task,
+        reqs: dict,
+        inputs: dict,
+        reader_targets: law.util.InsertableDict,
+        **kwargs,
+    ) -> None:
+        if "required_calibrators" in inputs:
+            for calib, inp in inputs["required_calibrators"].items():
+                reader_targets[f"required_calibrator_{calib}"] = inp["columns"]
+
+
+class Calibrator(TaskArrayFunctionWithCalibratorRequirements):
     """
     Base class for all calibrators.
     """
 
     exposed = True
 
+    # register attributes for arguments accepted by decorator
+    mc_only: bool = False
+    data_only: bool = False
+
     @classmethod
     def calibrator(
         cls,
         func: Callable | None = None,
         bases: tuple = (),
         mc_only: bool = False,
         data_only: bool = False,
+        require_calibrators: Sequence[str] | set[str] | None = None,
         **kwargs,
     ) -> DerivableMeta | Callable:
         """
-        Decorator for creating a new :py:class:`~.Calibrator` subclass with additional, optional
-        *bases* and attaching the decorated function to it as ``call_func``.
+        Decorator for creating a new :py:class:`~.Calibrator` subclass with additional, optional *bases* and attaching
+        the decorated function to it as ``call_func``.
 
-        When *mc_only* (*data_only*) is *True*, the calibrator is skipped and not considered by
-        other calibrators, selectors and producers in case they are evalauted on a
-        :py:class:`order.Dataset` (using the :py:attr:`dataset_inst` attribute) whose ``is_mc``
-        (``is_data``) attribute is *False*.
+        When *mc_only* (*data_only*) is *True*, the calibrator is skipped and not considered by other calibrators,
+        selectors and producers in case they are evaluated on a :py:class:`order.Dataset` (using the
+        :py:attr:`dataset_inst` attribute) whose ``is_mc`` (``is_data``) attribute is *False*.
 
         All additional *kwargs* are added as class members of the new subclasses.
 
         :param func: Function to be wrapped and integrated into new :py:class:`Calibrator` class.
         :param bases: Additional bases for the new :py:class:`Calibrator`.
-        :param mc_only: Boolean flag indicating that this :py:class:`Calibrator` should only run on
-            Monte Carlo simulation and skipped for real data.
-        :param data_only: Boolean flag indicating that this :py:class:`Calibrator` should only run
-            on real data and skipped for Monte Carlo simulation.
+        :param mc_only: Boolean flag indicating that this :py:class:`Calibrator` should only run on Monte Carlo
+            simulation and be skipped for real data.
+        :param data_only: Boolean flag indicating that this :py:class:`Calibrator` should only run on real data and
+            be skipped for Monte Carlo simulation.
+        :param require_calibrators: Sequence of names of other calibrators to add to the requirements.
         :return: New :py:class:`Calibrator` subclass.
         """
         def decorator(func: Callable) -> DerivableMeta:
@@ -55,6 +93,7 @@ def decorator(func: Callable) -> DerivableMeta:
                 "call_func": func,
                 "mc_only": mc_only,
                 "data_only": data_only,
+                "require_calibrators": require_calibrators,
             }
 
             # get the module name

diff --git a/columnflow/calibration/cms/egamma.py b/columnflow/calibration/cms/egamma.py
@@ -23,7 +23,7 @@
 from columnflow.calibration import Calibrator, calibrator
 from columnflow.calibration.util import ak_random
 from columnflow.util import maybe_import, load_correction_set, DotDict
-from columnflow.columnar_util import set_ak_column, full_like
+from columnflow.columnar_util import TAFConfig, set_ak_column, full_like
 from columnflow.types import Any
 
 ak = maybe_import("awkward")
@@ -37,7 +37,7 @@
 
 
 @dataclasses.dataclass
-class EGammaCorrectionConfig:
+class EGammaCorrectionConfig(TAFConfig):
     """
     Container class to describe energy scaling and smearing configurations. Example:
 
@@ -54,7 +54,7 @@ class EGammaCorrectionConfig:
     smear_syst_correction_set: str
     scale_compound: bool = False
     smear_syst_compound: bool = False
-    systs: list[str] = dataclasses.field(default_factory=list)
+    systs: list[str] = dataclasses.field(default_factory=lambda: ["scale_down", "scale_up", "smear_down", "smear_up"])
     corrector_kwargs: dict[str, Any] = dataclasses.field(default_factory=dict)
 
 
@@ -72,9 +72,10 @@ def _egamma_scale_smear(self: Calibrator, events: ak.Array, **kwargs) -> ak.Arra
     # gather inputs
     coll = events[self.collection_name]
     variable_map = {
-        "run": events.run,
+        "run": events.run if ak.sum(ak.num(coll, axis=1), axis=0) else [],
         "pt": coll.pt,
         "ScEta": coll.superclusterEta,
+        "AbsScEta": abs(coll.superclusterEta),
         "r9": coll.r9,
         "seedGain": coll.seedGain,
         **self.cfg.corrector_kwargs,
@@ -109,22 +110,21 @@ def get_inputs(corrector, **additional_variables):
             events = set_ak_column(events, f"{self.collection_name}.pt_smear_uncorrected", coll.pt)
             events = set_ak_column(events, f"{self.collection_name}.energyErr_smear_uncorrected", coll.energyErr)
 
-        # helper to compute random variables in the shape of the collection
-        def get_rnd(syst):
-            args = (full_like(coll.pt, 0.0), full_like(coll.pt, 1.0))
-            if self.use_deterministic_seeds:
-                args += (coll.deterministic_seed,)
-                rand_func = self.deterministic_normal[syst]
-            else:
-                # TODO: bit generator could be configurable
-                rand_func = np.random.Generator(np.random.SFC64((events.event + sum(map(ord, syst))).to_list())).normal
-            return ak_random(*args, rand_func=rand_func)
+        # compute random variables in the shape of the collection once
+        rnd_args = (full_like(coll.pt, 0.0), full_like(coll.pt, 1.0))
+        if self.use_deterministic_seeds:
+            rnd_args += (coll.deterministic_seed,)
+            rand_func = self.deterministic_normal
+        else:
+            # TODO: bit generator could be configurable
+            rand_func = np.random.Generator(np.random.SFC64((events.event).to_list())).normal
+        rnd = ak_random(*rnd_args, rand_func=rand_func)
 
         # helper to compute smeared pt and energy error values given a syst
         def apply_smearing(syst):
             # get smeared pt
             smear = self.smear_syst_corrector.evaluate(syst, *get_inputs(self.smear_syst_corrector))
-            smear_factor = 1.0 + smear * get_rnd(syst)
+            smear_factor = 1.0 + smear * rnd
             pt_smeared = coll.pt * smear_factor
             # get smeared energy error
             energy_err_smeared = (((coll.energyErr)**2 + (coll.energy * smear)**2) * smear_factor)**0.5
@@ -219,11 +219,8 @@ def _deterministic_normal(loc, scale, seed, idx_offset=0):
                 for _loc, _scale, _seed in zip(loc, scale, seed)
             ])
 
-        self.deterministic_normal = {
-            "smear": functools.partial(_deterministic_normal, idx_offset=0),
-            "smear_up": functools.partial(_deterministic_normal, idx_offset=1),
-            "smear_down": functools.partial(_deterministic_normal, idx_offset=2),
-        }
+        # each systematic is to be evaluated with the same random number so use a fixed offset
+        self.deterministic_normal = functools.partial(_deterministic_normal, idx_offset=0)
 
 
 electron_scale_smear = _egamma_scale_smear.derive(