diff --git a/lawluigi_configs/KingMaker_luigi.cfg b/lawluigi_configs/KingMaker_luigi.cfg index 709ac56..e525db1 100644 --- a/lawluigi_configs/KingMaker_luigi.cfg +++ b/lawluigi_configs/KingMaker_luigi.cfg @@ -51,6 +51,13 @@ only_missing = True ; bootstrap file to be sourced at beginning of htcondor jobs (relative PATH to framework.py) bootstrap_file = setup_law_remote.sh files_per_task = 10 + +; If some datasets are known to result in files with a large number of events, a lower number +; of files per task can be set for these datasets to avoid long runtimes. This is a mapping +; from sample type (the "sample_type" key of the sample_database collection) to the number of +; files per task, e.g., {"ttbar": 3, "wjets": 20}. +custom_files_per_task = {} + ; scopes and shifts are to be provided in the config, or as command line arguments via --scope and --shift ; in both cases, the values are expected to be comma-separated lists without spaces or quotes scopes = mt,et @@ -112,4 +119,4 @@ friend_mapping = {} silent = True ; set to False to print out the datasets -[QuantitiesMap] \ No newline at end of file +[QuantitiesMap] diff --git a/processor/framework.py b/processor/framework.py index 842e437..8763569 100644 --- a/processor/framework.py +++ b/processor/framework.py @@ -73,8 +73,8 @@ class Task(law.Task): description="Tag to differentiate workflow runs. Set to a timestamp as default.", ) nanoAOD_version = luigi.Parameter( - default=NanoAODVersions.v12.value, - description="Version of the NanoAOD files that are used in the analysis. 'NanoAOD_v12' is the default.", + default=NanoAODVersions.v15.value, + description="Version of the NanoAOD files that are used in the analysis. 
'NanoAOD_v15' is the default.", ) # Ensure that branch parameter is processed normally diff --git a/processor/tasks/CROWNBase.py b/processor/tasks/CROWNBase.py index 695cce5..0696f88 100644 --- a/processor/tasks/CROWNBase.py +++ b/processor/tasks/CROWNBase.py @@ -152,6 +152,11 @@ class CROWNExecuteBase(HTCondorWorkflow, law.LocalWorkflow): analysis = luigi.Parameter() config = luigi.Parameter() files_per_task = luigi.IntParameter() + custom_files_per_task = luigi.DictParameter( + default={}, + significant=False, + description="Map specific sample_types to custom files_per_task", + ) def htcondor_output_directory(self): return law.LocalDirectoryTarget(self.local_path(f"htcondor_files/{self.nick}")) diff --git a/processor/tasks/CROWNRun.py b/processor/tasks/CROWNRun.py index f09fe45..2d7463a 100644 --- a/processor/tasks/CROWNRun.py +++ b/processor/tasks/CROWNRun.py @@ -45,6 +45,9 @@ def create_branch_map(self): if len(inputdata["filelist"]) == 0: raise Exception("No files found for dataset {}".format(self.nick)) files_per_task = self.files_per_task + custom_fpt = self.custom_files_per_task.get(self.sample_type) + if custom_fpt is not None: + files_per_task = int(custom_fpt) if self.sample_type == "data" and any( era in self.nick for era in self.problematic_eras ):