From a32e3eb6df5baf9ce5f3341570ff129683047ef9 Mon Sep 17 00:00:00 2001 From: Aashay Date: Thu, 16 Oct 2025 06:27:00 +0200 Subject: [PATCH 1/6] update skimmer for data --- .gitignore | 3 +- skimmer/datasets-data-2024.txt | 61 +++++++++++++++++ skimmer/datasets-signal.txt | 24 +++---- skimmer/executable.py | 115 +++++++++------------------------ skimmer/jetId.h | 46 +++++++++++++ skimmer/runSkimmer.sh | 13 ++-- 6 files changed, 158 insertions(+), 104 deletions(-) create mode 100644 skimmer/datasets-data-2024.txt create mode 100644 skimmer/jetId.h diff --git a/.gitignore b/.gitignore index 39ed8fe..0aebb28 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -validation/ \ No newline at end of file +validation/ +skimmer/logs/ \ No newline at end of file diff --git a/skimmer/datasets-data-2024.txt b/skimmer/datasets-data-2024.txt new file mode 100644 index 0000000..1372520 --- /dev/null +++ b/skimmer/datasets-data-2024.txt @@ -0,0 +1,61 @@ +/EGamma0/Run2024C-MINIv6NANOv15-v1/NANOAOD +/EGamma1/Run2024C-MINIv6NANOv15-v1/NANOAOD +/Muon0/Run2024C-MINIv6NANOv15-v1/NANOAOD +/Muon1/Run2024C-MINIv6NANOv15-v1/NANOAOD +/JetMET0/Run2024C-MINIv6NANOv15-v1/NANOAOD +/JetMET1/Run2024C-MINIv6NANOv15-v1/NANOAOD + +/EGamma0/Run2024D-MINIv6NANOv15-v1/NANOAOD +/EGamma1/Run2024D-MINIv6NANOv15-v1/NANOAOD +/Muon0/Run2024D-MINIv6NANOv15-v1/NANOAOD +/Muon1/Run2024D-MINIv6NANOv15-v1/NANOAOD +/JetMET0/Run2024D-MINIv6NANOv15-v1/NANOAOD +/JetMET1/Run2024D-MINIv6NANOv15-v1/NANOAOD + + +/EGamma0/Run2024E-MINIv6NANOv15-v1/NANOAOD +/EGamma1/Run2024E-MINIv6NANOv15-v1/NANOAOD +/JetMET0/Run2024E-MINIv6NANOv15-v1/NANOAOD +/JetMET1/Run2024E-MINIv6NANOv15-v1/NANOAOD +/Muon0/Run2024E-MINIv6NANOv15-v1/NANOAOD +/Muon1/Run2024E-MINIv6NANOv15-v1/NANOAOD + + +/EGamma0/Run2024F-MINIv6NANOv15-v1/NANOAOD +/EGamma1/Run2024F-MINIv6NANOv15-v1/NANOAOD +/JetMET0/Run2024F-MINIv6NANOv15-v2/NANOAOD +/JetMET1/Run2024F-MINIv6NANOv15-v2/NANOAOD +/Muon0/Run2024F-MINIv6NANOv15-v1/NANOAOD +/Muon1/Run2024F-MINIv6NANOv15-v1/NANOAOD + + +/EGamma0/Run2024G-MINIv6NANOv15-v2/NANOAOD +/EGamma1/Run2024G-MINIv6NANOv15-v2/NANOAOD +/JetMET0/Run2024G-MINIv6NANOv15-v2/NANOAOD +/JetMET1/Run2024G-MINIv6NANOv15-v2/NANOAOD +/Muon0/Run2024G-MINIv6NANOv15-v1/NANOAOD +/Muon1/Run2024G-MINIv6NANOv15-v2/NANOAOD + + +/EGamma0/Run2024H-MINIv6NANOv15-v2/NANOAOD +/EGamma1/Run2024H-MINIv6NANOv15-v1/NANOAOD +/JetMET0/Run2024H-MINIv6NANOv15-v2/NANOAOD +/JetMET1/Run2024H-MINIv6NANOv15-v2/NANOAOD +/Muon0/Run2024H-MINIv6NANOv15-v1/NANOAOD +/Muon1/Run2024H-MINIv6NANOv15-v2/NANOAOD + + +/EGamma0/Run2024I-MINIv6NANOv15-v1/NANOAOD +/EGamma0/Run2024I-MINIv6NANOv15_v2-v1/NANOAOD +/EGamma1/Run2024I-MINIv6NANOv15-v1/NANOAOD +/EGamma1/Run2024I-MINIv6NANOv15_v2-v1/NANOAOD +/JetMET0/Run2024I-MINIv6NANOv15-v2/NANOAOD +/JetMET0/Run2024I-MINIv6NANOv15_v2-v1/NANOAOD +/JetMET1/Run2024I-MINIv6NANOv15-v1/NANOAOD +/JetMET1/Run2024I-MINIv6NANOv15_v2-v2/NANOAOD +/Muon0/Run2024I-MINIv6NANOv15-v1/NANOAOD +/Muon0/Run2024I-MINIv6NANOv15_v2-v1/NANOAOD +/Muon1/Run2024I-MINIv6NANOv15-v1/NANOAOD +/Muon1/Run2024I-MINIv6NANOv15_v2-v1/NANOAOD + + diff --git a/skimmer/datasets-signal.txt b/skimmer/datasets-signal.txt index b37141f..1bd2a4c 100644 --- a/skimmer/datasets-signal.txt +++ b/skimmer/datasets-signal.txt @@ -1,14 +1,14 @@ -/eos/user/a/aaarora/signal/Run3Summer24/VBSWWH_OSWW_C2V1p0_13p6TeV_5f_LO_TuneCP5 -/eos/user/a/aaarora/signal/Run3Summer24/VBSWWH_SSWW_C2V1p0_13p6TeV_5f_LO_TuneCP5 -/eos/user/a/aaarora/signal/Run3Summer24/VBSWZH_C2V1p0_13p6TeV_5f_LO_TuneCP5 -/eos/user/a/aaarora/signal/Run3Summer24/VBSZZH_C2V1p0_13p6TeV_5f_LO_TuneCP5 +/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWWH_OSWW_C2V1p0_13p6TeV_5f_LO_TuneCP5 +/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWWH_SSWW_C2V1p0_13p6TeV_5f_LO_TuneCP5 +/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWZH_C2V1p0_13p6TeV_5f_LO_TuneCP5 +/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSZZH_C2V1p0_13p6TeV_5f_LO_TuneCP5 -/eos/user/a/aaarora/signal/Run3Summer24/VBSWWH_OSWW_C2V1p5_13p6TeV_5f_LO_TuneCP5 -/eos/user/a/aaarora/signal/Run3Summer24/VBSWWH_SSWW_C2V1p5_13p6TeV_5f_LO_TuneCP5 -/eos/user/a/aaarora/signal/Run3Summer24/VBSWZH_C2V1p5_13p6TeV_5f_LO_TuneCP5 -/eos/user/a/aaarora/signal/Run3Summer24/VBSZZH_C2V1p5_13p6TeV_5f_LO_TuneCP5 +/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWWH_OSWW_C2V1p5_13p6TeV_5f_LO_TuneCP5 +/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWWH_SSWW_C2V1p5_13p6TeV_5f_LO_TuneCP5 +/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWZH_C2V1p5_13p6TeV_5f_LO_TuneCP5 +/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSZZH_C2V1p5_13p6TeV_5f_LO_TuneCP5 -/eos/user/a/aaarora/signal/Run3Summer24/VBSWWH_OSWW_C2V2p0_13p6TeV_5f_LO_TuneCP5 -/eos/user/a/aaarora/signal/Run3Summer24/VBSWWH_SSWW_C2V2p0_13p6TeV_5f_LO_TuneCP5 -/eos/user/a/aaarora/signal/Run3Summer24/VBSWZH_C2V2p0_13p6TeV_5f_LO_TuneCP5 -/eos/user/a/aaarora/signal/Run3Summer24/VBSZZH_C2V2p0_13p6TeV_5f_LO_TuneCP5 \ No newline at end of file +/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWWH_OSWW_C2V2p0_13p6TeV_5f_LO_TuneCP5 +/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWWH_SSWW_C2V2p0_13p6TeV_5f_LO_TuneCP5 +/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWZH_C2V2p0_13p6TeV_5f_LO_TuneCP5 +/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSZZH_C2V2p0_13p6TeV_5f_LO_TuneCP5 \ No newline at end of file diff --git a/skimmer/executable.py b/skimmer/executable.py index 17001b9..2bfcd48 100755 --- a/skimmer/executable.py +++ b/skimmer/executable.py @@ -12,13 +12,14 @@ import ROOT as r -r.gInterpreter.Declare('#include "truthSelections.h"') - subprocess.run("python3 -m pip install --user --no-binary=correctionlib correctionlib", shell=True, check=True) import importlib correctionlib = importlib.import_module("correctionlib") correctionlib.register_pyroot_binding() +r.gInterpreter.Declare('#include "truthSelections.h"') +r.gInterpreter.Declare('#include "jetId.h"') + # Constants CONDOR_OUTPUT_DIR = "output" XROOTD_REDIRECTOR = "root://xrootd-cms.infn.it/" @@ -26,16 +27,15 @@ MAX_RETRIES = 10 SLEEP_DURATION = 60 # 1 minute in seconds -JET_ID_JSONS = {"2024": "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/POG/JME/2024_Summer24/jetid.json.gz"} class Skimmer(): - def __init__(self, inFiles, outDir, keepDropFile): + def __init__(self, inFiles, outDir, keepDropFile, is_signal): self.inFiles = inFiles self.outDir = outDir self.keepDropFile = keepDropFile + self.is_signal = is_signal self.df = r.RDataFrame("Events", self.inFiles) - r.RDF.Experimental.AddProgressBar(self.df) columns = self.df.GetColumnNames() for col in columns: if col.startswith("Muon_") or col.startswith("Electron_") or col.startswith("Jet_") or col.startswith("FatJet_"): @@ -79,7 +79,7 @@ def genSelection(df): return df - def analyze(self, is_signal): + def analyze(self): self.df = self.df.Define("__tight_mu_mask", "Muon_pt > 35. && abs(Muon_eta) < 2.4 && Muon_tightId") \ .Define("__tight_ele_mask", "Electron_pt > 35. && abs(Electron_eta) < 2.5 && Electron_cutBased >= 4") \ .Define("__n_tight_leptons", "Sum(__tight_mu_mask) + Sum(__tight_ele_mask)") \ @@ -87,48 +87,13 @@ def analyze(self, is_signal): .Define("__n_fatjets", "Sum(__fatjet_mask)") \ .Filter("(__n_fatjets + __n_tight_leptons) >= 1") - if self.sample_year in JET_ID_JSONS: - jet_id_json = JET_ID_JSONS[self.sample_year] - self.df = self.df.Define("Jet_multiplicity", "Jet_chMultiplicity + Jet_neMultiplicity") \ .Define("FatJet_multiplicity", "FatJet_chMultiplicity + FatJet_neMultiplicity") - r.gInterpreter.Declare(""" - #include - using namespace ROOT::VecOps; - - RVec evalJetID(const RVec& eta, const RVec& chHEF, const RVec& neHEF, - const RVec& chEmEF, const RVec& neEmEF, - const RVec& muEF, const RVec& chMultiplicity, - const RVec& neMultiplicity, const RVec& multiplicity) { - auto cset_jetId = correction::CorrectionSet::from_file(\"""" + jet_id_json + """\"); - RVec jetId(eta.size(), 0.0); - for (size_t i = 0; i < eta.size(); ++i) { - jetId[i] += 2 * cset_jetId->at(\"AK4PUPPI_Tight\")->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]}); - jetId[i] += 4 * cset_jetId->at(\"AK4PUPPI_TightLeptonVeto\")->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]}); - } - return jetId; - } - - RVec evalFatJetID(const RVec& eta, const RVec& chHEF, const RVec& neHEF, - const RVec& chEmEF, const RVec& neEmEF, - const RVec& muEF, const RVec& chMultiplicity, - const RVec& neMultiplicity, const RVec& multiplicity) { - auto cset_fatJetId = correction::CorrectionSet::from_file(\"""" + jet_id_json + """\"); - RVec fatJetId(eta.size(), 0.0); - for (size_t i = 0; i < eta.size(); ++i) { - fatJetId[i] += 2 * cset_fatJetId->at(\"AK8PUPPI_Tight\")->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]}); - fatJetId[i] += 4 * cset_fatJetId->at(\"AK8PUPPI_TightLeptonVeto\")->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]}); - } - return fatJetId; - } - - """) - - self.df = self.df.Define("Jet_jetId", "evalJetID(Jet_eta, Jet_chHEF, Jet_neHEF, Jet_chEmEF, Jet_neEmEF, Jet_muEF, Jet_chMultiplicity, Jet_neMultiplicity, Jet_multiplicity)") \ - .Define("FatJet_jetId", "evalFatJetID(FatJet_eta, FatJet_chHEF, FatJet_neHEF, FatJet_chEmEF, FatJet_neEmEF, FatJet_muEF, FatJet_chMultiplicity, FatJet_neMultiplicity, FatJet_multiplicity)") - - if is_signal: + self.df = self.df.Define("Jet_jetId", f"evalJetID{self.sample_year}(Jet_eta, Jet_chHEF, Jet_neHEF, Jet_chEmEF, Jet_neEmEF, Jet_muEF, Jet_chMultiplicity, Jet_neMultiplicity, Jet_multiplicity)") \ + .Define("FatJet_jetId", f"evalFatJetID{self.sample_year}(FatJet_eta, FatJet_chHEF, FatJet_neHEF, FatJet_chEmEF, FatJet_neEmEF, FatJet_muEF, FatJet_chMultiplicity, FatJet_neMultiplicity, FatJet_multiplicity)") + + if self.is_signal: self.df = self.genSelection(self.df) # Run3 event filters @@ -144,7 +109,7 @@ def analyze(self, is_signal): return self.df.Count().GetValue() def Snapshot(self, tag): - all_cols = [str(col) for col in self.df.GetColumnNames()] + all_cols = [str(col) for col in self.df.GetColumnNames() if not col.startswith("__")] keep_cols = {col: 0 for col in all_cols} comment = re.compile(r"#.*") ops = [] @@ -182,11 +147,12 @@ def Snapshot(self, tag): @property def sample_year(self): - match = re.search(r'Run3Summer24|RunIII2024Summer24NanoAODv15', self.inFiles[0]) - if match: - return "2024" + match = re.search(r'Run3Summer24|RunIII2024Summer24NanoAODv15|Run2024', self.inFiles[0]) + if not match: + raise ValueError("Could not determine sample year from filename") else: - return None + return "2024" + def run_skimmer(input_file, output_dir, is_signal): print(f"Running skimmer on {input_file}") @@ -195,43 +161,22 @@ def run_skimmer(input_file, output_dir, is_signal): inFiles = [XROOTD_REDIRECTOR + input_file if input_file.startswith('/store') else 'file://' + input_file] keepDropFile = "keep_and_drop_skim.txt" - skimmer = Skimmer(inFiles, output_dir, keepDropFile) - passed = skimmer.analyze(is_signal) + skimmer = Skimmer(inFiles, output_dir, keepDropFile, is_signal) + passed = skimmer.analyze() if passed: - skimmer.Snapshot("skim") + skimmer.Snapshot("output") return True else: print("No entries in output") return False - -def merge_skims(output_dir): - skim_files = glob.glob(f"{output_dir}/*") - - if len(skim_files) == 0: - print("No output files to merge; exiting...") - return True - elif len(skim_files) == 1: - shutil.move(skim_files[0], f"{output_dir}/output.root") - return True - else: - merge_cmd = ["hadd", f"{output_dir}/output.root"] + skim_files - print(" ".join(merge_cmd)) - result = subprocess.run(merge_cmd) - return result.returncode == 0 - - def determine_output_paths(input_file, is_signal, output_tag): if not is_signal: - era = input_file.split('/')[3] - sample_name = input_file.split('/')[4] - campaign = input_file.split('/')[6] + sub_output_dir = "/".join(input_file.split('/')[3:5] + input_file.split('/')[8:]) else: - era = input_file.split('/')[6] - sample_name = input_file.split('/')[7] - campaign = "private" + sub_output_dir = "/".join(input_file.split('/')[7:]) - output_dir = f"{OUTPUT_XRD}/skims_{output_tag}/{campaign}/{sample_name}" + output_dir = f"{OUTPUT_XRD}/skims_{output_tag}/{sub_output_dir}" return output_dir def check_output_liveness(file): @@ -272,7 +217,6 @@ def copy_output_file(source, destination): parser = ArgumentParser(description='Run the NanoAOD skimmer with file transfer.') parser.add_argument('proxy', help="Path to the X509 proxy") parser.add_argument('input_file', help="Input file path") - parser.add_argument('job_id', help="Job ID") parser.add_argument('is_signal', help='Flag indicating if this is a signal sample', type=int) parser.add_argument('output_tag', help='Output tag, including version of skims eg. v2', type=str) args = parser.parse_args() @@ -284,13 +228,13 @@ def copy_output_file(source, destination): if not success: print("Skimmer failed; retrying one more time...") success = run_skimmer(args.input_file, CONDOR_OUTPUT_DIR, args.is_signal) - - merge_skims(CONDOR_OUTPUT_DIR) - - output_dir = determine_output_paths(args.input_file, args.is_signal, args.output_tag) + + if not success: + raise ValueError("Skimmer failed twice; exiting...") + copy_src = os.path.join(os.getcwd(), f"{CONDOR_OUTPUT_DIR}/output.root") - copy_dest = f"{output_dir}/output_{args.job_id}.root" + copy_dest = determine_output_paths(args.input_file, args.is_signal, args.output_tag) for attempt in range(MAX_RETRIES + 1): success = copy_output_file(copy_src, copy_dest) @@ -302,7 +246,6 @@ def copy_output_file(source, destination): time.sleep(SLEEP_DURATION) if not success: - print(f"Failed to copy output file after {MAX_RETRIES} attempts") - sys.exit(1) + raise ValueError(f"Failed to copy output file after {MAX_RETRIES} attempts; exiting...") - sys.exit(0) + sys.exit(0) \ No newline at end of file diff --git a/skimmer/jetId.h b/skimmer/jetId.h new file mode 100644 index 0000000..6aeea87 --- /dev/null +++ b/skimmer/jetId.h @@ -0,0 +1,46 @@ +#include +#include "correction.h" +#include +using namespace ROOT::VecOps; + +#define JET_ID_JSON_2024 "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/POG/JME/2024_Summer24/jetid.json.gz" + +RVec evalJetID(const std::string& jet_id_json, const RVec& eta, const RVec& chHEF, const RVec& neHEF, + const RVec& chEmEF, const RVec& neEmEF, + const RVec& muEF, const RVec& chMultiplicity, + const RVec& neMultiplicity, const RVec& multiplicity) { + auto cset_jetId = correction::CorrectionSet::from_file(jet_id_json); + RVec jetId(eta.size(), 0.0); + for (size_t i = 0; i < eta.size(); ++i) { + jetId[i] += 2 * cset_jetId->at("AK4PUPPI_Tight")->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]}); + jetId[i] += 4 * cset_jetId->at("AK4PUPPI_TightLeptonVeto")->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]}); + } + return jetId; +} + +RVec evalFatJetID(const std::string& jet_id_json, const RVec& eta, const RVec& chHEF, const RVec& neHEF, + const RVec& chEmEF, const RVec& neEmEF, + const RVec& muEF, const RVec& chMultiplicity, + const RVec& neMultiplicity, const RVec& multiplicity) { + auto cset_fatJetId = correction::CorrectionSet::from_file(jet_id_json); + RVec fatJetId(eta.size(), 0.0); + for (size_t i = 0; i < eta.size(); ++i) { + fatJetId[i] += 2 * cset_fatJetId->at("AK8PUPPI_Tight")->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]}); + fatJetId[i] += 4 * cset_fatJetId->at("AK8PUPPI_TightLeptonVeto")->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]}); + } + return fatJetId; +} + +RVec evalJetID2024(const RVec& eta, const RVec& chHEF, const RVec& neHEF, + const RVec& chEmEF, const RVec& neEmEF, + const RVec& muEF, const RVec& chMultiplicity, + const RVec& neMultiplicity, const RVec& multiplicity) { + return evalJetID(JET_ID_JSON_2024, eta, chHEF, neHEF, chEmEF, neEmEF, muEF, chMultiplicity, neMultiplicity, multiplicity); +} + +RVec evalFatJetID2024(const RVec& eta, const RVec& chHEF, const RVec& neHEF, + const RVec& chEmEF, const RVec& neEmEF, + const RVec& muEF, const RVec& chMultiplicity, + const RVec& neMultiplicity, const RVec& multiplicity) { + return evalFatJetID(JET_ID_JSON_2024, eta, chHEF, neHEF, chEmEF, neEmEF, muEF, chMultiplicity, neMultiplicity, multiplicity); +} \ No newline at end of file diff --git a/skimmer/runSkimmer.sh b/skimmer/runSkimmer.sh index 0263050..ed58c89 100755 --- a/skimmer/runSkimmer.sh +++ b/skimmer/runSkimmer.sh @@ -4,8 +4,6 @@ set -euo pipefail # Configuration CPUS=1 MEMORY=2G -OUTPUT_TAG=v1 -LOGDIR=logs/${OUTPUT_TAG} X509_USER_PROXY="${X509_USER_PROXY:-/tmp/x509up_u$(id -u)}" @@ -30,8 +28,8 @@ parse_options() { case "$opt" in i) INPUT_LIST=$OPTARG ;; s) IS_SIG=1 ;; - h) usage ;; v) VERSION=$OPTARG ;; + h) usage ;; *) usage ;; esac done @@ -46,6 +44,10 @@ parse_options() { echo "Error: -v is required." usage fi + + # Set OUTPUT_TAG and LOGDIR after VERSION is validated + OUTPUT_TAG="${VERSION}" + LOGDIR="logs/${OUTPUT_TAG}" } # Get list of files for a dataset @@ -99,11 +101,12 @@ request_cpus = ${CPUS} request_memory = ${MEMORY} executable = executable.py transfer_executable = True -transfer_input_files = keep_and_drop_skim.txt, truthSelections.h -arguments = ${proxy_path} \$(FILE) \$(Process) ${sigflag} ${OUTPUT_TAG} +transfer_input_files = keep_and_drop_skim.txt, truthSelections.h, jetId.h +arguments = ${proxy_path} \$(FILE) ${sigflag} ${OUTPUT_TAG} log = ${LOGDIR}/\$(Cluster).\$(Process).log output = ${LOGDIR}/\$(Cluster).\$(Process).out error = ${LOGDIR}/\$(Cluster).\$(Process).err ++JobFlavour = "tomorrow" queue FILE from ${file_list} EOF From 61add13e568eaf2b6173359709cd6239ffb3648d Mon Sep 17 00:00:00 2001 From: Aashay Date: Thu, 16 Oct 2025 07:05:40 +0200 Subject: [PATCH 2/6] don't need it --- skimmer/executable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skimmer/executable.py b/skimmer/executable.py index 2bfcd48..07e7490 100755 --- a/skimmer/executable.py +++ b/skimmer/executable.py @@ -109,7 +109,7 @@ def analyze(self): return self.df.Count().GetValue() def Snapshot(self, tag): - all_cols = [str(col) for col in self.df.GetColumnNames() if not col.startswith("__")] + all_cols = [str(col) for col in self.df.GetColumnNames()] keep_cols = {col: 0 for col in all_cols} comment = re.compile(r"#.*") ops = [] From 31dcffbb898bc2971afb632ca24169507da0bfa8 Mon Sep 17 00:00:00 2001 From: Aashay Date: Wed, 5 Nov 2025 23:24:47 +0100 Subject: [PATCH 3/6] don't save logs for each job separately, it breaks the entire file system --- skimmer/runSkimmer.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skimmer/runSkimmer.sh b/skimmer/runSkimmer.sh index ed58c89..8130a74 100755 --- a/skimmer/runSkimmer.sh +++ b/skimmer/runSkimmer.sh @@ -103,9 +103,9 @@ executable = executable.py transfer_executable = True transfer_input_files = keep_and_drop_skim.txt, truthSelections.h, jetId.h arguments = ${proxy_path} \$(FILE) ${sigflag} ${OUTPUT_TAG} -log = ${LOGDIR}/\$(Cluster).\$(Process).log -output = ${LOGDIR}/\$(Cluster).\$(Process).out -error = ${LOGDIR}/\$(Cluster).\$(Process).err +log = ${LOGDIR}/\$(Cluster).log +output = ${LOGDIR}/\$(Cluster).out +error = ${LOGDIR}/\$(Cluster).err +JobFlavour = "tomorrow" queue FILE from ${file_list} From 5931dde58d450035a172fa4a2e8fccd85001da97 Mon Sep 17 00:00:00 2001 From: Aashay Date: Wed, 5 Nov 2025 23:26:07 +0100 Subject: [PATCH 4/6] add new signal xsecs --- preselection/etc/xsecs-sig-2024-inclusive.json | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 preselection/etc/xsecs-sig-2024-inclusive.json diff --git a/preselection/etc/xsecs-sig-2024-inclusive.json b/preselection/etc/xsecs-sig-2024-inclusive.json new file mode 100644 index 0000000..9aac1f4 --- /dev/null +++ b/preselection/etc/xsecs-sig-2024-inclusive.json @@ -0,0 +1,17 @@ +{ + "VBSWWH_OSWW_C2V1p0_13p6TeV_5f_LO": 1.714e-03, + "VBSWWH_OSWW_C2V1p5_13p6TeV_5f_LO": 2.221e-03, + "VBSWWH_SSWW_C2V1p0_13p6TeV_5f_LO": 3.585e-04, + + "VBSWWH_OSWW_C2V2p0_13p6TeV_5f_LO": 3.654e-03, + "VBSWWH_SSWW_C2V1p5_13p6TeV_5f_LO": 7.280e-04, + "VBSWWH_SSWW_C2V2p0_13p6TeV_5f_LO": 1.788e-03, + + "VBSWZH_C2V1p0_13p6TeV_5f_LO": 7.470e-04, + "VBSWZH_C2V1p5_13p6TeV_5f_LO": 1.106e-03, + "VBSWZH_C2V2p0_13p6TeV_5f_LO": 2.123e-03, + + "VBSZZH_C2V1p0_13p6TeV_5f_LO": 1.302e-04, + "VBSZZH_C2V1p5_13p6TeV_5f_LO": 4.435e-04, + "VBSZZH_C2V2p0_13p6TeV_5f_LO": 1.376e-03 +} \ No newline at end of file From 1712add496417f395ce96f37c0c0f6cf10226b32 Mon Sep 17 00:00:00 2001 From: aashayarora <42879633+aashayarora@users.noreply.github.com> Date: Thu, 8 Jan 2026 09:19:02 -0800 Subject: [PATCH 5/6] add Rho_* for JECs --- skimmer/keep_and_drop_skim.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skimmer/keep_and_drop_skim.txt b/skimmer/keep_and_drop_skim.txt index 167bc05..105d6a4 100644 --- a/skimmer/keep_and_drop_skim.txt +++ b/skimmer/keep_and_drop_skim.txt @@ -17,4 +17,5 @@ keep event keep run keep luminosityBlock keep truth* -keep gen* \ No newline at end of file +keep gen* +keep Rho.* From bd95016d11dc1cc0dddfc30db9a85cf1207a8638 Mon Sep 17 00:00:00 2001 From: aashayarora <42879633+aashayarora@users.noreply.github.com> Date: Fri, 9 Jan 2026 10:00:44 -0800 Subject: [PATCH 6/6] rename xsecs-sig-13p6TeV-InclusiveHDecay.json file --- ...24-inclusive.json => xsecs-sig-13p6TeV-InclusiveHDecay.json} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename preselection/etc/{xsecs-sig-2024-inclusive.json => xsecs-sig-13p6TeV-InclusiveHDecay.json} (99%) diff --git a/preselection/etc/xsecs-sig-2024-inclusive.json b/preselection/etc/xsecs-sig-13p6TeV-InclusiveHDecay.json similarity index 99% rename from preselection/etc/xsecs-sig-2024-inclusive.json rename to preselection/etc/xsecs-sig-13p6TeV-InclusiveHDecay.json index 9aac1f4..ab8a1b7 100644 --- a/preselection/etc/xsecs-sig-2024-inclusive.json +++ b/preselection/etc/xsecs-sig-13p6TeV-InclusiveHDecay.json @@ -14,4 +14,4 @@ "VBSZZH_C2V1p0_13p6TeV_5f_LO": 1.302e-04, "VBSZZH_C2V1p5_13p6TeV_5f_LO": 4.435e-04, "VBSZZH_C2V2p0_13p6TeV_5f_LO": 1.376e-03 -} \ No newline at end of file +}