Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
validation/
validation/
skimmer/logs/
17 changes: 17 additions & 0 deletions preselection/etc/xsecs-sig-13p6TeV-InclusiveHDecay.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"VBSWWH_OSWW_C2V1p0_13p6TeV_5f_LO": 1.714e-03,
"VBSWWH_OSWW_C2V1p5_13p6TeV_5f_LO": 2.221e-03,
"VBSWWH_SSWW_C2V1p0_13p6TeV_5f_LO": 3.585e-04,

"VBSWWH_OSWW_C2V2p0_13p6TeV_5f_LO": 3.654e-03,
"VBSWWH_SSWW_C2V1p5_13p6TeV_5f_LO": 7.280e-04,
"VBSWWH_SSWW_C2V2p0_13p6TeV_5f_LO": 1.788e-03,

"VBSWZH_C2V1p0_13p6TeV_5f_LO": 7.470e-04,
"VBSWZH_C2V1p5_13p6TeV_5f_LO": 1.106e-03,
"VBSWZH_C2V2p0_13p6TeV_5f_LO": 2.123e-03,

"VBSZZH_C2V1p0_13p6TeV_5f_LO": 1.302e-04,
"VBSZZH_C2V1p5_13p6TeV_5f_LO": 4.435e-04,
"VBSZZH_C2V2p0_13p6TeV_5f_LO": 1.376e-03
}
61 changes: 61 additions & 0 deletions skimmer/datasets-data-2024.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/EGamma0/Run2024C-MINIv6NANOv15-v1/NANOAOD
/EGamma1/Run2024C-MINIv6NANOv15-v1/NANOAOD
/Muon0/Run2024C-MINIv6NANOv15-v1/NANOAOD
/Muon1/Run2024C-MINIv6NANOv15-v1/NANOAOD
/JetMET0/Run2024C-MINIv6NANOv15-v1/NANOAOD
/JetMET1/Run2024C-MINIv6NANOv15-v1/NANOAOD

/EGamma0/Run2024D-MINIv6NANOv15-v1/NANOAOD
/EGamma1/Run2024D-MINIv6NANOv15-v1/NANOAOD
/Muon0/Run2024D-MINIv6NANOv15-v1/NANOAOD
/Muon1/Run2024D-MINIv6NANOv15-v1/NANOAOD
/JetMET0/Run2024D-MINIv6NANOv15-v1/NANOAOD
/JetMET1/Run2024D-MINIv6NANOv15-v1/NANOAOD


/EGamma0/Run2024E-MINIv6NANOv15-v1/NANOAOD
/EGamma1/Run2024E-MINIv6NANOv15-v1/NANOAOD
/JetMET0/Run2024E-MINIv6NANOv15-v1/NANOAOD
/JetMET1/Run2024E-MINIv6NANOv15-v1/NANOAOD
/Muon0/Run2024E-MINIv6NANOv15-v1/NANOAOD
/Muon1/Run2024E-MINIv6NANOv15-v1/NANOAOD


/EGamma0/Run2024F-MINIv6NANOv15-v1/NANOAOD
/EGamma1/Run2024F-MINIv6NANOv15-v1/NANOAOD
/JetMET0/Run2024F-MINIv6NANOv15-v2/NANOAOD
/JetMET1/Run2024F-MINIv6NANOv15-v2/NANOAOD
/Muon0/Run2024F-MINIv6NANOv15-v1/NANOAOD
/Muon1/Run2024F-MINIv6NANOv15-v1/NANOAOD


/EGamma0/Run2024G-MINIv6NANOv15-v2/NANOAOD
/EGamma1/Run2024G-MINIv6NANOv15-v2/NANOAOD
/JetMET0/Run2024G-MINIv6NANOv15-v2/NANOAOD
/JetMET1/Run2024G-MINIv6NANOv15-v2/NANOAOD
/Muon0/Run2024G-MINIv6NANOv15-v1/NANOAOD
/Muon1/Run2024G-MINIv6NANOv15-v2/NANOAOD


/EGamma0/Run2024H-MINIv6NANOv15-v2/NANOAOD
/EGamma1/Run2024H-MINIv6NANOv15-v1/NANOAOD
/JetMET0/Run2024H-MINIv6NANOv15-v2/NANOAOD
/JetMET1/Run2024H-MINIv6NANOv15-v2/NANOAOD
/Muon0/Run2024H-MINIv6NANOv15-v1/NANOAOD
/Muon1/Run2024H-MINIv6NANOv15-v2/NANOAOD


/EGamma0/Run2024I-MINIv6NANOv15-v1/NANOAOD
/EGamma0/Run2024I-MINIv6NANOv15_v2-v1/NANOAOD
/EGamma1/Run2024I-MINIv6NANOv15-v1/NANOAOD
/EGamma1/Run2024I-MINIv6NANOv15_v2-v1/NANOAOD
/JetMET0/Run2024I-MINIv6NANOv15-v2/NANOAOD
/JetMET0/Run2024I-MINIv6NANOv15_v2-v1/NANOAOD
/JetMET1/Run2024I-MINIv6NANOv15-v1/NANOAOD
/JetMET1/Run2024I-MINIv6NANOv15_v2-v2/NANOAOD
/Muon0/Run2024I-MINIv6NANOv15-v1/NANOAOD
/Muon0/Run2024I-MINIv6NANOv15_v2-v1/NANOAOD
/Muon1/Run2024I-MINIv6NANOv15-v1/NANOAOD
/Muon1/Run2024I-MINIv6NANOv15_v2-v1/NANOAOD


24 changes: 12 additions & 12 deletions skimmer/datasets-signal.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
/eos/user/a/aaarora/signal/Run3Summer24/VBSWWH_OSWW_C2V1p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/Run3Summer24/VBSWWH_SSWW_C2V1p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/Run3Summer24/VBSWZH_C2V1p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/Run3Summer24/VBSZZH_C2V1p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWWH_OSWW_C2V1p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWWH_SSWW_C2V1p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWZH_C2V1p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSZZH_C2V1p0_13p6TeV_5f_LO_TuneCP5

/eos/user/a/aaarora/signal/Run3Summer24/VBSWWH_OSWW_C2V1p5_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/Run3Summer24/VBSWWH_SSWW_C2V1p5_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/Run3Summer24/VBSWZH_C2V1p5_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/Run3Summer24/VBSZZH_C2V1p5_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWWH_OSWW_C2V1p5_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWWH_SSWW_C2V1p5_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWZH_C2V1p5_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSZZH_C2V1p5_13p6TeV_5f_LO_TuneCP5

/eos/user/a/aaarora/signal/Run3Summer24/VBSWWH_OSWW_C2V2p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/Run3Summer24/VBSWWH_SSWW_C2V2p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/Run3Summer24/VBSWZH_C2V2p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/Run3Summer24/VBSZZH_C2V2p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWWH_OSWW_C2V2p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWWH_SSWW_C2V2p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSWZH_C2V2p0_13p6TeV_5f_LO_TuneCP5
/eos/user/a/aaarora/signal/NANOAOD/Run3Summer24/VBSZZH_C2V2p0_13p6TeV_5f_LO_TuneCP5
113 changes: 28 additions & 85 deletions skimmer/executable.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,30 @@

import ROOT as r

r.gInterpreter.Declare('#include "truthSelections.h"')

subprocess.run("python3 -m pip install --user --no-binary=correctionlib correctionlib", shell=True, check=True)
import importlib
correctionlib = importlib.import_module("correctionlib")
correctionlib.register_pyroot_binding()

r.gInterpreter.Declare('#include "truthSelections.h"')
r.gInterpreter.Declare('#include "jetId.h"')

# Constants
CONDOR_OUTPUT_DIR = "output"
XROOTD_REDIRECTOR = "root://xrootd-cms.infn.it/"
OUTPUT_XRD = "davs://redirector.t2.ucsd.edu:1095//store/user/USER_UAF_DIR/skim/" # Change to user's skim directory on UAF
MAX_RETRIES = 10
SLEEP_DURATION = 60 # 1 minute in seconds

JET_ID_JSONS = {"2024": "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/POG/JME/2024_Summer24/jetid.json.gz"}

class Skimmer():
def __init__(self, inFiles, outDir, keepDropFile):
def __init__(self, inFiles, outDir, keepDropFile, is_signal):
self.inFiles = inFiles
self.outDir = outDir
self.keepDropFile = keepDropFile
self.is_signal = is_signal

self.df = r.RDataFrame("Events", self.inFiles)
r.RDF.Experimental.AddProgressBar(self.df)
columns = self.df.GetColumnNames()
for col in columns:
if col.startswith("Muon_") or col.startswith("Electron_") or col.startswith("Jet_") or col.startswith("FatJet_"):
Expand Down Expand Up @@ -79,56 +79,21 @@ def genSelection(df):

return df

def analyze(self, is_signal):
def analyze(self):
self.df = self.df.Define("__tight_mu_mask", "Muon_pt > 35. && abs(Muon_eta) < 2.4 && Muon_tightId") \
.Define("__tight_ele_mask", "Electron_pt > 35. && abs(Electron_eta) < 2.5 && Electron_cutBased >= 4") \
.Define("__n_tight_leptons", "Sum(__tight_mu_mask) + Sum(__tight_ele_mask)") \
.Define("__fatjet_mask", "FatJet_pt > 200 && FatJet_msoftdrop > 10") \
.Define("__n_fatjets", "Sum(__fatjet_mask)") \
.Filter("(__n_fatjets + __n_tight_leptons) >= 1")

if self.sample_year in JET_ID_JSONS:
jet_id_json = JET_ID_JSONS[self.sample_year]

self.df = self.df.Define("Jet_multiplicity", "Jet_chMultiplicity + Jet_neMultiplicity") \
.Define("FatJet_multiplicity", "FatJet_chMultiplicity + FatJet_neMultiplicity")

r.gInterpreter.Declare("""
#include <ROOT/RVec.hxx>
using namespace ROOT::VecOps;

RVec<float> evalJetID(const RVec<float>& eta, const RVec<float>& chHEF, const RVec<float>& neHEF,
const RVec<float>& chEmEF, const RVec<float>& neEmEF,
const RVec<float>& muEF, const RVec<int>& chMultiplicity,
const RVec<int>& neMultiplicity, const RVec<int>& multiplicity) {
auto cset_jetId = correction::CorrectionSet::from_file(\"""" + jet_id_json + """\");
RVec<float> jetId(eta.size(), 0.0);
for (size_t i = 0; i < eta.size(); ++i) {
jetId[i] += 2 * cset_jetId->at(\"AK4PUPPI_Tight\")->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]});
jetId[i] += 4 * cset_jetId->at(\"AK4PUPPI_TightLeptonVeto\")->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]});
}
return jetId;
}

RVec<float> evalFatJetID(const RVec<float>& eta, const RVec<float>& chHEF, const RVec<float>& neHEF,
const RVec<float>& chEmEF, const RVec<float>& neEmEF,
const RVec<float>& muEF, const RVec<int>& chMultiplicity,
const RVec<int>& neMultiplicity, const RVec<int>& multiplicity) {
auto cset_fatJetId = correction::CorrectionSet::from_file(\"""" + jet_id_json + """\");
RVec<float> fatJetId(eta.size(), 0.0);
for (size_t i = 0; i < eta.size(); ++i) {
fatJetId[i] += 2 * cset_fatJetId->at(\"AK8PUPPI_Tight\")->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]});
fatJetId[i] += 4 * cset_fatJetId->at(\"AK8PUPPI_TightLeptonVeto\")->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]});
}
return fatJetId;
}

""")

self.df = self.df.Define("Jet_jetId", "evalJetID(Jet_eta, Jet_chHEF, Jet_neHEF, Jet_chEmEF, Jet_neEmEF, Jet_muEF, Jet_chMultiplicity, Jet_neMultiplicity, Jet_multiplicity)") \
.Define("FatJet_jetId", "evalFatJetID(FatJet_eta, FatJet_chHEF, FatJet_neHEF, FatJet_chEmEF, FatJet_neEmEF, FatJet_muEF, FatJet_chMultiplicity, FatJet_neMultiplicity, FatJet_multiplicity)")

if is_signal:
self.df = self.df.Define("Jet_jetId", f"evalJetID{self.sample_year}(Jet_eta, Jet_chHEF, Jet_neHEF, Jet_chEmEF, Jet_neEmEF, Jet_muEF, Jet_chMultiplicity, Jet_neMultiplicity, Jet_multiplicity)") \
.Define("FatJet_jetId", f"evalFatJetID{self.sample_year}(FatJet_eta, FatJet_chHEF, FatJet_neHEF, FatJet_chEmEF, FatJet_neEmEF, FatJet_muEF, FatJet_chMultiplicity, FatJet_neMultiplicity, FatJet_multiplicity)")

if self.is_signal:
self.df = self.genSelection(self.df)

# Run3 event filters
Expand Down Expand Up @@ -182,11 +147,12 @@ def Snapshot(self, tag):

@property
def sample_year(self):
match = re.search(r'Run3Summer24|RunIII2024Summer24NanoAODv15', self.inFiles[0])
if match:
return "2024"
match = re.search(r'Run3Summer24|RunIII2024Summer24NanoAODv15|Run2024', self.inFiles[0])
if not match:
raise ValueError("Could not determine sample year from filename")
else:
return None
return "2024"


def run_skimmer(input_file, output_dir, is_signal):
print(f"Running skimmer on {input_file}")
Expand All @@ -195,43 +161,22 @@ def run_skimmer(input_file, output_dir, is_signal):
inFiles = [XROOTD_REDIRECTOR + input_file if input_file.startswith('/store') else 'file://' + input_file]
keepDropFile = "keep_and_drop_skim.txt"

skimmer = Skimmer(inFiles, output_dir, keepDropFile)
passed = skimmer.analyze(is_signal)
skimmer = Skimmer(inFiles, output_dir, keepDropFile, is_signal)
passed = skimmer.analyze()
if passed:
skimmer.Snapshot("skim")
skimmer.Snapshot("output")
return True
else:
print("No entries in output")
return False


def merge_skims(output_dir):
skim_files = glob.glob(f"{output_dir}/*")

if len(skim_files) == 0:
print("No output files to merge; exiting...")
return True
elif len(skim_files) == 1:
shutil.move(skim_files[0], f"{output_dir}/output.root")
return True
else:
merge_cmd = ["hadd", f"{output_dir}/output.root"] + skim_files
print(" ".join(merge_cmd))
result = subprocess.run(merge_cmd)
return result.returncode == 0


def determine_output_paths(input_file, is_signal, output_tag):
if not is_signal:
era = input_file.split('/')[3]
sample_name = input_file.split('/')[4]
campaign = input_file.split('/')[6]
sub_output_dir = "/".join(input_file.split('/')[3:5] + input_file.split('/')[8:])
else:
era = input_file.split('/')[6]
sample_name = input_file.split('/')[7]
campaign = "private"
sub_output_dir = "/".join(input_file.split('/')[7:])

output_dir = f"{OUTPUT_XRD}/skims_{output_tag}/{campaign}/{sample_name}"
output_dir = f"{OUTPUT_XRD}/skims_{output_tag}/{sub_output_dir}"
return output_dir

def check_output_liveness(file):
Expand Down Expand Up @@ -272,7 +217,6 @@ def copy_output_file(source, destination):
parser = ArgumentParser(description='Run the NanoAOD skimmer with file transfer.')
parser.add_argument('proxy', help="Path to the X509 proxy")
parser.add_argument('input_file', help="Input file path")
parser.add_argument('job_id', help="Job ID")
parser.add_argument('is_signal', help='Flag indicating if this is a signal sample', type=int)
parser.add_argument('output_tag', help='Output tag, including version of skims eg. v2', type=str)
args = parser.parse_args()
Expand All @@ -284,13 +228,13 @@ def copy_output_file(source, destination):
if not success:
print("Skimmer failed; retrying one more time...")
success = run_skimmer(args.input_file, CONDOR_OUTPUT_DIR, args.is_signal)
merge_skims(CONDOR_OUTPUT_DIR)

output_dir = determine_output_paths(args.input_file, args.is_signal, args.output_tag)

if not success:
raise ValueError("Skimmer failed twice; exiting...")


copy_src = os.path.join(os.getcwd(), f"{CONDOR_OUTPUT_DIR}/output.root")
copy_dest = f"{output_dir}/output_{args.job_id}.root"
copy_dest = determine_output_paths(args.input_file, args.is_signal, args.output_tag)

for attempt in range(MAX_RETRIES + 1):
success = copy_output_file(copy_src, copy_dest)
Expand All @@ -302,7 +246,6 @@ def copy_output_file(source, destination):
time.sleep(SLEEP_DURATION)

if not success:
print(f"Failed to copy output file after {MAX_RETRIES} attempts")
sys.exit(1)
raise ValueError(f"Failed to copy output file after {MAX_RETRIES} attempts; exiting...")

sys.exit(0)
sys.exit(0)
46 changes: 46 additions & 0 deletions skimmer/jetId.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include <ROOT/RVec.hxx>
#include "correction.h"
#include <map>
#include <memory>
#include <mutex>
#include <string>
// Brings RVec into scope unqualified for the signatures below.
// NOTE(review): a header-scope using-directive also leaks into every file that includes this header.
using namespace ROOT::VecOps;

// Path of the correctionlib jet-ID JSON passed to evalJetID/evalFatJetID by the *2024 wrappers below.
#define JET_ID_JSON_2024 "/cvmfs/cms.cern.ch/rsync/cms-nanoAOD/jsonpog-integration/POG/JME/2024_Summer24/jetid.json.gz"

/// @brief Build a per-jet ID word from the AK4 PUPPI jet-ID corrections.
///
/// For every jet the result is
///   2 * AK4PUPPI_Tight(...) + 4 * AK4PUPPI_TightLeptonVeto(...),
/// where both terms are evaluated with the same nine inputs. (Presumably the
/// corrections return 0/1 so the result is a bit mask -- confirm against the
/// JSON payload.)
///
/// The correction set is loaded from disk only once per distinct path and then
/// reused: this function is called once per event, and re-parsing the gzipped
/// JSON on every call dominates the run time. The cache is guarded by a mutex
/// so concurrent callers do not race on it.
///
/// @param jet_id_json     Path to the correctionlib jet-ID JSON file.
/// @param eta             Jet pseudorapidities; its size defines the output size.
/// @param chHEF,neHEF     Charged / neutral hadron energy fractions.
/// @param chEmEF,neEmEF   Charged / neutral EM energy fractions.
/// @param muEF            Muon energy fraction.
/// @param chMultiplicity,neMultiplicity,multiplicity  Constituent multiplicities.
///        All per-jet inputs are indexed up to eta.size(); they are assumed to
///        have the same length as eta.
/// @return One value per jet, in input order. Empty input yields an empty
///         result without touching the correction file.
/// @throws Whatever correctionlib throws when the file cannot be loaded, a key
///         is missing, or the inputs are rejected.
RVec<float> evalJetID(const std::string& jet_id_json, const RVec<float>& eta, const RVec<float>& chHEF, const RVec<float>& neHEF,
                      const RVec<float>& chEmEF, const RVec<float>& neEmEF,
                      const RVec<float>& muEF, const RVec<int>& chMultiplicity,
                      const RVec<int>& neMultiplicity, const RVec<int>& multiplicity) {
    RVec<float> jetId(eta.size(), 0.0);
    if (eta.empty()) {
        return jetId;  // nothing to evaluate; skip file access and lookups
    }

    // Per-path cache of parsed correction sets, shared by all calls.
    static std::mutex cacheMutex;
    static std::map<std::string, std::shared_ptr<const correction::CorrectionSet>> cache;

    std::shared_ptr<const correction::CorrectionSet> cset_jetId;
    {
        std::lock_guard<std::mutex> lock(cacheMutex);
        auto& slot = cache[jet_id_json];
        if (!slot) {
            // If loading throws, the slot stays empty and the next call retries.
            slot = correction::CorrectionSet::from_file(jet_id_json);
        }
        cset_jetId = slot;
    }

    // Key lookups are loop-invariant: resolve them once per call, not per jet.
    const auto tight = cset_jetId->at("AK4PUPPI_Tight");
    const auto tightLepVeto = cset_jetId->at("AK4PUPPI_TightLeptonVeto");

    for (size_t i = 0; i < eta.size(); ++i) {
        jetId[i] += 2 * tight->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]});
        jetId[i] += 4 * tightLepVeto->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]});
    }
    return jetId;
}

/// @brief Build a per-jet ID word from the AK8 PUPPI jet-ID corrections.
///
/// For every fat jet the result is
///   2 * AK8PUPPI_Tight(...) + 4 * AK8PUPPI_TightLeptonVeto(...),
/// where both terms are evaluated with the same nine inputs. (Presumably the
/// corrections return 0/1 so the result is a bit mask -- confirm against the
/// JSON payload.)
///
/// The correction set is loaded from disk only once per distinct path and then
/// reused: this function is called once per event, and re-parsing the gzipped
/// JSON on every call dominates the run time. The cache is guarded by a mutex
/// so concurrent callers do not race on it.
///
/// @param jet_id_json     Path to the correctionlib jet-ID JSON file.
/// @param eta             Fat-jet pseudorapidities; its size defines the output size.
/// @param chHEF,neHEF     Charged / neutral hadron energy fractions.
/// @param chEmEF,neEmEF   Charged / neutral EM energy fractions.
/// @param muEF            Muon energy fraction.
/// @param chMultiplicity,neMultiplicity,multiplicity  Constituent multiplicities.
///        All per-jet inputs are indexed up to eta.size(); they are assumed to
///        have the same length as eta.
/// @return One value per fat jet, in input order. Empty input yields an empty
///         result without touching the correction file.
/// @throws Whatever correctionlib throws when the file cannot be loaded, a key
///         is missing, or the inputs are rejected.
RVec<float> evalFatJetID(const std::string& jet_id_json, const RVec<float>& eta, const RVec<float>& chHEF, const RVec<float>& neHEF,
                         const RVec<float>& chEmEF, const RVec<float>& neEmEF,
                         const RVec<float>& muEF, const RVec<int>& chMultiplicity,
                         const RVec<int>& neMultiplicity, const RVec<int>& multiplicity) {
    RVec<float> fatJetId(eta.size(), 0.0);
    if (eta.empty()) {
        return fatJetId;  // nothing to evaluate; skip file access and lookups
    }

    // Per-path cache of parsed correction sets, shared by all calls.
    static std::mutex cacheMutex;
    static std::map<std::string, std::shared_ptr<const correction::CorrectionSet>> cache;

    std::shared_ptr<const correction::CorrectionSet> cset_fatJetId;
    {
        std::lock_guard<std::mutex> lock(cacheMutex);
        auto& slot = cache[jet_id_json];
        if (!slot) {
            // If loading throws, the slot stays empty and the next call retries.
            slot = correction::CorrectionSet::from_file(jet_id_json);
        }
        cset_fatJetId = slot;
    }

    // Key lookups are loop-invariant: resolve them once per call, not per jet.
    const auto tight = cset_fatJetId->at("AK8PUPPI_Tight");
    const auto tightLepVeto = cset_fatJetId->at("AK8PUPPI_TightLeptonVeto");

    for (size_t i = 0; i < eta.size(); ++i) {
        fatJetId[i] += 2 * tight->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]});
        fatJetId[i] += 4 * tightLepVeto->evaluate({eta[i], chHEF[i], neHEF[i], chEmEF[i], neEmEF[i], muEF[i], chMultiplicity[i], neMultiplicity[i], multiplicity[i]});
    }
    return fatJetId;
}

/// @brief 2024 convenience wrapper around evalJetID.
///
/// Forwards every argument unchanged and supplies JET_ID_JSON_2024 as the
/// correction file, so callers can pass only the per-jet columns.
/// @return The value returned by evalJetID for the 2024 correction file.
RVec<float> evalJetID2024(const RVec<float>& eta, const RVec<float>& chHEF, const RVec<float>& neHEF,
                          const RVec<float>& chEmEF, const RVec<float>& neEmEF,
                          const RVec<float>& muEF, const RVec<int>& chMultiplicity,
                          const RVec<int>& neMultiplicity, const RVec<int>& multiplicity) {
    const std::string jsonPath{JET_ID_JSON_2024};
    return evalJetID(jsonPath,
                     eta, chHEF, neHEF, chEmEF, neEmEF, muEF,
                     chMultiplicity, neMultiplicity, multiplicity);
}

/// @brief 2024 convenience wrapper around evalFatJetID.
///
/// Forwards every argument unchanged and supplies JET_ID_JSON_2024 as the
/// correction file, so callers can pass only the per-fat-jet columns.
/// @return The value returned by evalFatJetID for the 2024 correction file.
RVec<float> evalFatJetID2024(const RVec<float>& eta, const RVec<float>& chHEF, const RVec<float>& neHEF,
                             const RVec<float>& chEmEF, const RVec<float>& neEmEF,
                             const RVec<float>& muEF, const RVec<int>& chMultiplicity,
                             const RVec<int>& neMultiplicity, const RVec<int>& multiplicity) {
    const std::string jsonPath{JET_ID_JSON_2024};
    return evalFatJetID(jsonPath,
                        eta, chHEF, neHEF, chEmEF, neEmEF, muEF,
                        chMultiplicity, neMultiplicity, multiplicity);
}
3 changes: 2 additions & 1 deletion skimmer/keep_and_drop_skim.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ keep event
keep run
keep luminosityBlock
keep truth*
keep gen*
keep gen*
keep Rho.*
Loading